{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 50.0, "eval_steps": 800, "global_step": 666250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.004803001876172608, "grad_norm": 3.330345630645752, "learning_rate": 0.0001113525, "loss": 4.3152, "step": 64 }, { "epoch": 0.009606003752345216, "grad_norm": 2.5987207889556885, "learning_rate": 0.00022447249999999998, "loss": 3.5908, "step": 128 }, { "epoch": 0.014409005628517824, "grad_norm": 10.176867485046387, "learning_rate": 0.00033759249999999996, "loss": 3.3927, "step": 192 }, { "epoch": 0.01921200750469043, "grad_norm": 6.534875869750977, "learning_rate": 0.00045071249999999993, "loss": 3.3333, "step": 256 }, { "epoch": 0.02401500938086304, "grad_norm": 6.088456630706787, "learning_rate": 0.0005638325, "loss": 3.2928, "step": 320 }, { "epoch": 0.028818011257035647, "grad_norm": 6.937580108642578, "learning_rate": 0.0006769524999999999, "loss": 3.2901, "step": 384 }, { "epoch": 0.033621013133208255, "grad_norm": 6.744969844818115, "learning_rate": 0.0007900724999999999, "loss": 3.2589, "step": 448 }, { "epoch": 0.03842401500938086, "grad_norm": 2.2261719703674316, "learning_rate": 0.0009031925, "loss": 3.231, "step": 512 }, { "epoch": 0.04322701688555347, "grad_norm": 1.030404806137085, "learning_rate": 0.0010163124999999999, "loss": 3.2278, "step": 576 }, { "epoch": 0.04803001876172608, "grad_norm": 1.036293387413025, "learning_rate": 0.0011294324999999998, "loss": 3.272, "step": 640 }, { "epoch": 0.05283302063789869, "grad_norm": 1.1835274696350098, "learning_rate": 0.0012425525, "loss": 3.256, "step": 704 }, { "epoch": 0.057636022514071295, "grad_norm": 0.8378634452819824, "learning_rate": 0.0013556724999999998, "loss": 3.27, "step": 768 }, { "epoch": 0.0624390243902439, "grad_norm": 0.7602612972259521, "learning_rate": 0.0014687925, "loss": 3.2261, "step": 832 }, { "epoch": 0.06724202626641651, "grad_norm": 0.6387987732887268, "learning_rate": 0.0015819124999999997, "loss": 3.2153, "step": 896 }, { "epoch": 0.07204502814258912, "grad_norm": 0.4422095715999603, "learning_rate": 0.0016950325, "loss": 3.1975, "step": 960 }, { "epoch": 0.07684803001876173, "grad_norm": 0.39002183079719543, "learning_rate": 0.0018081524999999999, "loss": 3.1983, "step": 1024 }, { "epoch": 0.08165103189493433, "grad_norm": 5.926162242889404, "learning_rate": 0.0019212724999999996, "loss": 3.1763, "step": 1088 }, { "epoch": 0.08645403377110694, "grad_norm": 0.4173193871974945, "learning_rate": 0.0020343924999999996, "loss": 3.1833, "step": 1152 }, { "epoch": 0.09125703564727955, "grad_norm": 0.4136042594909668, "learning_rate": 0.0021475125, "loss": 3.1846, "step": 1216 }, { "epoch": 0.09606003752345216, "grad_norm": 0.39301183819770813, "learning_rate": 0.0022606324999999996, "loss": 3.1739, "step": 1280 }, { "epoch": 0.10086303939962477, "grad_norm": 0.4910842776298523, "learning_rate": 0.0023737525, "loss": 3.1614, "step": 1344 }, { "epoch": 0.10566604127579737, "grad_norm": 0.4039038121700287, "learning_rate": 0.0024868725, "loss": 3.1577, "step": 1408 }, { "epoch": 0.11046904315196998, "grad_norm": 0.3286585211753845, "learning_rate": 0.0025999925, "loss": 3.1458, "step": 1472 }, { "epoch": 0.11527204502814259, "grad_norm": 0.44095373153686523, "learning_rate": 0.0027131125, "loss": 3.155, "step": 1536 }, { "epoch": 0.1200750469043152, "grad_norm": 0.40613290667533875, "learning_rate": 0.0028262325, "loss": 3.1469, "step": 1600 }, { "epoch": 0.1248780487804878, "grad_norm": 0.4613141417503357, "learning_rate": 0.002828, "loss": 3.1392, "step": 1664 }, { "epoch": 0.1296810506566604, "grad_norm": 0.3758493661880493, "learning_rate": 0.002828, "loss": 3.1298, "step": 1728 }, { "epoch": 0.13448405253283302, "grad_norm": 0.32609787583351135, "learning_rate": 0.002828, "loss": 3.123, "step": 1792 }, { "epoch": 0.13928705440900563, "grad_norm": 0.4221761226654053, "learning_rate": 0.002828, "loss": 3.1076, "step": 1856 }, { "epoch": 0.14409005628517824, "grad_norm": 0.4372267425060272, "learning_rate": 0.002828, "loss": 3.098, "step": 1920 }, { "epoch": 0.14889305816135084, "grad_norm": 0.36804404854774475, "learning_rate": 0.002828, "loss": 3.0952, "step": 1984 }, { "epoch": 0.15369606003752345, "grad_norm": 0.314120888710022, "learning_rate": 0.002828, "loss": 3.0751, "step": 2048 }, { "epoch": 0.15849906191369606, "grad_norm": 0.3158409297466278, "learning_rate": 0.002828, "loss": 3.0574, "step": 2112 }, { "epoch": 0.16330206378986867, "grad_norm": 0.35668376088142395, "learning_rate": 0.002828, "loss": 3.0598, "step": 2176 }, { "epoch": 0.16810506566604128, "grad_norm": 0.3429064452648163, "learning_rate": 0.002828, "loss": 3.0554, "step": 2240 }, { "epoch": 0.17290806754221388, "grad_norm": 0.37981563806533813, "learning_rate": 0.002828, "loss": 3.0439, "step": 2304 }, { "epoch": 0.1777110694183865, "grad_norm": 0.45046043395996094, "learning_rate": 0.002828, "loss": 3.034, "step": 2368 }, { "epoch": 0.1825140712945591, "grad_norm": 0.30424681305885315, "learning_rate": 0.002828, "loss": 3.0408, "step": 2432 }, { "epoch": 0.1873170731707317, "grad_norm": 0.4374525845050812, "learning_rate": 0.002828, "loss": 3.0289, "step": 2496 }, { "epoch": 0.19212007504690432, "grad_norm": 0.4312361776828766, "learning_rate": 0.002828, "loss": 3.0252, "step": 2560 }, { "epoch": 0.19692307692307692, "grad_norm": 0.33109021186828613, "learning_rate": 0.002828, "loss": 3.0094, "step": 2624 }, { "epoch": 0.20172607879924953, "grad_norm": 0.4393901228904724, "learning_rate": 0.002828, "loss": 3.0021, "step": 2688 }, { "epoch": 0.20652908067542214, "grad_norm": 0.44241341948509216, "learning_rate": 0.002828, "loss": 3.0005, "step": 2752 }, { "epoch": 0.21133208255159475, "grad_norm": 0.36241745948791504, "learning_rate": 0.002828, "loss": 2.9939, "step": 2816 }, { "epoch": 0.21613508442776735, "grad_norm": 0.40780672430992126, "learning_rate": 0.002828, "loss": 2.9788, "step": 2880 }, { "epoch": 0.22093808630393996, "grad_norm": 0.3944590389728546, "learning_rate": 0.002828, "loss": 2.9854, "step": 2944 }, { "epoch": 0.22574108818011257, "grad_norm": 0.40449267625808716, "learning_rate": 0.002828, "loss": 2.9819, "step": 3008 }, { "epoch": 0.23054409005628518, "grad_norm": 0.37247487902641296, "learning_rate": 0.002828, "loss": 2.9827, "step": 3072 }, { "epoch": 0.23534709193245779, "grad_norm": 0.3732891082763672, "learning_rate": 0.002828, "loss": 2.9714, "step": 3136 }, { "epoch": 0.2401500938086304, "grad_norm": 0.3168690800666809, "learning_rate": 0.002828, "loss": 2.9649, "step": 3200 }, { "epoch": 0.244953095684803, "grad_norm": 0.32185083627700806, "learning_rate": 0.002828, "loss": 2.9607, "step": 3264 }, { "epoch": 0.2497560975609756, "grad_norm": 0.3293335437774658, "learning_rate": 0.002828, "loss": 2.9464, "step": 3328 }, { "epoch": 0.2545590994371482, "grad_norm": 0.39153945446014404, "learning_rate": 0.002828, "loss": 2.9513, "step": 3392 }, { "epoch": 0.2593621013133208, "grad_norm": 0.36884990334510803, "learning_rate": 0.002828, "loss": 2.9418, "step": 3456 }, { "epoch": 0.26416510318949343, "grad_norm": 0.39196011424064636, "learning_rate": 0.002828, "loss": 2.9407, "step": 3520 }, { "epoch": 0.26896810506566604, "grad_norm": 0.36011603474617004, "learning_rate": 0.002828, "loss": 2.9461, "step": 3584 }, { "epoch": 0.27377110694183865, "grad_norm": 0.3608081638813019, "learning_rate": 0.002828, "loss": 2.937, "step": 3648 }, { "epoch": 0.27857410881801126, "grad_norm": 0.3833774924278259, "learning_rate": 0.002828, "loss": 2.9254, "step": 3712 }, { "epoch": 0.28337711069418386, "grad_norm": 0.35225459933280945, "learning_rate": 0.002828, "loss": 2.9165, "step": 3776 }, { "epoch": 0.2881801125703565, "grad_norm": 0.39832860231399536, "learning_rate": 0.002828, "loss": 2.9259, "step": 3840 }, { "epoch": 0.2929831144465291, "grad_norm": 0.36834558844566345, "learning_rate": 0.002828, "loss": 2.9186, "step": 3904 }, { "epoch": 0.2977861163227017, "grad_norm": 0.3877101540565491, "learning_rate": 0.002828, "loss": 2.9107, "step": 3968 }, { "epoch": 0.3025891181988743, "grad_norm": 0.40037983655929565, "learning_rate": 0.002828, "loss": 2.9086, "step": 4032 }, { "epoch": 0.3073921200750469, "grad_norm": 0.35432353615760803, "learning_rate": 0.002828, "loss": 2.9039, "step": 4096 }, { "epoch": 0.3121951219512195, "grad_norm": 0.3740752935409546, "learning_rate": 0.002828, "loss": 2.8973, "step": 4160 }, { "epoch": 0.3169981238273921, "grad_norm": 0.3972289264202118, "learning_rate": 0.002828, "loss": 2.8868, "step": 4224 }, { "epoch": 0.3218011257035647, "grad_norm": 0.3818065822124481, "learning_rate": 0.002828, "loss": 2.8916, "step": 4288 }, { "epoch": 0.32660412757973734, "grad_norm": 0.31802886724472046, "learning_rate": 0.002828, "loss": 2.895, "step": 4352 }, { "epoch": 0.33140712945590994, "grad_norm": 0.3920498192310333, "learning_rate": 0.002828, "loss": 2.896, "step": 4416 }, { "epoch": 0.33621013133208255, "grad_norm": 0.42001602053642273, "learning_rate": 0.002828, "loss": 2.8757, "step": 4480 }, { "epoch": 0.34101313320825516, "grad_norm": 0.38037222623825073, "learning_rate": 0.002828, "loss": 2.8812, "step": 4544 }, { "epoch": 0.34581613508442777, "grad_norm": 0.6402748823165894, "learning_rate": 0.002828, "loss": 2.8741, "step": 4608 }, { "epoch": 0.3506191369606004, "grad_norm": 0.3265625536441803, "learning_rate": 0.002828, "loss": 2.8659, "step": 4672 }, { "epoch": 0.355422138836773, "grad_norm": 0.3389698565006256, "learning_rate": 0.002828, "loss": 2.863, "step": 4736 }, { "epoch": 0.3602251407129456, "grad_norm": 0.34922096133232117, "learning_rate": 0.002828, "loss": 2.8555, "step": 4800 }, { "epoch": 0.3650281425891182, "grad_norm": 0.370980441570282, "learning_rate": 0.002828, "loss": 2.8624, "step": 4864 }, { "epoch": 0.3698311444652908, "grad_norm": 0.3553221821784973, "learning_rate": 0.002828, "loss": 2.8573, "step": 4928 }, { "epoch": 0.3746341463414634, "grad_norm": 0.36796537041664124, "learning_rate": 0.002828, "loss": 2.8567, "step": 4992 }, { "epoch": 0.379437148217636, "grad_norm": 0.3615240752696991, "learning_rate": 0.002828, "loss": 2.8444, "step": 5056 }, { "epoch": 0.38424015009380863, "grad_norm": 0.4196101427078247, "learning_rate": 0.002828, "loss": 2.845, "step": 5120 }, { "epoch": 0.38904315196998124, "grad_norm": 0.334185928106308, "learning_rate": 0.002828, "loss": 2.8376, "step": 5184 }, { "epoch": 0.39384615384615385, "grad_norm": 0.30093756318092346, "learning_rate": 0.002828, "loss": 2.8302, "step": 5248 }, { "epoch": 0.39864915572232645, "grad_norm": 0.41615140438079834, "learning_rate": 0.002828, "loss": 2.8365, "step": 5312 }, { "epoch": 0.40345215759849906, "grad_norm": 0.38547712564468384, "learning_rate": 0.002828, "loss": 2.833, "step": 5376 }, { "epoch": 0.40825515947467167, "grad_norm": 0.336453378200531, "learning_rate": 0.002828, "loss": 2.8289, "step": 5440 }, { "epoch": 0.4130581613508443, "grad_norm": 0.33043336868286133, "learning_rate": 0.002828, "loss": 2.8154, "step": 5504 }, { "epoch": 0.4178611632270169, "grad_norm": 0.33151519298553467, "learning_rate": 0.002828, "loss": 2.8267, "step": 5568 }, { "epoch": 0.4226641651031895, "grad_norm": 0.29418498277664185, "learning_rate": 0.002828, "loss": 2.8167, "step": 5632 }, { "epoch": 0.4274671669793621, "grad_norm": 0.3507523536682129, "learning_rate": 0.002828, "loss": 2.8227, "step": 5696 }, { "epoch": 0.4322701688555347, "grad_norm": 0.36976736783981323, "learning_rate": 0.002828, "loss": 2.8087, "step": 5760 }, { "epoch": 0.4370731707317073, "grad_norm": 0.4142448604106903, "learning_rate": 0.002828, "loss": 2.8191, "step": 5824 }, { "epoch": 0.4418761726078799, "grad_norm": 0.3893688917160034, "learning_rate": 0.002828, "loss": 2.8032, "step": 5888 }, { "epoch": 0.44667917448405253, "grad_norm": 0.3025995194911957, "learning_rate": 0.002828, "loss": 2.8049, "step": 5952 }, { "epoch": 0.45148217636022514, "grad_norm": 0.3676198422908783, "learning_rate": 0.002828, "loss": 2.7976, "step": 6016 }, { "epoch": 0.45628517823639775, "grad_norm": 0.39022454619407654, "learning_rate": 0.002828, "loss": 2.796, "step": 6080 }, { "epoch": 0.46108818011257036, "grad_norm": 0.38986560702323914, "learning_rate": 0.002828, "loss": 2.791, "step": 6144 }, { "epoch": 0.46589118198874296, "grad_norm": 0.35879769921302795, "learning_rate": 0.002828, "loss": 2.7949, "step": 6208 }, { "epoch": 0.47069418386491557, "grad_norm": 0.44419315457344055, "learning_rate": 0.002828, "loss": 2.7862, "step": 6272 }, { "epoch": 0.4754971857410882, "grad_norm": 0.30884304642677307, "learning_rate": 0.002828, "loss": 2.7864, "step": 6336 }, { "epoch": 0.4803001876172608, "grad_norm": 0.542960524559021, "learning_rate": 0.002828, "loss": 2.7842, "step": 6400 }, { "epoch": 0.4851031894934334, "grad_norm": 0.39032405614852905, "learning_rate": 0.002828, "loss": 2.7798, "step": 6464 }, { "epoch": 0.489906191369606, "grad_norm": 0.3760650157928467, "learning_rate": 0.002828, "loss": 2.78, "step": 6528 }, { "epoch": 0.4947091932457786, "grad_norm": 0.33309632539749146, "learning_rate": 0.002828, "loss": 2.7741, "step": 6592 }, { "epoch": 0.4995121951219512, "grad_norm": 0.37640711665153503, "learning_rate": 0.002828, "loss": 2.7795, "step": 6656 }, { "epoch": 0.5043151969981239, "grad_norm": 0.36830273270606995, "learning_rate": 0.002828, "loss": 2.7596, "step": 6720 }, { "epoch": 0.5091181988742964, "grad_norm": 0.3751394748687744, "learning_rate": 0.002828, "loss": 2.761, "step": 6784 }, { "epoch": 0.5139212007504691, "grad_norm": 0.3472868800163269, "learning_rate": 0.002828, "loss": 2.7567, "step": 6848 }, { "epoch": 0.5187242026266417, "grad_norm": 0.3749905526638031, "learning_rate": 0.002828, "loss": 2.7654, "step": 6912 }, { "epoch": 0.5235272045028143, "grad_norm": 0.4672335982322693, "learning_rate": 0.002828, "loss": 2.7467, "step": 6976 }, { "epoch": 0.5283302063789869, "grad_norm": 0.30083194375038147, "learning_rate": 0.002828, "loss": 2.7596, "step": 7040 }, { "epoch": 0.5331332082551595, "grad_norm": 0.34232673048973083, "learning_rate": 0.002828, "loss": 2.7425, "step": 7104 }, { "epoch": 0.5379362101313321, "grad_norm": 0.42222973704338074, "learning_rate": 0.002828, "loss": 2.7486, "step": 7168 }, { "epoch": 0.5427392120075047, "grad_norm": 0.36008650064468384, "learning_rate": 0.002828, "loss": 2.7451, "step": 7232 }, { "epoch": 0.5475422138836773, "grad_norm": 0.34359127283096313, "learning_rate": 0.002828, "loss": 2.734, "step": 7296 }, { "epoch": 0.55234521575985, "grad_norm": 0.3953745663166046, "learning_rate": 0.002828, "loss": 2.7397, "step": 7360 }, { "epoch": 0.5571482176360225, "grad_norm": 0.36703094840049744, "learning_rate": 0.002828, "loss": 2.7313, "step": 7424 }, { "epoch": 0.5619512195121952, "grad_norm": 0.31787919998168945, "learning_rate": 0.002828, "loss": 2.7363, "step": 7488 }, { "epoch": 0.5667542213883677, "grad_norm": 0.31179967522621155, "learning_rate": 0.002828, "loss": 2.7236, "step": 7552 }, { "epoch": 0.5715572232645404, "grad_norm": 0.3990299105644226, "learning_rate": 0.002828, "loss": 2.7191, "step": 7616 }, { "epoch": 0.576360225140713, "grad_norm": 0.3776848018169403, "learning_rate": 0.002828, "loss": 2.7244, "step": 7680 }, { "epoch": 0.5811632270168856, "grad_norm": 0.36117562651634216, "learning_rate": 0.002828, "loss": 2.7131, "step": 7744 }, { "epoch": 0.5859662288930582, "grad_norm": 0.3219313323497772, "learning_rate": 0.002828, "loss": 2.7202, "step": 7808 }, { "epoch": 0.5907692307692308, "grad_norm": 0.4501495957374573, "learning_rate": 0.002828, "loss": 2.7115, "step": 7872 }, { "epoch": 0.5955722326454034, "grad_norm": 0.3939913809299469, "learning_rate": 0.002828, "loss": 2.7076, "step": 7936 }, { "epoch": 0.600375234521576, "grad_norm": 0.3244933485984802, "learning_rate": 0.002828, "loss": 2.7047, "step": 8000 }, { "epoch": 0.6051782363977486, "grad_norm": 0.3094891607761383, "learning_rate": 0.002828, "loss": 2.698, "step": 8064 }, { "epoch": 0.6099812382739213, "grad_norm": 0.3525580167770386, "learning_rate": 0.002828, "loss": 2.7056, "step": 8128 }, { "epoch": 0.6147842401500938, "grad_norm": 0.3058718144893646, "learning_rate": 0.002828, "loss": 2.6937, "step": 8192 }, { "epoch": 0.6195872420262665, "grad_norm": 0.31864726543426514, "learning_rate": 0.002828, "loss": 2.6935, "step": 8256 }, { "epoch": 0.624390243902439, "grad_norm": 0.3197256028652191, "learning_rate": 0.002828, "loss": 2.6981, "step": 8320 }, { "epoch": 0.6291932457786117, "grad_norm": 0.30954182147979736, "learning_rate": 0.002828, "loss": 2.705, "step": 8384 }, { "epoch": 0.6339962476547842, "grad_norm": 0.4144911468029022, "learning_rate": 0.002828, "loss": 2.6832, "step": 8448 }, { "epoch": 0.6387992495309569, "grad_norm": 0.34720951318740845, "learning_rate": 0.002828, "loss": 2.6858, "step": 8512 }, { "epoch": 0.6436022514071295, "grad_norm": 0.30545172095298767, "learning_rate": 0.002828, "loss": 2.6758, "step": 8576 }, { "epoch": 0.6484052532833021, "grad_norm": 0.3341416120529175, "learning_rate": 0.002828, "loss": 2.6673, "step": 8640 }, { "epoch": 0.6532082551594747, "grad_norm": 0.5191973447799683, "learning_rate": 0.002828, "loss": 2.6798, "step": 8704 }, { "epoch": 0.6580112570356473, "grad_norm": 0.44382575154304504, "learning_rate": 0.002828, "loss": 2.683, "step": 8768 }, { "epoch": 0.6628142589118199, "grad_norm": 0.45676809549331665, "learning_rate": 0.002828, "loss": 2.6731, "step": 8832 }, { "epoch": 0.6676172607879926, "grad_norm": 0.3542475700378418, "learning_rate": 0.002828, "loss": 2.6813, "step": 8896 }, { "epoch": 0.6724202626641651, "grad_norm": 0.3976110517978668, "learning_rate": 0.002828, "loss": 2.6714, "step": 8960 }, { "epoch": 0.6772232645403378, "grad_norm": 0.37194061279296875, "learning_rate": 0.002828, "loss": 2.6646, "step": 9024 }, { "epoch": 0.6820262664165103, "grad_norm": 0.4080849289894104, "learning_rate": 0.002828, "loss": 2.6638, "step": 9088 }, { "epoch": 0.686829268292683, "grad_norm": 0.3275296986103058, "learning_rate": 0.002828, "loss": 2.6643, "step": 9152 }, { "epoch": 0.6916322701688555, "grad_norm": 0.4300732910633087, "learning_rate": 0.002828, "loss": 2.6545, "step": 9216 }, { "epoch": 0.6964352720450282, "grad_norm": 0.528816282749176, "learning_rate": 0.002828, "loss": 2.6639, "step": 9280 }, { "epoch": 0.7012382739212007, "grad_norm": 0.39729437232017517, "learning_rate": 0.002828, "loss": 2.6669, "step": 9344 }, { "epoch": 0.7060412757973734, "grad_norm": 0.36177024245262146, "learning_rate": 0.002828, "loss": 2.6429, "step": 9408 }, { "epoch": 0.710844277673546, "grad_norm": 0.3488599956035614, "learning_rate": 0.002828, "loss": 2.6409, "step": 9472 }, { "epoch": 0.7156472795497186, "grad_norm": 0.361208438873291, "learning_rate": 0.002828, "loss": 2.6354, "step": 9536 }, { "epoch": 0.7204502814258912, "grad_norm": 0.3307696282863617, "learning_rate": 0.002828, "loss": 2.6398, "step": 9600 }, { "epoch": 0.7252532833020638, "grad_norm": 0.47409588098526, "learning_rate": 0.002828, "loss": 2.6492, "step": 9664 }, { "epoch": 0.7300562851782364, "grad_norm": 0.43482983112335205, "learning_rate": 0.002828, "loss": 2.6251, "step": 9728 }, { "epoch": 0.7348592870544091, "grad_norm": 0.43177512288093567, "learning_rate": 0.002828, "loss": 2.6277, "step": 9792 }, { "epoch": 0.7396622889305816, "grad_norm": 0.5830815434455872, "learning_rate": 0.002828, "loss": 2.6291, "step": 9856 }, { "epoch": 0.7444652908067543, "grad_norm": 0.42559024691581726, "learning_rate": 0.002828, "loss": 2.6248, "step": 9920 }, { "epoch": 0.7492682926829268, "grad_norm": 0.36572182178497314, "learning_rate": 0.002828, "loss": 2.6175, "step": 9984 }, { "epoch": 0.7540712945590995, "grad_norm": 0.42863738536834717, "learning_rate": 0.002828, "loss": 2.6249, "step": 10048 }, { "epoch": 0.758874296435272, "grad_norm": 0.34681934118270874, "learning_rate": 0.002828, "loss": 2.6207, "step": 10112 }, { "epoch": 0.7636772983114447, "grad_norm": 0.40332967042922974, "learning_rate": 0.002828, "loss": 2.6088, "step": 10176 }, { "epoch": 0.7684803001876173, "grad_norm": 0.47137463092803955, "learning_rate": 0.002828, "loss": 2.6109, "step": 10240 }, { "epoch": 0.7732833020637899, "grad_norm": 0.3324384093284607, "learning_rate": 0.002828, "loss": 2.6025, "step": 10304 }, { "epoch": 0.7780863039399625, "grad_norm": 0.3714103698730469, "learning_rate": 0.002828, "loss": 2.6038, "step": 10368 }, { "epoch": 0.7828893058161351, "grad_norm": 0.3684547543525696, "learning_rate": 0.002828, "loss": 2.6059, "step": 10432 }, { "epoch": 0.7876923076923077, "grad_norm": 0.3580617606639862, "learning_rate": 0.002828, "loss": 2.5972, "step": 10496 }, { "epoch": 0.7924953095684804, "grad_norm": 0.4132176339626312, "learning_rate": 0.002828, "loss": 2.5979, "step": 10560 }, { "epoch": 0.7972983114446529, "grad_norm": 0.4079800546169281, "learning_rate": 0.002828, "loss": 2.5923, "step": 10624 }, { "epoch": 0.8021013133208256, "grad_norm": 0.40170854330062866, "learning_rate": 0.002828, "loss": 2.5861, "step": 10688 }, { "epoch": 0.8069043151969981, "grad_norm": 0.4748755097389221, "learning_rate": 0.002828, "loss": 2.6031, "step": 10752 }, { "epoch": 0.8117073170731708, "grad_norm": 0.3806183338165283, "learning_rate": 0.002828, "loss": 2.5913, "step": 10816 }, { "epoch": 0.8165103189493433, "grad_norm": 0.32777532935142517, "learning_rate": 0.002828, "loss": 2.5937, "step": 10880 }, { "epoch": 0.821313320825516, "grad_norm": 0.4884773790836334, "learning_rate": 0.002828, "loss": 2.5851, "step": 10944 }, { "epoch": 0.8261163227016886, "grad_norm": 0.43175649642944336, "learning_rate": 0.002828, "loss": 2.5861, "step": 11008 }, { "epoch": 0.8309193245778612, "grad_norm": 0.44375500082969666, "learning_rate": 0.002828, "loss": 2.5823, "step": 11072 }, { "epoch": 0.8357223264540338, "grad_norm": 0.36503469944000244, "learning_rate": 0.002828, "loss": 2.579, "step": 11136 }, { "epoch": 0.8405253283302064, "grad_norm": 0.3493196368217468, "learning_rate": 0.002828, "loss": 2.5776, "step": 11200 }, { "epoch": 0.845328330206379, "grad_norm": 0.3593812584877014, "learning_rate": 0.002828, "loss": 2.5681, "step": 11264 }, { "epoch": 0.8501313320825516, "grad_norm": 0.4043927788734436, "learning_rate": 0.002828, "loss": 2.5766, "step": 11328 }, { "epoch": 0.8549343339587242, "grad_norm": 0.3805730938911438, "learning_rate": 0.002828, "loss": 2.5713, "step": 11392 }, { "epoch": 0.8597373358348969, "grad_norm": 0.40822461247444153, "learning_rate": 0.002828, "loss": 2.576, "step": 11456 }, { "epoch": 0.8645403377110694, "grad_norm": 0.3430253565311432, "learning_rate": 0.002828, "loss": 2.5669, "step": 11520 }, { "epoch": 0.8693433395872421, "grad_norm": 0.3665921688079834, "learning_rate": 0.002828, "loss": 2.5619, "step": 11584 }, { "epoch": 0.8741463414634146, "grad_norm": 0.3768637776374817, "learning_rate": 0.002828, "loss": 2.5617, "step": 11648 }, { "epoch": 0.8789493433395873, "grad_norm": 0.6709098219871521, "learning_rate": 0.002828, "loss": 2.5592, "step": 11712 }, { "epoch": 0.8837523452157598, "grad_norm": 0.37109729647636414, "learning_rate": 0.002828, "loss": 2.55, "step": 11776 }, { "epoch": 0.8885553470919325, "grad_norm": 0.35545358061790466, "learning_rate": 0.002828, "loss": 2.5601, "step": 11840 }, { "epoch": 0.8933583489681051, "grad_norm": 0.34493309259414673, "learning_rate": 0.002828, "loss": 2.5555, "step": 11904 }, { "epoch": 0.8981613508442777, "grad_norm": 0.35226738452911377, "learning_rate": 0.002828, "loss": 2.5464, "step": 11968 }, { "epoch": 0.9029643527204503, "grad_norm": 0.3626823425292969, "learning_rate": 0.002828, "loss": 2.5526, "step": 12032 }, { "epoch": 0.9077673545966229, "grad_norm": 0.4639281630516052, "learning_rate": 0.002828, "loss": 2.5535, "step": 12096 }, { "epoch": 0.9125703564727955, "grad_norm": 0.425073504447937, "learning_rate": 0.002828, "loss": 2.54, "step": 12160 }, { "epoch": 0.9173733583489682, "grad_norm": 0.4849206507205963, "learning_rate": 0.002828, "loss": 2.5415, "step": 12224 }, { "epoch": 0.9221763602251407, "grad_norm": 0.3517647385597229, "learning_rate": 0.002828, "loss": 2.5396, "step": 12288 }, { "epoch": 0.9269793621013134, "grad_norm": 0.4217440187931061, "learning_rate": 0.002828, "loss": 2.5353, "step": 12352 }, { "epoch": 0.9317823639774859, "grad_norm": 0.3862438499927521, "learning_rate": 0.002828, "loss": 2.532, "step": 12416 }, { "epoch": 0.9365853658536586, "grad_norm": 0.4026007056236267, "learning_rate": 0.002828, "loss": 2.5342, "step": 12480 }, { "epoch": 0.9413883677298311, "grad_norm": 0.3353049159049988, "learning_rate": 0.002828, "loss": 2.5303, "step": 12544 }, { "epoch": 0.9461913696060038, "grad_norm": 0.35357797145843506, "learning_rate": 0.002828, "loss": 2.5283, "step": 12608 }, { "epoch": 0.9509943714821764, "grad_norm": 0.35685861110687256, "learning_rate": 0.002828, "loss": 2.5331, "step": 12672 }, { "epoch": 0.955797373358349, "grad_norm": 0.36265361309051514, "learning_rate": 0.002828, "loss": 2.5306, "step": 12736 }, { "epoch": 0.9606003752345216, "grad_norm": 0.4119773805141449, "learning_rate": 0.002828, "loss": 2.5199, "step": 12800 }, { "epoch": 0.9654033771106942, "grad_norm": 0.3662680387496948, "learning_rate": 0.002828, "loss": 2.523, "step": 12864 }, { "epoch": 0.9702063789868668, "grad_norm": 0.3822716772556305, "learning_rate": 0.002828, "loss": 2.5186, "step": 12928 }, { "epoch": 0.9750093808630395, "grad_norm": 0.3412950038909912, "learning_rate": 0.002828, "loss": 2.5201, "step": 12992 }, { "epoch": 0.979812382739212, "grad_norm": 0.373353511095047, "learning_rate": 0.002828, "loss": 2.518, "step": 13056 }, { "epoch": 0.9846153846153847, "grad_norm": 10.112526893615723, "learning_rate": 0.002828, "loss": 2.5128, "step": 13120 }, { "epoch": 0.9894183864915572, "grad_norm": 0.36393383145332336, "learning_rate": 0.002828, "loss": 2.5249, "step": 13184 }, { "epoch": 0.9942213883677299, "grad_norm": 0.4176023006439209, "learning_rate": 0.002828, "loss": 2.5082, "step": 13248 }, { "epoch": 0.9990243902439024, "grad_norm": 0.4248984456062317, "learning_rate": 0.002828, "loss": 2.5106, "step": 13312 }, { "epoch": 1.003827392120075, "grad_norm": 0.3931824564933777, "learning_rate": 0.002828, "loss": 2.4797, "step": 13376 }, { "epoch": 1.0086303939962478, "grad_norm": 0.3742982745170593, "learning_rate": 0.002828, "loss": 2.4675, "step": 13440 }, { "epoch": 1.0134333958724202, "grad_norm": 0.4388613998889923, "learning_rate": 0.002828, "loss": 2.4746, "step": 13504 }, { "epoch": 1.0182363977485929, "grad_norm": 0.41458427906036377, "learning_rate": 0.002828, "loss": 2.4699, "step": 13568 }, { "epoch": 1.0230393996247655, "grad_norm": 0.3841855227947235, "learning_rate": 0.002828, "loss": 2.4675, "step": 13632 }, { "epoch": 1.027842401500938, "grad_norm": 0.43930500745773315, "learning_rate": 0.002828, "loss": 2.4725, "step": 13696 }, { "epoch": 1.0326454033771106, "grad_norm": 0.3687760531902313, "learning_rate": 0.002828, "loss": 2.4573, "step": 13760 }, { "epoch": 1.0374484052532833, "grad_norm": 0.3823833465576172, "learning_rate": 0.002828, "loss": 2.4592, "step": 13824 }, { "epoch": 1.042251407129456, "grad_norm": 0.40025222301483154, "learning_rate": 0.002828, "loss": 2.4713, "step": 13888 }, { "epoch": 1.0470544090056286, "grad_norm": 0.40790122747421265, "learning_rate": 0.002828, "loss": 2.4618, "step": 13952 }, { "epoch": 1.051857410881801, "grad_norm": 0.42718634009361267, "learning_rate": 0.002828, "loss": 2.4646, "step": 14016 }, { "epoch": 1.0566604127579737, "grad_norm": 0.3305782079696655, "learning_rate": 0.002828, "loss": 2.4563, "step": 14080 }, { "epoch": 1.0614634146341464, "grad_norm": 0.37126559019088745, "learning_rate": 0.002828, "loss": 2.4666, "step": 14144 }, { "epoch": 1.0662664165103188, "grad_norm": 0.414987176656723, "learning_rate": 0.002828, "loss": 2.4558, "step": 14208 }, { "epoch": 1.0710694183864915, "grad_norm": 0.45917075872421265, "learning_rate": 0.002828, "loss": 2.4604, "step": 14272 }, { "epoch": 1.0758724202626642, "grad_norm": 0.4362465739250183, "learning_rate": 0.002828, "loss": 2.4504, "step": 14336 }, { "epoch": 1.0806754221388368, "grad_norm": 0.40015289187431335, "learning_rate": 0.002828, "loss": 2.4501, "step": 14400 }, { "epoch": 1.0854784240150095, "grad_norm": 0.3781159818172455, "learning_rate": 0.002828, "loss": 2.4555, "step": 14464 }, { "epoch": 1.090281425891182, "grad_norm": 0.4165579676628113, "learning_rate": 0.002828, "loss": 2.4472, "step": 14528 }, { "epoch": 1.0950844277673546, "grad_norm": 0.4100767970085144, "learning_rate": 0.002828, "loss": 2.4545, "step": 14592 }, { "epoch": 1.0998874296435273, "grad_norm": 0.4211256504058838, "learning_rate": 0.002828, "loss": 2.4444, "step": 14656 }, { "epoch": 1.1046904315196997, "grad_norm": 0.390396386384964, "learning_rate": 0.002828, "loss": 2.4502, "step": 14720 }, { "epoch": 1.1094934333958724, "grad_norm": 0.3585355281829834, "learning_rate": 0.002828, "loss": 2.437, "step": 14784 }, { "epoch": 1.114296435272045, "grad_norm": 0.5148431062698364, "learning_rate": 0.002828, "loss": 2.4429, "step": 14848 }, { "epoch": 1.1190994371482177, "grad_norm": 0.44254639744758606, "learning_rate": 0.002828, "loss": 2.4393, "step": 14912 }, { "epoch": 1.1239024390243904, "grad_norm": 0.3710468113422394, "learning_rate": 0.002828, "loss": 2.4379, "step": 14976 }, { "epoch": 1.1287054409005628, "grad_norm": 0.41197285056114197, "learning_rate": 0.002828, "loss": 2.4387, "step": 15040 }, { "epoch": 1.1335084427767355, "grad_norm": 0.37512508034706116, "learning_rate": 0.002828, "loss": 2.4327, "step": 15104 }, { "epoch": 1.1383114446529081, "grad_norm": 0.4286038279533386, "learning_rate": 0.002828, "loss": 2.4314, "step": 15168 }, { "epoch": 1.1431144465290806, "grad_norm": 0.37446776032447815, "learning_rate": 0.002828, "loss": 2.4304, "step": 15232 }, { "epoch": 1.1479174484052532, "grad_norm": 0.4537597894668579, "learning_rate": 0.002828, "loss": 2.4239, "step": 15296 }, { "epoch": 1.152720450281426, "grad_norm": 0.36247050762176514, "learning_rate": 0.002828, "loss": 2.4284, "step": 15360 }, { "epoch": 1.1575234521575986, "grad_norm": 0.3772297501564026, "learning_rate": 0.002828, "loss": 2.4307, "step": 15424 }, { "epoch": 1.1623264540337712, "grad_norm": 0.3789200186729431, "learning_rate": 0.002828, "loss": 2.4261, "step": 15488 }, { "epoch": 1.1671294559099437, "grad_norm": 0.36870113015174866, "learning_rate": 0.002828, "loss": 2.4242, "step": 15552 }, { "epoch": 1.1719324577861163, "grad_norm": 0.37448298931121826, "learning_rate": 0.002828, "loss": 2.418, "step": 15616 }, { "epoch": 1.176735459662289, "grad_norm": 0.4384878873825073, "learning_rate": 0.002828, "loss": 2.4222, "step": 15680 }, { "epoch": 1.1815384615384614, "grad_norm": 0.37811148166656494, "learning_rate": 0.002828, "loss": 2.4187, "step": 15744 }, { "epoch": 1.186341463414634, "grad_norm": 0.4190385341644287, "learning_rate": 0.002828, "loss": 2.4221, "step": 15808 }, { "epoch": 1.1911444652908068, "grad_norm": 0.48885485529899597, "learning_rate": 0.002828, "loss": 2.4146, "step": 15872 }, { "epoch": 1.1959474671669794, "grad_norm": 0.42267611622810364, "learning_rate": 0.002828, "loss": 2.4245, "step": 15936 }, { "epoch": 1.200750469043152, "grad_norm": 0.3886626064777374, "learning_rate": 0.002828, "loss": 2.4283, "step": 16000 }, { "epoch": 1.2055534709193245, "grad_norm": 0.40871456265449524, "learning_rate": 0.002828, "loss": 2.4127, "step": 16064 }, { "epoch": 1.2103564727954972, "grad_norm": 0.46952739357948303, "learning_rate": 0.002828, "loss": 2.408, "step": 16128 }, { "epoch": 1.2151594746716698, "grad_norm": 0.41340023279190063, "learning_rate": 0.002828, "loss": 2.3977, "step": 16192 }, { "epoch": 1.2199624765478423, "grad_norm": 0.36176440119743347, "learning_rate": 0.002828, "loss": 2.405, "step": 16256 }, { "epoch": 1.224765478424015, "grad_norm": 0.4117899239063263, "learning_rate": 0.002828, "loss": 2.4011, "step": 16320 }, { "epoch": 1.2295684803001876, "grad_norm": 0.5039286613464355, "learning_rate": 0.002828, "loss": 2.4142, "step": 16384 }, { "epoch": 1.2343714821763603, "grad_norm": 0.3716677129268646, "learning_rate": 0.002828, "loss": 2.4098, "step": 16448 }, { "epoch": 1.239174484052533, "grad_norm": 0.42316168546676636, "learning_rate": 0.002828, "loss": 2.4004, "step": 16512 }, { "epoch": 1.2439774859287054, "grad_norm": 0.5081620216369629, "learning_rate": 0.002828, "loss": 2.3926, "step": 16576 }, { "epoch": 1.248780487804878, "grad_norm": 0.39409589767456055, "learning_rate": 0.002828, "loss": 2.3942, "step": 16640 }, { "epoch": 1.2535834896810507, "grad_norm": 0.38638824224472046, "learning_rate": 0.002828, "loss": 2.3936, "step": 16704 }, { "epoch": 1.2583864915572232, "grad_norm": 0.41918718814849854, "learning_rate": 0.002828, "loss": 2.3972, "step": 16768 }, { "epoch": 1.2631894934333958, "grad_norm": 0.3932395279407501, "learning_rate": 0.002828, "loss": 2.3983, "step": 16832 }, { "epoch": 1.2679924953095685, "grad_norm": 0.3787371814250946, "learning_rate": 0.002828, "loss": 2.3963, "step": 16896 }, { "epoch": 1.2727954971857411, "grad_norm": 0.40612953901290894, "learning_rate": 0.002828, "loss": 2.3825, "step": 16960 }, { "epoch": 1.2775984990619138, "grad_norm": 0.4243071675300598, "learning_rate": 0.002828, "loss": 2.3835, "step": 17024 }, { "epoch": 1.2824015009380862, "grad_norm": 0.4240303039550781, "learning_rate": 0.002828, "loss": 2.4023, "step": 17088 }, { "epoch": 1.287204502814259, "grad_norm": 0.4888259470462799, "learning_rate": 0.002828, "loss": 2.3918, "step": 17152 }, { "epoch": 1.2920075046904316, "grad_norm": 0.4678399860858917, "learning_rate": 0.002828, "loss": 2.389, "step": 17216 }, { "epoch": 1.296810506566604, "grad_norm": 0.38733649253845215, "learning_rate": 0.002828, "loss": 2.3898, "step": 17280 }, { "epoch": 1.3016135084427767, "grad_norm": 0.38587358593940735, "learning_rate": 0.002828, "loss": 2.3797, "step": 17344 }, { "epoch": 1.3064165103189493, "grad_norm": 0.39998751878738403, "learning_rate": 0.002828, "loss": 2.3907, "step": 17408 }, { "epoch": 1.311219512195122, "grad_norm": 0.36294978857040405, "learning_rate": 0.002828, "loss": 2.3822, "step": 17472 }, { "epoch": 1.3160225140712947, "grad_norm": 0.3924562633037567, "learning_rate": 0.002828, "loss": 2.3772, "step": 17536 }, { "epoch": 1.320825515947467, "grad_norm": 0.3837553560733795, "learning_rate": 0.002828, "loss": 2.3804, "step": 17600 }, { "epoch": 1.3256285178236398, "grad_norm": 0.38875913619995117, "learning_rate": 0.002828, "loss": 2.3747, "step": 17664 }, { "epoch": 1.3304315196998124, "grad_norm": 0.41738125681877136, "learning_rate": 0.002828, "loss": 2.3778, "step": 17728 }, { "epoch": 1.3352345215759849, "grad_norm": 0.3645491898059845, "learning_rate": 0.002828, "loss": 2.3731, "step": 17792 }, { "epoch": 1.3400375234521575, "grad_norm": 0.43829870223999023, "learning_rate": 0.002828, "loss": 2.3674, "step": 17856 }, { "epoch": 1.3448405253283302, "grad_norm": 0.3851640820503235, "learning_rate": 0.002828, "loss": 2.3753, "step": 17920 }, { "epoch": 1.3496435272045029, "grad_norm": 0.36147060990333557, "learning_rate": 0.002828, "loss": 2.3669, "step": 17984 }, { "epoch": 1.3544465290806755, "grad_norm": 0.42050638794898987, "learning_rate": 0.002828, "loss": 2.3693, "step": 18048 }, { "epoch": 1.359249530956848, "grad_norm": 0.3830699920654297, "learning_rate": 0.002828, "loss": 2.3668, "step": 18112 }, { "epoch": 1.3640525328330206, "grad_norm": 0.3830968737602234, "learning_rate": 0.002828, "loss": 2.3647, "step": 18176 }, { "epoch": 1.3688555347091933, "grad_norm": 0.3880060017108917, "learning_rate": 0.002828, "loss": 2.37, "step": 18240 }, { "epoch": 1.3736585365853657, "grad_norm": 0.45445796847343445, "learning_rate": 0.002828, "loss": 2.3594, "step": 18304 }, { "epoch": 1.3784615384615384, "grad_norm": 0.3750540316104889, "learning_rate": 0.002828, "loss": 2.3574, "step": 18368 }, { "epoch": 1.383264540337711, "grad_norm": 0.3783455193042755, "learning_rate": 0.002828, "loss": 2.3537, "step": 18432 }, { "epoch": 1.3880675422138837, "grad_norm": 0.40336528420448303, "learning_rate": 0.002828, "loss": 2.3672, "step": 18496 }, { "epoch": 1.3928705440900564, "grad_norm": 0.43220385909080505, "learning_rate": 0.002828, "loss": 2.3588, "step": 18560 }, { "epoch": 1.3976735459662288, "grad_norm": 0.4069630205631256, "learning_rate": 0.002828, "loss": 2.3619, "step": 18624 }, { "epoch": 1.4024765478424015, "grad_norm": 0.3866819441318512, "learning_rate": 0.002828, "loss": 2.3493, "step": 18688 }, { "epoch": 1.4072795497185742, "grad_norm": 0.3699668347835541, "learning_rate": 0.002828, "loss": 2.349, "step": 18752 }, { "epoch": 1.4120825515947466, "grad_norm": 0.377645879983902, "learning_rate": 0.002828, "loss": 2.3523, "step": 18816 }, { "epoch": 1.4168855534709193, "grad_norm": 0.36612892150878906, "learning_rate": 0.002828, "loss": 2.3542, "step": 18880 }, { "epoch": 1.421688555347092, "grad_norm": 0.385735422372818, "learning_rate": 0.002828, "loss": 2.3528, "step": 18944 }, { "epoch": 1.4264915572232646, "grad_norm": 0.4026818871498108, "learning_rate": 0.002828, "loss": 2.3478, "step": 19008 }, { "epoch": 1.4312945590994373, "grad_norm": 0.39212891459465027, "learning_rate": 0.002828, "loss": 2.348, "step": 19072 }, { "epoch": 1.4360975609756097, "grad_norm": 0.43533411622047424, "learning_rate": 0.002828, "loss": 2.3502, "step": 19136 }, { "epoch": 1.4409005628517824, "grad_norm": 0.4136466383934021, "learning_rate": 0.002828, "loss": 2.347, "step": 19200 }, { "epoch": 1.445703564727955, "grad_norm": 0.38349345326423645, "learning_rate": 0.002828, "loss": 2.3468, "step": 19264 }, { "epoch": 1.4505065666041275, "grad_norm": 0.42666760087013245, "learning_rate": 0.002828, "loss": 2.3306, "step": 19328 }, { "epoch": 1.4553095684803001, "grad_norm": 0.3926577866077423, "learning_rate": 0.002828, "loss": 2.3466, "step": 19392 }, { "epoch": 1.4601125703564728, "grad_norm": 0.3736414611339569, "learning_rate": 0.002828, "loss": 2.3425, "step": 19456 }, { "epoch": 1.4649155722326455, "grad_norm": 0.36343908309936523, "learning_rate": 0.002828, "loss": 2.3371, "step": 19520 }, { "epoch": 1.4697185741088181, "grad_norm": 0.380211740732193, "learning_rate": 0.002828, "loss": 2.34, "step": 19584 }, { "epoch": 1.4745215759849906, "grad_norm": 0.40481454133987427, "learning_rate": 0.002828, "loss": 2.332, "step": 19648 }, { "epoch": 1.4793245778611632, "grad_norm": 0.45368635654449463, "learning_rate": 0.002828, "loss": 2.3288, "step": 19712 }, { "epoch": 1.484127579737336, "grad_norm": 0.4029395580291748, "learning_rate": 0.002828, "loss": 2.3293, "step": 19776 }, { "epoch": 1.4889305816135083, "grad_norm": 0.3748946785926819, "learning_rate": 0.002828, "loss": 2.332, "step": 19840 }, { "epoch": 1.493733583489681, "grad_norm": 0.36640551686286926, "learning_rate": 0.002828, "loss": 2.3231, "step": 19904 }, { "epoch": 1.4985365853658537, "grad_norm": 0.4150533676147461, "learning_rate": 0.002828, "loss": 2.3284, "step": 19968 }, { "epoch": 1.5033395872420263, "grad_norm": 0.49730879068374634, "learning_rate": 0.002828, "loss": 2.3243, "step": 20032 }, { "epoch": 1.508142589118199, "grad_norm": 0.37675461173057556, "learning_rate": 0.002828, "loss": 2.3273, "step": 20096 }, { "epoch": 1.5129455909943714, "grad_norm": 0.3647516965866089, "learning_rate": 0.002828, "loss": 2.3308, "step": 20160 }, { "epoch": 1.517748592870544, "grad_norm": 1.2981253862380981, "learning_rate": 0.002828, "loss": 2.3265, "step": 20224 }, { "epoch": 1.5225515947467168, "grad_norm": 0.5044511556625366, "learning_rate": 0.002828, "loss": 2.3173, "step": 20288 }, { "epoch": 1.5273545966228892, "grad_norm": 0.3651883006095886, "learning_rate": 0.002828, "loss": 2.319, "step": 20352 }, { "epoch": 1.532157598499062, "grad_norm": 0.4419403076171875, "learning_rate": 0.002828, "loss": 2.3188, "step": 20416 }, { "epoch": 1.5369606003752345, "grad_norm": 0.38631224632263184, "learning_rate": 0.002828, "loss": 2.3189, "step": 20480 }, { "epoch": 1.5417636022514072, "grad_norm": 0.34725359082221985, "learning_rate": 0.002828, "loss": 2.3162, "step": 20544 }, { "epoch": 1.5465666041275798, "grad_norm": 0.3991786241531372, "learning_rate": 0.002828, "loss": 2.3154, "step": 20608 }, { "epoch": 1.5513696060037523, "grad_norm": 0.3595084846019745, "learning_rate": 0.002828, "loss": 2.3149, "step": 20672 }, { "epoch": 1.556172607879925, "grad_norm": 0.4021853804588318, "learning_rate": 0.002828, "loss": 2.3113, "step": 20736 }, { "epoch": 1.5609756097560976, "grad_norm": 0.3939075767993927, "learning_rate": 0.002828, "loss": 2.3187, "step": 20800 }, { "epoch": 1.56577861163227, "grad_norm": 0.3889540135860443, "learning_rate": 0.002828, "loss": 2.316, "step": 20864 }, { "epoch": 1.570581613508443, "grad_norm": 0.41366517543792725, "learning_rate": 0.002828, "loss": 2.3025, "step": 20928 }, { "epoch": 1.5753846153846154, "grad_norm": 0.37127187848091125, "learning_rate": 0.002828, "loss": 2.3049, "step": 20992 }, { "epoch": 1.580187617260788, "grad_norm": 0.4014946520328522, "learning_rate": 0.002828, "loss": 2.3025, "step": 21056 }, { "epoch": 1.5849906191369607, "grad_norm": 0.35794708132743835, "learning_rate": 0.002828, "loss": 2.3089, "step": 21120 }, { "epoch": 1.5897936210131332, "grad_norm": 0.3924767076969147, "learning_rate": 0.002828, "loss": 2.3042, "step": 21184 }, { "epoch": 1.5945966228893058, "grad_norm": 0.34789031744003296, "learning_rate": 0.002828, "loss": 2.3025, "step": 21248 }, { "epoch": 1.5993996247654785, "grad_norm": 0.37461933493614197, "learning_rate": 0.002828, "loss": 2.2977, "step": 21312 }, { "epoch": 1.604202626641651, "grad_norm": 0.40146076679229736, "learning_rate": 0.002828, "loss": 2.2997, "step": 21376 }, { "epoch": 1.6090056285178238, "grad_norm": 0.4080921411514282, "learning_rate": 0.002828, "loss": 2.3001, "step": 21440 }, { "epoch": 1.6138086303939962, "grad_norm": 0.40802744030952454, "learning_rate": 0.002828, "loss": 2.2964, "step": 21504 }, { "epoch": 1.618611632270169, "grad_norm": 0.420188307762146, "learning_rate": 0.002828, "loss": 2.2981, "step": 21568 }, { "epoch": 1.6234146341463416, "grad_norm": 0.40739214420318604, "learning_rate": 0.002828, "loss": 2.2994, "step": 21632 }, { "epoch": 1.628217636022514, "grad_norm": 0.41674676537513733, "learning_rate": 0.002828, "loss": 2.292, "step": 21696 }, { "epoch": 1.6330206378986867, "grad_norm": 0.41856762766838074, "learning_rate": 0.002828, "loss": 2.2941, "step": 21760 }, { "epoch": 1.6378236397748593, "grad_norm": 0.39763346314430237, "learning_rate": 0.002828, "loss": 2.2939, "step": 21824 }, { "epoch": 1.6426266416510318, "grad_norm": 0.3777034282684326, "learning_rate": 0.002828, "loss": 2.2995, "step": 21888 }, { "epoch": 1.6474296435272047, "grad_norm": 0.3617188036441803, "learning_rate": 0.002828, "loss": 2.2924, "step": 21952 }, { "epoch": 1.652232645403377, "grad_norm": 0.4504718482494354, "learning_rate": 0.002828, "loss": 2.2869, "step": 22016 }, { "epoch": 1.6570356472795498, "grad_norm": 0.37388357520103455, "learning_rate": 0.002828, "loss": 2.2973, "step": 22080 }, { "epoch": 1.6618386491557224, "grad_norm": 0.3807313144207001, "learning_rate": 0.002828, "loss": 2.2829, "step": 22144 }, { "epoch": 1.6666416510318949, "grad_norm": 0.4428509771823883, "learning_rate": 0.002828, "loss": 2.2933, "step": 22208 }, { "epoch": 1.6714446529080675, "grad_norm": 0.39028382301330566, "learning_rate": 0.002828, "loss": 2.2931, "step": 22272 }, { "epoch": 1.6762476547842402, "grad_norm": 0.482424259185791, "learning_rate": 0.002828, "loss": 2.2781, "step": 22336 }, { "epoch": 1.6810506566604126, "grad_norm": 0.39801299571990967, "learning_rate": 0.002828, "loss": 2.2722, "step": 22400 }, { "epoch": 1.6858536585365855, "grad_norm": 0.4351527988910675, "learning_rate": 0.002828, "loss": 2.2801, "step": 22464 }, { "epoch": 1.690656660412758, "grad_norm": 0.4509490430355072, "learning_rate": 0.002828, "loss": 2.2839, "step": 22528 }, { "epoch": 1.6954596622889306, "grad_norm": 0.35885152220726013, "learning_rate": 0.002828, "loss": 2.2783, "step": 22592 }, { "epoch": 1.7002626641651033, "grad_norm": 0.4146900177001953, "learning_rate": 0.002828, "loss": 2.2803, "step": 22656 }, { "epoch": 1.7050656660412757, "grad_norm": 0.40194573998451233, "learning_rate": 0.002828, "loss": 2.2807, "step": 22720 }, { "epoch": 1.7098686679174484, "grad_norm": 0.43570390343666077, "learning_rate": 0.002828, "loss": 2.2824, "step": 22784 }, { "epoch": 1.714671669793621, "grad_norm": 0.35558512806892395, "learning_rate": 0.002828, "loss": 2.2777, "step": 22848 }, { "epoch": 1.7194746716697935, "grad_norm": 0.3700902760028839, "learning_rate": 0.002828, "loss": 2.2802, "step": 22912 }, { "epoch": 1.7242776735459664, "grad_norm": 0.4287453591823578, "learning_rate": 0.002828, "loss": 2.267, "step": 22976 }, { "epoch": 1.7290806754221388, "grad_norm": 0.41843536496162415, "learning_rate": 0.002828, "loss": 2.2695, "step": 23040 }, { "epoch": 1.7338836772983115, "grad_norm": 0.3938317596912384, "learning_rate": 0.002828, "loss": 2.2765, "step": 23104 }, { "epoch": 1.7386866791744842, "grad_norm": 0.44625958800315857, "learning_rate": 0.002828, "loss": 2.2661, "step": 23168 }, { "epoch": 1.7434896810506566, "grad_norm": 0.4598078727722168, "learning_rate": 0.002828, "loss": 2.2565, "step": 23232 }, { "epoch": 1.7482926829268293, "grad_norm": 0.4126788377761841, "learning_rate": 0.002828, "loss": 2.2688, "step": 23296 }, { "epoch": 1.753095684803002, "grad_norm": 0.3801914155483246, "learning_rate": 0.002828, "loss": 2.2633, "step": 23360 }, { "epoch": 1.7578986866791744, "grad_norm": 0.4619985818862915, "learning_rate": 0.002828, "loss": 2.2666, "step": 23424 }, { "epoch": 1.7627016885553473, "grad_norm": 0.4068593680858612, "learning_rate": 0.002828, "loss": 2.2578, "step": 23488 }, { "epoch": 1.7675046904315197, "grad_norm": 0.36146870255470276, "learning_rate": 0.002828, "loss": 2.2641, "step": 23552 }, { "epoch": 1.7723076923076924, "grad_norm": 0.3995908200740814, "learning_rate": 0.002828, "loss": 2.2592, "step": 23616 }, { "epoch": 1.777110694183865, "grad_norm": 0.3970596492290497, "learning_rate": 0.002828, "loss": 2.2719, "step": 23680 }, { "epoch": 1.7819136960600375, "grad_norm": 0.4287073612213135, "learning_rate": 0.002828, "loss": 2.2566, "step": 23744 }, { "epoch": 1.7867166979362101, "grad_norm": 0.41250482201576233, "learning_rate": 0.002828, "loss": 2.2533, "step": 23808 }, { "epoch": 1.7915196998123828, "grad_norm": 0.411668062210083, "learning_rate": 0.002828, "loss": 2.2586, "step": 23872 }, { "epoch": 1.7963227016885552, "grad_norm": 0.4834740459918976, "learning_rate": 0.002828, "loss": 2.2488, "step": 23936 }, { "epoch": 1.8011257035647281, "grad_norm": 0.3624022603034973, "learning_rate": 0.002828, "loss": 2.2485, "step": 24000 }, { "epoch": 1.8059287054409006, "grad_norm": 0.36700454354286194, "learning_rate": 0.002828, "loss": 2.2516, "step": 24064 }, { "epoch": 1.8107317073170732, "grad_norm": 0.3666454255580902, "learning_rate": 0.002828, "loss": 2.2483, "step": 24128 }, { "epoch": 1.8155347091932459, "grad_norm": 0.4110506474971771, "learning_rate": 0.002828, "loss": 2.2539, "step": 24192 }, { "epoch": 1.8203377110694183, "grad_norm": 0.3604464530944824, "learning_rate": 0.002828, "loss": 2.2477, "step": 24256 }, { "epoch": 1.825140712945591, "grad_norm": 0.40807706117630005, "learning_rate": 0.002828, "loss": 2.2483, "step": 24320 }, { "epoch": 1.8299437148217637, "grad_norm": 0.3632533848285675, "learning_rate": 0.002828, "loss": 2.2496, "step": 24384 }, { "epoch": 1.834746716697936, "grad_norm": 0.38520562648773193, "learning_rate": 0.002828, "loss": 2.238, "step": 24448 }, { "epoch": 1.839549718574109, "grad_norm": 0.4228810966014862, "learning_rate": 0.002828, "loss": 2.2437, "step": 24512 }, { "epoch": 1.8443527204502814, "grad_norm": 0.4542325735092163, "learning_rate": 0.002828, "loss": 2.2392, "step": 24576 }, { "epoch": 1.849155722326454, "grad_norm": 0.37316882610321045, "learning_rate": 0.002828, "loss": 2.2414, "step": 24640 }, { "epoch": 1.8539587242026268, "grad_norm": 0.5505624413490295, "learning_rate": 0.002828, "loss": 2.2535, "step": 24704 }, { "epoch": 1.8587617260787992, "grad_norm": 0.4269484281539917, "learning_rate": 0.002828, "loss": 2.2403, "step": 24768 }, { "epoch": 1.8635647279549719, "grad_norm": 0.407760888338089, "learning_rate": 0.002828, "loss": 2.2399, "step": 24832 }, { "epoch": 1.8683677298311445, "grad_norm": 0.4192192554473877, "learning_rate": 0.002828, "loss": 2.2418, "step": 24896 }, { "epoch": 1.873170731707317, "grad_norm": 0.3924838602542877, "learning_rate": 0.002828, "loss": 2.2345, "step": 24960 }, { "epoch": 1.8779737335834898, "grad_norm": 0.3799656629562378, "learning_rate": 0.002828, "loss": 2.2281, "step": 25024 }, { "epoch": 1.8827767354596623, "grad_norm": 0.40570494532585144, "learning_rate": 0.002828, "loss": 2.2401, "step": 25088 }, { "epoch": 1.887579737335835, "grad_norm": 0.3898228704929352, "learning_rate": 0.002828, "loss": 2.2313, "step": 25152 }, { "epoch": 1.8923827392120076, "grad_norm": 0.393216073513031, "learning_rate": 0.002828, "loss": 2.2407, "step": 25216 }, { "epoch": 1.89718574108818, "grad_norm": 0.4247749149799347, "learning_rate": 0.002828, "loss": 2.2365, "step": 25280 }, { "epoch": 1.9019887429643527, "grad_norm": 0.4670035243034363, "learning_rate": 0.002828, "loss": 2.2345, "step": 25344 }, { "epoch": 1.9067917448405254, "grad_norm": 0.40336644649505615, "learning_rate": 0.002828, "loss": 2.2367, "step": 25408 }, { "epoch": 1.9115947467166978, "grad_norm": 0.48462921380996704, "learning_rate": 0.002828, "loss": 2.2239, "step": 25472 }, { "epoch": 1.9163977485928707, "grad_norm": 0.44047805666923523, "learning_rate": 0.002828, "loss": 2.2291, "step": 25536 }, { "epoch": 1.9212007504690432, "grad_norm": 0.4221409261226654, "learning_rate": 0.002828, "loss": 2.2331, "step": 25600 }, { "epoch": 1.9260037523452158, "grad_norm": 0.4272362291812897, "learning_rate": 0.002828, "loss": 2.2286, "step": 25664 }, { "epoch": 1.9308067542213885, "grad_norm": 0.4309645891189575, "learning_rate": 0.002828, "loss": 2.2208, "step": 25728 }, { "epoch": 1.935609756097561, "grad_norm": 0.4220867156982422, "learning_rate": 0.002828, "loss": 2.2257, "step": 25792 }, { "epoch": 1.9404127579737336, "grad_norm": 0.3765920102596283, "learning_rate": 0.002828, "loss": 2.2262, "step": 25856 }, { "epoch": 1.9452157598499062, "grad_norm": 0.44643986225128174, "learning_rate": 0.002828, "loss": 2.2225, "step": 25920 }, { "epoch": 1.9500187617260787, "grad_norm": 0.4022061824798584, "learning_rate": 0.002828, "loss": 2.2263, "step": 25984 }, { "epoch": 1.9548217636022516, "grad_norm": 0.3935778737068176, "learning_rate": 0.002828, "loss": 2.2187, "step": 26048 }, { "epoch": 1.959624765478424, "grad_norm": 0.3877500295639038, "learning_rate": 0.002828, "loss": 2.2231, "step": 26112 }, { "epoch": 1.9644277673545967, "grad_norm": 0.3891729712486267, "learning_rate": 0.002828, "loss": 2.219, "step": 26176 }, { "epoch": 1.9692307692307693, "grad_norm": 0.3616099953651428, "learning_rate": 0.002828, "loss": 2.2221, "step": 26240 }, { "epoch": 1.9740337711069418, "grad_norm": 0.3855280578136444, "learning_rate": 0.002828, "loss": 2.2288, "step": 26304 }, { "epoch": 1.9788367729831144, "grad_norm": 0.44039493799209595, "learning_rate": 0.002828, "loss": 2.2248, "step": 26368 }, { "epoch": 1.983639774859287, "grad_norm": 0.37217262387275696, "learning_rate": 0.002828, "loss": 2.2201, "step": 26432 }, { "epoch": 1.9884427767354595, "grad_norm": 0.3942553997039795, "learning_rate": 0.002828, "loss": 2.2087, "step": 26496 }, { "epoch": 1.9932457786116324, "grad_norm": 0.3975297808647156, "learning_rate": 0.002828, "loss": 2.2037, "step": 26560 }, { "epoch": 1.9980487804878049, "grad_norm": 0.39197394251823425, "learning_rate": 0.002828, "loss": 2.2114, "step": 26624 }, { "epoch": 2.0028517823639773, "grad_norm": 0.38722801208496094, "learning_rate": 0.002828, "loss": 2.1951, "step": 26688 }, { "epoch": 2.00765478424015, "grad_norm": 0.38619640469551086, "learning_rate": 0.002828, "loss": 2.1736, "step": 26752 }, { "epoch": 2.0124577861163226, "grad_norm": 0.49529945850372314, "learning_rate": 0.002828, "loss": 2.1734, "step": 26816 }, { "epoch": 2.0172607879924955, "grad_norm": 0.4199656844139099, "learning_rate": 0.002828, "loss": 2.1684, "step": 26880 }, { "epoch": 2.022063789868668, "grad_norm": 0.45820868015289307, "learning_rate": 0.002828, "loss": 2.1861, "step": 26944 }, { "epoch": 2.0268667917448404, "grad_norm": 0.4006725251674652, "learning_rate": 0.002828, "loss": 2.1743, "step": 27008 }, { "epoch": 2.0316697936210133, "grad_norm": 0.4596467614173889, "learning_rate": 0.002828, "loss": 2.1734, "step": 27072 }, { "epoch": 2.0364727954971857, "grad_norm": 0.38660213351249695, "learning_rate": 0.002828, "loss": 2.1673, "step": 27136 }, { "epoch": 2.041275797373358, "grad_norm": 0.44082361459732056, "learning_rate": 0.002828, "loss": 2.1747, "step": 27200 }, { "epoch": 2.046078799249531, "grad_norm": 0.3886605203151703, "learning_rate": 0.002828, "loss": 2.1771, "step": 27264 }, { "epoch": 2.0508818011257035, "grad_norm": 0.41386017203330994, "learning_rate": 0.002828, "loss": 2.1734, "step": 27328 }, { "epoch": 2.055684803001876, "grad_norm": 0.411478191614151, "learning_rate": 0.002828, "loss": 2.1682, "step": 27392 }, { "epoch": 2.060487804878049, "grad_norm": 0.47288912534713745, "learning_rate": 0.002828, "loss": 2.1703, "step": 27456 }, { "epoch": 2.0652908067542213, "grad_norm": 0.36384883522987366, "learning_rate": 0.002828, "loss": 2.1678, "step": 27520 }, { "epoch": 2.070093808630394, "grad_norm": 0.40636852383613586, "learning_rate": 0.002828, "loss": 2.1671, "step": 27584 }, { "epoch": 2.0748968105065666, "grad_norm": 0.4425170421600342, "learning_rate": 0.002828, "loss": 2.1727, "step": 27648 }, { "epoch": 2.079699812382739, "grad_norm": 0.48468896746635437, "learning_rate": 0.002828, "loss": 2.1678, "step": 27712 }, { "epoch": 2.084502814258912, "grad_norm": 0.40420570969581604, "learning_rate": 0.002828, "loss": 2.1763, "step": 27776 }, { "epoch": 2.0893058161350844, "grad_norm": 0.44314709305763245, "learning_rate": 0.002828, "loss": 2.166, "step": 27840 }, { "epoch": 2.0941088180112573, "grad_norm": 0.5187743306159973, "learning_rate": 0.002828, "loss": 2.1702, "step": 27904 }, { "epoch": 2.0989118198874297, "grad_norm": 0.4796048104763031, "learning_rate": 0.002828, "loss": 2.1618, "step": 27968 }, { "epoch": 2.103714821763602, "grad_norm": 0.43605130910873413, "learning_rate": 0.002828, "loss": 2.1622, "step": 28032 }, { "epoch": 2.108517823639775, "grad_norm": 0.4523628056049347, "learning_rate": 0.002828, "loss": 2.1633, "step": 28096 }, { "epoch": 2.1133208255159475, "grad_norm": 0.4183247983455658, "learning_rate": 0.002828, "loss": 2.1612, "step": 28160 }, { "epoch": 2.11812382739212, "grad_norm": 0.5113268494606018, "learning_rate": 0.002828, "loss": 2.1591, "step": 28224 }, { "epoch": 2.122926829268293, "grad_norm": 0.40837016701698303, "learning_rate": 0.002828, "loss": 2.1616, "step": 28288 }, { "epoch": 2.1277298311444652, "grad_norm": 0.40093889832496643, "learning_rate": 0.002828, "loss": 2.1672, "step": 28352 }, { "epoch": 2.1325328330206377, "grad_norm": 0.3988894820213318, "learning_rate": 0.002828, "loss": 2.1638, "step": 28416 }, { "epoch": 2.1373358348968106, "grad_norm": 0.42024731636047363, "learning_rate": 0.002828, "loss": 2.1544, "step": 28480 }, { "epoch": 2.142138836772983, "grad_norm": 0.38691264390945435, "learning_rate": 0.002828, "loss": 2.1649, "step": 28544 }, { "epoch": 2.146941838649156, "grad_norm": 0.41956332325935364, "learning_rate": 0.002828, "loss": 2.1586, "step": 28608 }, { "epoch": 2.1517448405253283, "grad_norm": 0.4035188555717468, "learning_rate": 0.002828, "loss": 2.165, "step": 28672 }, { "epoch": 2.1565478424015008, "grad_norm": 0.35282230377197266, "learning_rate": 0.002828, "loss": 2.145, "step": 28736 }, { "epoch": 2.1613508442776737, "grad_norm": 0.43618568778038025, "learning_rate": 0.002828, "loss": 2.1548, "step": 28800 }, { "epoch": 2.166153846153846, "grad_norm": 0.4310976564884186, "learning_rate": 0.002828, "loss": 2.1476, "step": 28864 }, { "epoch": 2.170956848030019, "grad_norm": 0.4475420415401459, "learning_rate": 0.002828, "loss": 2.1556, "step": 28928 }, { "epoch": 2.1757598499061914, "grad_norm": 0.4384845197200775, "learning_rate": 0.002828, "loss": 2.154, "step": 28992 }, { "epoch": 2.180562851782364, "grad_norm": 0.40141811966896057, "learning_rate": 0.002828, "loss": 2.1534, "step": 29056 }, { "epoch": 2.1853658536585368, "grad_norm": 0.3754780888557434, "learning_rate": 0.002828, "loss": 2.1488, "step": 29120 }, { "epoch": 2.190168855534709, "grad_norm": 0.40471306443214417, "learning_rate": 0.002828, "loss": 2.1514, "step": 29184 }, { "epoch": 2.1949718574108816, "grad_norm": 1.464024543762207, "learning_rate": 0.002828, "loss": 2.1525, "step": 29248 }, { "epoch": 2.1997748592870545, "grad_norm": 0.3818819522857666, "learning_rate": 0.002828, "loss": 2.1484, "step": 29312 }, { "epoch": 2.204577861163227, "grad_norm": 0.3688436448574066, "learning_rate": 0.002828, "loss": 2.1451, "step": 29376 }, { "epoch": 2.2093808630393994, "grad_norm": 0.4367921054363251, "learning_rate": 0.002828, "loss": 2.147, "step": 29440 }, { "epoch": 2.2141838649155723, "grad_norm": 0.3566763401031494, "learning_rate": 0.002828, "loss": 2.1476, "step": 29504 }, { "epoch": 2.2189868667917447, "grad_norm": 0.4481133222579956, "learning_rate": 0.002828, "loss": 2.1431, "step": 29568 }, { "epoch": 2.2237898686679176, "grad_norm": 0.44622039794921875, "learning_rate": 0.002828, "loss": 2.148, "step": 29632 }, { "epoch": 2.22859287054409, "grad_norm": 0.4857657253742218, "learning_rate": 0.002828, "loss": 2.1383, "step": 29696 }, { "epoch": 2.2333958724202625, "grad_norm": 0.41923773288726807, "learning_rate": 0.002828, "loss": 2.1443, "step": 29760 }, { "epoch": 2.2381988742964354, "grad_norm": 0.4176802933216095, "learning_rate": 0.002828, "loss": 2.1413, "step": 29824 }, { "epoch": 2.243001876172608, "grad_norm": 0.4086935520172119, "learning_rate": 0.002828, "loss": 2.1349, "step": 29888 }, { "epoch": 2.2478048780487807, "grad_norm": 0.40138566493988037, "learning_rate": 0.002828, "loss": 2.1481, "step": 29952 }, { "epoch": 2.252607879924953, "grad_norm": 0.393996000289917, "learning_rate": 0.002828, "loss": 2.1435, "step": 30016 }, { "epoch": 2.2574108818011256, "grad_norm": 0.3962005078792572, "learning_rate": 0.002828, "loss": 2.1345, "step": 30080 }, { "epoch": 2.2622138836772985, "grad_norm": 0.41648438572883606, "learning_rate": 0.002828, "loss": 2.1469, "step": 30144 }, { "epoch": 2.267016885553471, "grad_norm": 0.3810112774372101, "learning_rate": 0.002828, "loss": 2.1365, "step": 30208 }, { "epoch": 2.2718198874296434, "grad_norm": 0.4520975649356842, "learning_rate": 0.002828, "loss": 2.1419, "step": 30272 }, { "epoch": 2.2766228893058162, "grad_norm": 0.4406943917274475, "learning_rate": 0.002828, "loss": 2.1363, "step": 30336 }, { "epoch": 2.2814258911819887, "grad_norm": 0.4186633825302124, "learning_rate": 0.002828, "loss": 2.1406, "step": 30400 }, { "epoch": 2.286228893058161, "grad_norm": 0.6210718154907227, "learning_rate": 0.002828, "loss": 2.1333, "step": 30464 }, { "epoch": 2.291031894934334, "grad_norm": 0.4192369282245636, "learning_rate": 0.002828, "loss": 2.1396, "step": 30528 }, { "epoch": 2.2958348968105065, "grad_norm": 0.4297274053096771, "learning_rate": 0.002828, "loss": 2.1304, "step": 30592 }, { "epoch": 2.3006378986866793, "grad_norm": 0.4519890546798706, "learning_rate": 0.002828, "loss": 2.1356, "step": 30656 }, { "epoch": 2.305440900562852, "grad_norm": 0.3641507625579834, "learning_rate": 0.002828, "loss": 2.1211, "step": 30720 }, { "epoch": 2.310243902439024, "grad_norm": 0.4051010310649872, "learning_rate": 0.002828, "loss": 2.1318, "step": 30784 }, { "epoch": 2.315046904315197, "grad_norm": 0.43515390157699585, "learning_rate": 0.002828, "loss": 2.1277, "step": 30848 }, { "epoch": 2.3198499061913695, "grad_norm": 0.4015902280807495, "learning_rate": 0.002828, "loss": 2.1343, "step": 30912 }, { "epoch": 2.3246529080675424, "grad_norm": 0.41404440999031067, "learning_rate": 0.002828, "loss": 2.13, "step": 30976 }, { "epoch": 2.329455909943715, "grad_norm": 0.3820297122001648, "learning_rate": 0.002828, "loss": 2.1323, "step": 31040 }, { "epoch": 2.3342589118198873, "grad_norm": 0.4260556697845459, "learning_rate": 0.002828, "loss": 2.1355, "step": 31104 }, { "epoch": 2.33906191369606, "grad_norm": 0.38206708431243896, "learning_rate": 0.002828, "loss": 2.133, "step": 31168 }, { "epoch": 2.3438649155722326, "grad_norm": 0.3741287291049957, "learning_rate": 0.002828, "loss": 2.1312, "step": 31232 }, { "epoch": 2.348667917448405, "grad_norm": 0.3969305157661438, "learning_rate": 0.002828, "loss": 2.1328, "step": 31296 }, { "epoch": 2.353470919324578, "grad_norm": 0.3579777479171753, "learning_rate": 0.002828, "loss": 2.1292, "step": 31360 }, { "epoch": 2.3582739212007504, "grad_norm": 0.40797194838523865, "learning_rate": 0.002828, "loss": 2.1268, "step": 31424 }, { "epoch": 2.363076923076923, "grad_norm": 0.4283476173877716, "learning_rate": 0.002828, "loss": 2.1298, "step": 31488 }, { "epoch": 2.3678799249530957, "grad_norm": 0.4102086126804352, "learning_rate": 0.002828, "loss": 2.1269, "step": 31552 }, { "epoch": 2.372682926829268, "grad_norm": 0.47719141840934753, "learning_rate": 0.002828, "loss": 2.1286, "step": 31616 }, { "epoch": 2.377485928705441, "grad_norm": 0.4112071692943573, "learning_rate": 0.002828, "loss": 2.1385, "step": 31680 }, { "epoch": 2.3822889305816135, "grad_norm": 0.3762068450450897, "learning_rate": 0.002828, "loss": 2.1208, "step": 31744 }, { "epoch": 2.387091932457786, "grad_norm": 0.4260576665401459, "learning_rate": 0.002828, "loss": 2.1238, "step": 31808 }, { "epoch": 2.391894934333959, "grad_norm": 0.4393415153026581, "learning_rate": 0.002828, "loss": 2.121, "step": 31872 }, { "epoch": 2.3966979362101313, "grad_norm": 0.36091846227645874, "learning_rate": 0.002828, "loss": 2.1181, "step": 31936 }, { "epoch": 2.401500938086304, "grad_norm": 0.3533402681350708, "learning_rate": 0.002828, "loss": 2.1184, "step": 32000 }, { "epoch": 2.4063039399624766, "grad_norm": 0.3956640362739563, "learning_rate": 0.002828, "loss": 2.1242, "step": 32064 }, { "epoch": 2.411106941838649, "grad_norm": 0.40656957030296326, "learning_rate": 0.002828, "loss": 2.1145, "step": 32128 }, { "epoch": 2.415909943714822, "grad_norm": 0.3594007194042206, "learning_rate": 0.002828, "loss": 2.1179, "step": 32192 }, { "epoch": 2.4207129455909944, "grad_norm": 0.4054548740386963, "learning_rate": 0.002828, "loss": 2.1081, "step": 32256 }, { "epoch": 2.425515947467167, "grad_norm": 0.39665716886520386, "learning_rate": 0.002828, "loss": 2.1192, "step": 32320 }, { "epoch": 2.4303189493433397, "grad_norm": 0.39287346601486206, "learning_rate": 0.002828, "loss": 2.1178, "step": 32384 }, { "epoch": 2.435121951219512, "grad_norm": 0.4614468514919281, "learning_rate": 0.002828, "loss": 2.1123, "step": 32448 }, { "epoch": 2.4399249530956846, "grad_norm": 0.36579185724258423, "learning_rate": 0.002828, "loss": 2.1149, "step": 32512 }, { "epoch": 2.4447279549718575, "grad_norm": 0.41442158818244934, "learning_rate": 0.002828, "loss": 2.1095, "step": 32576 }, { "epoch": 2.44953095684803, "grad_norm": 0.4049338102340698, "learning_rate": 0.002828, "loss": 2.1155, "step": 32640 }, { "epoch": 2.454333958724203, "grad_norm": 0.3908383250236511, "learning_rate": 0.002828, "loss": 2.1099, "step": 32704 }, { "epoch": 2.4591369606003752, "grad_norm": 0.4169704020023346, "learning_rate": 0.002828, "loss": 2.1162, "step": 32768 }, { "epoch": 2.4639399624765477, "grad_norm": 0.3969012498855591, "learning_rate": 0.002828, "loss": 2.1074, "step": 32832 }, { "epoch": 2.4687429643527206, "grad_norm": 0.40591520071029663, "learning_rate": 0.002828, "loss": 2.1114, "step": 32896 }, { "epoch": 2.473545966228893, "grad_norm": 0.44612857699394226, "learning_rate": 0.002828, "loss": 2.1111, "step": 32960 }, { "epoch": 2.478348968105066, "grad_norm": 0.38000956177711487, "learning_rate": 0.002828, "loss": 2.1076, "step": 33024 }, { "epoch": 2.4831519699812383, "grad_norm": 0.4089069068431854, "learning_rate": 0.002828, "loss": 2.1048, "step": 33088 }, { "epoch": 2.4879549718574108, "grad_norm": 0.38385865092277527, "learning_rate": 0.002828, "loss": 2.1078, "step": 33152 }, { "epoch": 2.4927579737335837, "grad_norm": 0.40285399556159973, "learning_rate": 0.002828, "loss": 2.106, "step": 33216 }, { "epoch": 2.497560975609756, "grad_norm": 0.43141239881515503, "learning_rate": 0.002828, "loss": 2.1022, "step": 33280 }, { "epoch": 2.5023639774859285, "grad_norm": 0.43552160263061523, "learning_rate": 0.002828, "loss": 2.1015, "step": 33344 }, { "epoch": 2.5071669793621014, "grad_norm": 0.41757678985595703, "learning_rate": 0.002828, "loss": 2.106, "step": 33408 }, { "epoch": 2.511969981238274, "grad_norm": 0.3721197247505188, "learning_rate": 0.002828, "loss": 2.108, "step": 33472 }, { "epoch": 2.5167729831144463, "grad_norm": 0.4426725208759308, "learning_rate": 0.002828, "loss": 2.11, "step": 33536 }, { "epoch": 2.521575984990619, "grad_norm": 0.37126022577285767, "learning_rate": 0.002828, "loss": 2.0907, "step": 33600 }, { "epoch": 2.5263789868667916, "grad_norm": 0.42241954803466797, "learning_rate": 0.002828, "loss": 2.099, "step": 33664 }, { "epoch": 2.5311819887429645, "grad_norm": 0.40958234667778015, "learning_rate": 0.002828, "loss": 2.1024, "step": 33728 }, { "epoch": 2.535984990619137, "grad_norm": 0.36936697363853455, "learning_rate": 0.002828, "loss": 2.0945, "step": 33792 }, { "epoch": 2.54078799249531, "grad_norm": 0.44610148668289185, "learning_rate": 0.002828, "loss": 2.1003, "step": 33856 }, { "epoch": 2.5455909943714823, "grad_norm": 0.5091739296913147, "learning_rate": 0.002828, "loss": 2.1024, "step": 33920 }, { "epoch": 2.5503939962476547, "grad_norm": 0.40063703060150146, "learning_rate": 0.002828, "loss": 2.094, "step": 33984 }, { "epoch": 2.5551969981238276, "grad_norm": 0.41543325781822205, "learning_rate": 0.002828, "loss": 2.1005, "step": 34048 }, { "epoch": 2.56, "grad_norm": 0.38303419947624207, "learning_rate": 0.002828, "loss": 2.0995, "step": 34112 }, { "epoch": 2.5648030018761725, "grad_norm": 0.3361968696117401, "learning_rate": 0.002828, "loss": 2.0978, "step": 34176 }, { "epoch": 2.5696060037523454, "grad_norm": 0.4174779951572418, "learning_rate": 0.002828, "loss": 2.0965, "step": 34240 }, { "epoch": 2.574409005628518, "grad_norm": 0.3994362950325012, "learning_rate": 0.002828, "loss": 2.0917, "step": 34304 }, { "epoch": 2.5792120075046903, "grad_norm": 0.41613155603408813, "learning_rate": 0.002828, "loss": 2.0848, "step": 34368 }, { "epoch": 2.584015009380863, "grad_norm": 0.4391821324825287, "learning_rate": 0.002828, "loss": 2.0962, "step": 34432 }, { "epoch": 2.5888180112570356, "grad_norm": 0.3478713035583496, "learning_rate": 0.002828, "loss": 2.0881, "step": 34496 }, { "epoch": 2.593621013133208, "grad_norm": 0.4065977931022644, "learning_rate": 0.002828, "loss": 2.092, "step": 34560 }, { "epoch": 2.598424015009381, "grad_norm": 0.4749963581562042, "learning_rate": 0.002828, "loss": 2.0899, "step": 34624 }, { "epoch": 2.6032270168855534, "grad_norm": 0.4545346796512604, "learning_rate": 0.002828, "loss": 2.097, "step": 34688 }, { "epoch": 2.6080300187617262, "grad_norm": 0.40314188599586487, "learning_rate": 0.002828, "loss": 2.0837, "step": 34752 }, { "epoch": 2.6128330206378987, "grad_norm": 0.3616882562637329, "learning_rate": 0.002828, "loss": 2.0867, "step": 34816 }, { "epoch": 2.6176360225140716, "grad_norm": 0.37748977541923523, "learning_rate": 0.002828, "loss": 2.0899, "step": 34880 }, { "epoch": 2.622439024390244, "grad_norm": 0.43397149443626404, "learning_rate": 0.002828, "loss": 2.0831, "step": 34944 }, { "epoch": 2.6272420262664165, "grad_norm": 0.41302821040153503, "learning_rate": 0.002828, "loss": 2.0822, "step": 35008 }, { "epoch": 2.6320450281425893, "grad_norm": 0.47421374917030334, "learning_rate": 0.002828, "loss": 2.0803, "step": 35072 }, { "epoch": 2.636848030018762, "grad_norm": 0.4405677020549774, "learning_rate": 0.002828, "loss": 2.0802, "step": 35136 }, { "epoch": 2.641651031894934, "grad_norm": 0.448106974363327, "learning_rate": 0.002828, "loss": 2.0778, "step": 35200 }, { "epoch": 2.646454033771107, "grad_norm": 0.4040149748325348, "learning_rate": 0.002828, "loss": 2.0831, "step": 35264 }, { "epoch": 2.6512570356472795, "grad_norm": 0.3847975730895996, "learning_rate": 0.002828, "loss": 2.08, "step": 35328 }, { "epoch": 2.656060037523452, "grad_norm": 0.4193227291107178, "learning_rate": 0.002828, "loss": 2.0845, "step": 35392 }, { "epoch": 2.660863039399625, "grad_norm": 0.4024184048175812, "learning_rate": 0.002828, "loss": 2.0831, "step": 35456 }, { "epoch": 2.6656660412757973, "grad_norm": 0.3550715744495392, "learning_rate": 0.002828, "loss": 2.077, "step": 35520 }, { "epoch": 2.6704690431519698, "grad_norm": 0.41777607798576355, "learning_rate": 0.002828, "loss": 2.0735, "step": 35584 }, { "epoch": 2.6752720450281426, "grad_norm": 0.4474610388278961, "learning_rate": 0.002828, "loss": 2.0759, "step": 35648 }, { "epoch": 2.680075046904315, "grad_norm": 0.41162994503974915, "learning_rate": 0.002828, "loss": 2.0713, "step": 35712 }, { "epoch": 2.684878048780488, "grad_norm": 0.4786086976528168, "learning_rate": 0.002828, "loss": 2.0813, "step": 35776 }, { "epoch": 2.6896810506566604, "grad_norm": 0.3829205334186554, "learning_rate": 0.002828, "loss": 2.0697, "step": 35840 }, { "epoch": 2.6944840525328333, "grad_norm": 0.3732524514198303, "learning_rate": 0.002828, "loss": 2.077, "step": 35904 }, { "epoch": 2.6992870544090057, "grad_norm": 0.40800660848617554, "learning_rate": 0.002828, "loss": 2.0806, "step": 35968 }, { "epoch": 2.704090056285178, "grad_norm": 0.4223944842815399, "learning_rate": 0.002828, "loss": 2.0789, "step": 36032 }, { "epoch": 2.708893058161351, "grad_norm": 0.4355471134185791, "learning_rate": 0.002828, "loss": 2.0814, "step": 36096 }, { "epoch": 2.7136960600375235, "grad_norm": 0.4148624539375305, "learning_rate": 0.002828, "loss": 2.0716, "step": 36160 }, { "epoch": 2.718499061913696, "grad_norm": 0.399810254573822, "learning_rate": 0.002828, "loss": 2.08, "step": 36224 }, { "epoch": 2.723302063789869, "grad_norm": 0.47194531559944153, "learning_rate": 0.002828, "loss": 2.0709, "step": 36288 }, { "epoch": 2.7281050656660413, "grad_norm": 0.4752259850502014, "learning_rate": 0.002828, "loss": 2.0634, "step": 36352 }, { "epoch": 2.7329080675422137, "grad_norm": 0.38671767711639404, "learning_rate": 0.002828, "loss": 2.0782, "step": 36416 }, { "epoch": 2.7377110694183866, "grad_norm": 0.38888657093048096, "learning_rate": 0.002828, "loss": 2.0725, "step": 36480 }, { "epoch": 2.742514071294559, "grad_norm": 0.4416655898094177, "learning_rate": 0.002828, "loss": 2.0659, "step": 36544 }, { "epoch": 2.7473170731707315, "grad_norm": 0.4418342113494873, "learning_rate": 0.002828, "loss": 2.0783, "step": 36608 }, { "epoch": 2.7521200750469044, "grad_norm": 0.40704232454299927, "learning_rate": 0.002828, "loss": 2.0693, "step": 36672 }, { "epoch": 2.756923076923077, "grad_norm": 0.3956950008869171, "learning_rate": 0.002828, "loss": 2.0622, "step": 36736 }, { "epoch": 2.7617260787992497, "grad_norm": 0.43156442046165466, "learning_rate": 0.002828, "loss": 2.0664, "step": 36800 }, { "epoch": 2.766529080675422, "grad_norm": 0.3915492296218872, "learning_rate": 0.002828, "loss": 2.0751, "step": 36864 }, { "epoch": 2.771332082551595, "grad_norm": 0.39085447788238525, "learning_rate": 0.002828, "loss": 2.0688, "step": 36928 }, { "epoch": 2.7761350844277675, "grad_norm": 0.39727815985679626, "learning_rate": 0.002828, "loss": 2.0623, "step": 36992 }, { "epoch": 2.78093808630394, "grad_norm": 0.36193764209747314, "learning_rate": 0.002828, "loss": 2.0653, "step": 37056 }, { "epoch": 2.785741088180113, "grad_norm": 0.39149948954582214, "learning_rate": 0.002828, "loss": 2.066, "step": 37120 }, { "epoch": 2.7905440900562852, "grad_norm": 0.3922441601753235, "learning_rate": 0.002828, "loss": 2.0651, "step": 37184 }, { "epoch": 2.7953470919324577, "grad_norm": 0.40434616804122925, "learning_rate": 0.002828, "loss": 2.0569, "step": 37248 }, { "epoch": 2.8001500938086306, "grad_norm": 0.4445114731788635, "learning_rate": 0.002828, "loss": 2.0641, "step": 37312 }, { "epoch": 2.804953095684803, "grad_norm": 0.4767908751964569, "learning_rate": 0.002828, "loss": 2.0639, "step": 37376 }, { "epoch": 2.8097560975609754, "grad_norm": 0.49127042293548584, "learning_rate": 0.002828, "loss": 2.0545, "step": 37440 }, { "epoch": 2.8145590994371483, "grad_norm": 0.4333418607711792, "learning_rate": 0.002828, "loss": 2.0632, "step": 37504 }, { "epoch": 2.8193621013133208, "grad_norm": 0.4018191993236542, "learning_rate": 0.002828, "loss": 2.0601, "step": 37568 }, { "epoch": 2.824165103189493, "grad_norm": 0.4417150318622589, "learning_rate": 0.002828, "loss": 2.0596, "step": 37632 }, { "epoch": 2.828968105065666, "grad_norm": 0.38089001178741455, "learning_rate": 0.002828, "loss": 2.0592, "step": 37696 }, { "epoch": 2.8337711069418385, "grad_norm": 0.47056955099105835, "learning_rate": 0.002828, "loss": 2.0534, "step": 37760 }, { "epoch": 2.8385741088180114, "grad_norm": 0.3839799165725708, "learning_rate": 0.002828, "loss": 2.0564, "step": 37824 }, { "epoch": 2.843377110694184, "grad_norm": 0.4095609784126282, "learning_rate": 0.002828, "loss": 2.0529, "step": 37888 }, { "epoch": 2.8481801125703567, "grad_norm": 0.435360848903656, "learning_rate": 0.002828, "loss": 2.0605, "step": 37952 }, { "epoch": 2.852983114446529, "grad_norm": 0.40065625309944153, "learning_rate": 0.002828, "loss": 2.0606, "step": 38016 }, { "epoch": 2.8577861163227016, "grad_norm": 0.42678216099739075, "learning_rate": 0.002828, "loss": 2.0544, "step": 38080 }, { "epoch": 2.8625891181988745, "grad_norm": 0.46134546399116516, "learning_rate": 0.002828, "loss": 2.0529, "step": 38144 }, { "epoch": 2.867392120075047, "grad_norm": 0.42689305543899536, "learning_rate": 0.002828, "loss": 2.0568, "step": 38208 }, { "epoch": 2.8721951219512194, "grad_norm": 0.4389091730117798, "learning_rate": 0.002828, "loss": 2.0466, "step": 38272 }, { "epoch": 2.8769981238273923, "grad_norm": 0.416259229183197, "learning_rate": 0.002828, "loss": 2.0555, "step": 38336 }, { "epoch": 2.8818011257035647, "grad_norm": 0.3812130093574524, "learning_rate": 0.002828, "loss": 2.0505, "step": 38400 }, { "epoch": 2.886604127579737, "grad_norm": 0.41849854588508606, "learning_rate": 0.002828, "loss": 2.0533, "step": 38464 }, { "epoch": 2.89140712945591, "grad_norm": 0.38751932978630066, "learning_rate": 0.002828, "loss": 2.047, "step": 38528 }, { "epoch": 2.8962101313320825, "grad_norm": 0.4492979347705841, "learning_rate": 0.002828, "loss": 2.0477, "step": 38592 }, { "epoch": 2.901013133208255, "grad_norm": 0.4301586449146271, "learning_rate": 0.002828, "loss": 2.0517, "step": 38656 }, { "epoch": 2.905816135084428, "grad_norm": 0.42884722352027893, "learning_rate": 0.002828, "loss": 2.0513, "step": 38720 }, { "epoch": 2.9106191369606003, "grad_norm": 0.43645617365837097, "learning_rate": 0.002828, "loss": 2.0469, "step": 38784 }, { "epoch": 2.915422138836773, "grad_norm": 0.41132786870002747, "learning_rate": 0.002828, "loss": 2.0514, "step": 38848 }, { "epoch": 2.9202251407129456, "grad_norm": 0.40186938643455505, "learning_rate": 0.002828, "loss": 2.0473, "step": 38912 }, { "epoch": 2.9250281425891185, "grad_norm": 0.43834152817726135, "learning_rate": 0.002828, "loss": 2.0431, "step": 38976 }, { "epoch": 2.929831144465291, "grad_norm": 0.42384204268455505, "learning_rate": 0.002828, "loss": 2.0423, "step": 39040 }, { "epoch": 2.9346341463414634, "grad_norm": 0.41747942566871643, "learning_rate": 0.002828, "loss": 2.0459, "step": 39104 }, { "epoch": 2.9394371482176362, "grad_norm": 0.4123360514640808, "learning_rate": 0.002828, "loss": 2.0454, "step": 39168 }, { "epoch": 2.9442401500938087, "grad_norm": 0.3995719254016876, "learning_rate": 0.002828, "loss": 2.0422, "step": 39232 }, { "epoch": 2.949043151969981, "grad_norm": 0.38762396574020386, "learning_rate": 0.002828, "loss": 2.0428, "step": 39296 }, { "epoch": 2.953846153846154, "grad_norm": 0.4130190312862396, "learning_rate": 0.002828, "loss": 2.0383, "step": 39360 }, { "epoch": 2.9586491557223265, "grad_norm": 0.401840478181839, "learning_rate": 0.002828, "loss": 2.037, "step": 39424 }, { "epoch": 2.963452157598499, "grad_norm": 0.44890210032463074, "learning_rate": 0.002828, "loss": 2.0477, "step": 39488 }, { "epoch": 2.968255159474672, "grad_norm": 0.4392335116863251, "learning_rate": 0.002828, "loss": 2.0329, "step": 39552 }, { "epoch": 2.973058161350844, "grad_norm": 0.37307026982307434, "learning_rate": 0.002828, "loss": 2.0361, "step": 39616 }, { "epoch": 2.9778611632270167, "grad_norm": 0.3836345970630646, "learning_rate": 0.002828, "loss": 2.0389, "step": 39680 }, { "epoch": 2.9826641651031895, "grad_norm": 0.43940141797065735, "learning_rate": 0.002828, "loss": 2.0424, "step": 39744 }, { "epoch": 2.987467166979362, "grad_norm": 0.4530318081378937, "learning_rate": 0.002828, "loss": 2.0361, "step": 39808 }, { "epoch": 2.992270168855535, "grad_norm": 0.4106103777885437, "learning_rate": 0.002828, "loss": 2.0349, "step": 39872 }, { "epoch": 2.9970731707317073, "grad_norm": 0.3956582546234131, "learning_rate": 0.002828, "loss": 2.0371, "step": 39936 }, { "epoch": 3.0018761726078798, "grad_norm": 0.5075650811195374, "learning_rate": 0.002828, "loss": 2.0121, "step": 40000 }, { "epoch": 3.0066791744840526, "grad_norm": 0.39176109433174133, "learning_rate": 0.002828, "loss": 1.9939, "step": 40064 }, { "epoch": 3.011482176360225, "grad_norm": 0.40185442566871643, "learning_rate": 0.002828, "loss": 1.9972, "step": 40128 }, { "epoch": 3.016285178236398, "grad_norm": 0.41981250047683716, "learning_rate": 0.002828, "loss": 1.9962, "step": 40192 }, { "epoch": 3.0210881801125704, "grad_norm": 0.42668989300727844, "learning_rate": 0.002828, "loss": 1.9907, "step": 40256 }, { "epoch": 3.025891181988743, "grad_norm": 0.42785272002220154, "learning_rate": 0.002828, "loss": 1.9959, "step": 40320 }, { "epoch": 3.0306941838649157, "grad_norm": 0.4478468596935272, "learning_rate": 0.002828, "loss": 1.9921, "step": 40384 }, { "epoch": 3.035497185741088, "grad_norm": 0.4365384578704834, "learning_rate": 0.002828, "loss": 1.9849, "step": 40448 }, { "epoch": 3.0403001876172606, "grad_norm": 0.36439043283462524, "learning_rate": 0.002828, "loss": 1.9975, "step": 40512 }, { "epoch": 3.0451031894934335, "grad_norm": 0.38191044330596924, "learning_rate": 0.002828, "loss": 1.9898, "step": 40576 }, { "epoch": 3.049906191369606, "grad_norm": 0.403347909450531, "learning_rate": 0.002828, "loss": 1.9979, "step": 40640 }, { "epoch": 3.0547091932457784, "grad_norm": 0.40450242161750793, "learning_rate": 0.002828, "loss": 2.0019, "step": 40704 }, { "epoch": 3.0595121951219513, "grad_norm": 0.5009217858314514, "learning_rate": 0.002828, "loss": 1.9905, "step": 40768 }, { "epoch": 3.0643151969981237, "grad_norm": 0.4922749400138855, "learning_rate": 0.002828, "loss": 1.9924, "step": 40832 }, { "epoch": 3.0691181988742966, "grad_norm": 0.39887261390686035, "learning_rate": 0.002828, "loss": 1.9911, "step": 40896 }, { "epoch": 3.073921200750469, "grad_norm": 0.5205056071281433, "learning_rate": 0.002828, "loss": 1.9878, "step": 40960 }, { "epoch": 3.0787242026266415, "grad_norm": 0.41092032194137573, "learning_rate": 0.002828, "loss": 1.9867, "step": 41024 }, { "epoch": 3.0835272045028144, "grad_norm": 0.41716116666793823, "learning_rate": 0.002828, "loss": 1.9856, "step": 41088 }, { "epoch": 3.088330206378987, "grad_norm": 0.44289645552635193, "learning_rate": 0.002828, "loss": 1.986, "step": 41152 }, { "epoch": 3.0931332082551597, "grad_norm": 0.3966487944126129, "learning_rate": 0.002828, "loss": 1.99, "step": 41216 }, { "epoch": 3.097936210131332, "grad_norm": 0.4425393044948578, "learning_rate": 0.002828, "loss": 1.9896, "step": 41280 }, { "epoch": 3.1027392120075046, "grad_norm": 0.40042001008987427, "learning_rate": 0.002828, "loss": 1.9902, "step": 41344 }, { "epoch": 3.1075422138836775, "grad_norm": 0.3969666361808777, "learning_rate": 0.002828, "loss": 1.989, "step": 41408 }, { "epoch": 3.11234521575985, "grad_norm": 0.41544854640960693, "learning_rate": 0.002828, "loss": 1.9933, "step": 41472 }, { "epoch": 3.1171482176360223, "grad_norm": 0.436469167470932, "learning_rate": 0.002828, "loss": 1.9848, "step": 41536 }, { "epoch": 3.1219512195121952, "grad_norm": 0.39815834164619446, "learning_rate": 0.002828, "loss": 1.9866, "step": 41600 }, { "epoch": 3.1267542213883677, "grad_norm": 0.39662250876426697, "learning_rate": 0.002828, "loss": 1.985, "step": 41664 }, { "epoch": 3.13155722326454, "grad_norm": 0.4261801838874817, "learning_rate": 0.002828, "loss": 1.991, "step": 41728 }, { "epoch": 3.136360225140713, "grad_norm": 0.4388674199581146, "learning_rate": 0.002828, "loss": 1.9894, "step": 41792 }, { "epoch": 3.1411632270168854, "grad_norm": 0.4637046456336975, "learning_rate": 0.002828, "loss": 1.9897, "step": 41856 }, { "epoch": 3.1459662288930583, "grad_norm": 0.49249354004859924, "learning_rate": 0.002828, "loss": 1.985, "step": 41920 }, { "epoch": 3.1507692307692308, "grad_norm": 0.40578174591064453, "learning_rate": 0.002828, "loss": 1.9785, "step": 41984 }, { "epoch": 3.155572232645403, "grad_norm": 0.46401965618133545, "learning_rate": 0.002828, "loss": 1.9894, "step": 42048 }, { "epoch": 3.160375234521576, "grad_norm": 0.4829576909542084, "learning_rate": 0.002828, "loss": 1.9867, "step": 42112 }, { "epoch": 3.1651782363977485, "grad_norm": 0.4457123875617981, "learning_rate": 0.002828, "loss": 1.9877, "step": 42176 }, { "epoch": 3.1699812382739214, "grad_norm": 0.44146353006362915, "learning_rate": 0.002828, "loss": 1.9919, "step": 42240 }, { "epoch": 3.174784240150094, "grad_norm": 0.4063395857810974, "learning_rate": 0.002828, "loss": 1.9886, "step": 42304 }, { "epoch": 3.1795872420262663, "grad_norm": 0.40236446261405945, "learning_rate": 0.002828, "loss": 1.9808, "step": 42368 }, { "epoch": 3.184390243902439, "grad_norm": 0.41492629051208496, "learning_rate": 0.002828, "loss": 1.9826, "step": 42432 }, { "epoch": 3.1891932457786116, "grad_norm": 0.39708060026168823, "learning_rate": 0.002828, "loss": 1.9853, "step": 42496 }, { "epoch": 3.193996247654784, "grad_norm": 0.427532821893692, "learning_rate": 0.002828, "loss": 1.9845, "step": 42560 }, { "epoch": 3.198799249530957, "grad_norm": 0.48030269145965576, "learning_rate": 0.002828, "loss": 1.9801, "step": 42624 }, { "epoch": 3.2036022514071294, "grad_norm": 0.4397580027580261, "learning_rate": 0.002828, "loss": 1.982, "step": 42688 }, { "epoch": 3.208405253283302, "grad_norm": 0.41475823521614075, "learning_rate": 0.002828, "loss": 1.9828, "step": 42752 }, { "epoch": 3.2132082551594747, "grad_norm": 0.4905751049518585, "learning_rate": 0.002828, "loss": 1.9821, "step": 42816 }, { "epoch": 3.218011257035647, "grad_norm": 0.46179690957069397, "learning_rate": 0.002828, "loss": 1.9842, "step": 42880 }, { "epoch": 3.22281425891182, "grad_norm": 0.38780882954597473, "learning_rate": 0.002828, "loss": 1.9819, "step": 42944 }, { "epoch": 3.2276172607879925, "grad_norm": 0.48357096314430237, "learning_rate": 0.002828, "loss": 1.9851, "step": 43008 }, { "epoch": 3.232420262664165, "grad_norm": 0.4514494836330414, "learning_rate": 0.002828, "loss": 1.9788, "step": 43072 }, { "epoch": 3.237223264540338, "grad_norm": 0.3997925817966461, "learning_rate": 0.002828, "loss": 1.9815, "step": 43136 }, { "epoch": 3.2420262664165103, "grad_norm": 0.47594690322875977, "learning_rate": 0.002828, "loss": 1.9809, "step": 43200 }, { "epoch": 3.246829268292683, "grad_norm": 0.41271838545799255, "learning_rate": 0.002828, "loss": 1.9821, "step": 43264 }, { "epoch": 3.2516322701688556, "grad_norm": 0.3931159973144531, "learning_rate": 0.002828, "loss": 1.9772, "step": 43328 }, { "epoch": 3.256435272045028, "grad_norm": 0.4065423011779785, "learning_rate": 0.002828, "loss": 1.9776, "step": 43392 }, { "epoch": 3.261238273921201, "grad_norm": 0.4076635241508484, "learning_rate": 0.002828, "loss": 1.9906, "step": 43456 }, { "epoch": 3.2660412757973734, "grad_norm": 0.39124780893325806, "learning_rate": 0.002828, "loss": 1.986, "step": 43520 }, { "epoch": 3.270844277673546, "grad_norm": 0.40072616934776306, "learning_rate": 0.002828, "loss": 1.978, "step": 43584 }, { "epoch": 3.2756472795497187, "grad_norm": 0.41515403985977173, "learning_rate": 0.002828, "loss": 1.9816, "step": 43648 }, { "epoch": 3.280450281425891, "grad_norm": 0.3668558895587921, "learning_rate": 0.002828, "loss": 1.9737, "step": 43712 }, { "epoch": 3.2852532833020636, "grad_norm": 0.4493162930011749, "learning_rate": 0.002828, "loss": 1.9814, "step": 43776 }, { "epoch": 3.2900562851782365, "grad_norm": 0.38466036319732666, "learning_rate": 0.002828, "loss": 1.9762, "step": 43840 }, { "epoch": 3.294859287054409, "grad_norm": 0.5075457692146301, "learning_rate": 0.002828, "loss": 1.9858, "step": 43904 }, { "epoch": 3.2996622889305818, "grad_norm": 0.5009188055992126, "learning_rate": 0.002828, "loss": 1.9826, "step": 43968 }, { "epoch": 3.304465290806754, "grad_norm": 0.45684710144996643, "learning_rate": 0.002828, "loss": 1.9658, "step": 44032 }, { "epoch": 3.3092682926829267, "grad_norm": 0.4678841531276703, "learning_rate": 0.002828, "loss": 1.9745, "step": 44096 }, { "epoch": 3.3140712945590995, "grad_norm": 0.44828855991363525, "learning_rate": 0.002828, "loss": 1.9794, "step": 44160 }, { "epoch": 3.318874296435272, "grad_norm": 0.4242972731590271, "learning_rate": 0.002828, "loss": 1.977, "step": 44224 }, { "epoch": 3.323677298311445, "grad_norm": 0.4554446339607239, "learning_rate": 0.002828, "loss": 1.9722, "step": 44288 }, { "epoch": 3.3284803001876173, "grad_norm": 0.3926396667957306, "learning_rate": 0.002828, "loss": 1.9777, "step": 44352 }, { "epoch": 3.3332833020637898, "grad_norm": 0.4520684778690338, "learning_rate": 0.002828, "loss": 1.9738, "step": 44416 }, { "epoch": 3.3380863039399626, "grad_norm": 0.3810643255710602, "learning_rate": 0.002828, "loss": 1.9692, "step": 44480 }, { "epoch": 3.342889305816135, "grad_norm": 0.3734522759914398, "learning_rate": 0.002828, "loss": 1.9693, "step": 44544 }, { "epoch": 3.3476923076923075, "grad_norm": 0.43206340074539185, "learning_rate": 0.002828, "loss": 1.9667, "step": 44608 }, { "epoch": 3.3524953095684804, "grad_norm": 0.4002898335456848, "learning_rate": 0.002828, "loss": 1.9713, "step": 44672 }, { "epoch": 3.357298311444653, "grad_norm": 0.40499213337898254, "learning_rate": 0.002828, "loss": 1.9679, "step": 44736 }, { "epoch": 3.3621013133208253, "grad_norm": 0.41439104080200195, "learning_rate": 0.002828, "loss": 1.9746, "step": 44800 }, { "epoch": 3.366904315196998, "grad_norm": 0.3716445565223694, "learning_rate": 0.002828, "loss": 1.977, "step": 44864 }, { "epoch": 3.3717073170731706, "grad_norm": 0.40978100895881653, "learning_rate": 0.002828, "loss": 1.9733, "step": 44928 }, { "epoch": 3.3765103189493435, "grad_norm": 0.4042293131351471, "learning_rate": 0.002828, "loss": 1.964, "step": 44992 }, { "epoch": 3.381313320825516, "grad_norm": 0.3775876462459564, "learning_rate": 0.002828, "loss": 1.9592, "step": 45056 }, { "epoch": 3.3861163227016884, "grad_norm": 0.40276941657066345, "learning_rate": 0.002828, "loss": 1.9675, "step": 45120 }, { "epoch": 3.3909193245778613, "grad_norm": 0.43238821625709534, "learning_rate": 0.002828, "loss": 1.9659, "step": 45184 }, { "epoch": 3.3957223264540337, "grad_norm": 0.43237367272377014, "learning_rate": 0.002828, "loss": 1.9658, "step": 45248 }, { "epoch": 3.4005253283302066, "grad_norm": 0.4257899522781372, "learning_rate": 0.002828, "loss": 1.9661, "step": 45312 }, { "epoch": 3.405328330206379, "grad_norm": 0.4422502815723419, "learning_rate": 0.002828, "loss": 1.9674, "step": 45376 }, { "epoch": 3.4101313320825515, "grad_norm": 0.49216824769973755, "learning_rate": 0.002828, "loss": 1.9677, "step": 45440 }, { "epoch": 3.4149343339587244, "grad_norm": 0.43938690423965454, "learning_rate": 0.002828, "loss": 1.964, "step": 45504 }, { "epoch": 3.419737335834897, "grad_norm": 0.4162237346172333, "learning_rate": 0.002828, "loss": 1.9675, "step": 45568 }, { "epoch": 3.4245403377110692, "grad_norm": 0.37918907403945923, "learning_rate": 0.002828, "loss": 1.9639, "step": 45632 }, { "epoch": 3.429343339587242, "grad_norm": 0.3653721809387207, "learning_rate": 0.002828, "loss": 1.9695, "step": 45696 }, { "epoch": 3.4341463414634146, "grad_norm": 0.4152611494064331, "learning_rate": 0.002828, "loss": 1.9628, "step": 45760 }, { "epoch": 3.438949343339587, "grad_norm": 0.42145082354545593, "learning_rate": 0.002828, "loss": 1.9626, "step": 45824 }, { "epoch": 3.44375234521576, "grad_norm": 0.43907630443573, "learning_rate": 0.002828, "loss": 1.9604, "step": 45888 }, { "epoch": 3.4485553470919323, "grad_norm": 0.4232199788093567, "learning_rate": 0.002828, "loss": 1.9663, "step": 45952 }, { "epoch": 3.4533583489681052, "grad_norm": 0.4069931209087372, "learning_rate": 0.002828, "loss": 1.9663, "step": 46016 }, { "epoch": 3.4581613508442777, "grad_norm": 0.42427578568458557, "learning_rate": 0.002828, "loss": 1.9562, "step": 46080 }, { "epoch": 3.46296435272045, "grad_norm": 0.417915016412735, "learning_rate": 0.002828, "loss": 1.9594, "step": 46144 }, { "epoch": 3.467767354596623, "grad_norm": 0.46104422211647034, "learning_rate": 0.002828, "loss": 1.9634, "step": 46208 }, { "epoch": 3.4725703564727954, "grad_norm": 0.5542020201683044, "learning_rate": 0.002828, "loss": 1.9643, "step": 46272 }, { "epoch": 3.4773733583489683, "grad_norm": 0.5005798935890198, "learning_rate": 0.002828, "loss": 1.9575, "step": 46336 }, { "epoch": 3.4821763602251408, "grad_norm": 0.41317811608314514, "learning_rate": 0.002828, "loss": 1.9564, "step": 46400 }, { "epoch": 3.486979362101313, "grad_norm": 0.4168444275856018, "learning_rate": 0.002828, "loss": 1.9619, "step": 46464 }, { "epoch": 3.491782363977486, "grad_norm": 0.4567578434944153, "learning_rate": 0.002828, "loss": 1.9597, "step": 46528 }, { "epoch": 3.4965853658536585, "grad_norm": 0.4371911287307739, "learning_rate": 0.002828, "loss": 1.9596, "step": 46592 }, { "epoch": 3.501388367729831, "grad_norm": 0.38273531198501587, "learning_rate": 0.002828, "loss": 1.9554, "step": 46656 }, { "epoch": 3.506191369606004, "grad_norm": 0.4254724681377411, "learning_rate": 0.002828, "loss": 1.9556, "step": 46720 }, { "epoch": 3.5109943714821763, "grad_norm": 0.39332327246665955, "learning_rate": 0.002828, "loss": 1.9555, "step": 46784 }, { "epoch": 3.5157973733583487, "grad_norm": 0.3772275745868683, "learning_rate": 0.002828, "loss": 1.9573, "step": 46848 }, { "epoch": 3.5206003752345216, "grad_norm": 0.4225102663040161, "learning_rate": 0.002828, "loss": 1.9581, "step": 46912 }, { "epoch": 3.525403377110694, "grad_norm": 0.4391845762729645, "learning_rate": 0.002828, "loss": 1.958, "step": 46976 }, { "epoch": 3.530206378986867, "grad_norm": 0.43624913692474365, "learning_rate": 0.002828, "loss": 1.9596, "step": 47040 }, { "epoch": 3.5350093808630394, "grad_norm": 0.4952937364578247, "learning_rate": 0.002828, "loss": 1.9511, "step": 47104 }, { "epoch": 3.5398123827392123, "grad_norm": 0.4626399576663971, "learning_rate": 0.002828, "loss": 1.9537, "step": 47168 }, { "epoch": 3.5446153846153847, "grad_norm": 0.40251588821411133, "learning_rate": 0.002828, "loss": 1.9512, "step": 47232 }, { "epoch": 3.549418386491557, "grad_norm": 0.4524631202220917, "learning_rate": 0.002828, "loss": 1.953, "step": 47296 }, { "epoch": 3.55422138836773, "grad_norm": 0.3918924629688263, "learning_rate": 0.002828, "loss": 1.9534, "step": 47360 }, { "epoch": 3.5590243902439025, "grad_norm": 0.3780909478664398, "learning_rate": 0.002828, "loss": 1.9525, "step": 47424 }, { "epoch": 3.563827392120075, "grad_norm": 0.47011178731918335, "learning_rate": 0.002828, "loss": 1.9526, "step": 47488 }, { "epoch": 3.568630393996248, "grad_norm": 0.5357784628868103, "learning_rate": 0.002828, "loss": 1.9562, "step": 47552 }, { "epoch": 3.5734333958724203, "grad_norm": 0.3771846890449524, "learning_rate": 0.002828, "loss": 1.9556, "step": 47616 }, { "epoch": 3.5782363977485927, "grad_norm": 0.44459056854248047, "learning_rate": 0.002828, "loss": 1.9459, "step": 47680 }, { "epoch": 3.5830393996247656, "grad_norm": 0.45114779472351074, "learning_rate": 0.002828, "loss": 1.9509, "step": 47744 }, { "epoch": 3.587842401500938, "grad_norm": 0.49684053659439087, "learning_rate": 0.002828, "loss": 1.9499, "step": 47808 }, { "epoch": 3.5926454033771105, "grad_norm": 0.5553718209266663, "learning_rate": 0.002828, "loss": 1.9526, "step": 47872 }, { "epoch": 3.5974484052532834, "grad_norm": 0.4265674650669098, "learning_rate": 0.002828, "loss": 1.9544, "step": 47936 }, { "epoch": 3.602251407129456, "grad_norm": 0.401682585477829, "learning_rate": 0.002828, "loss": 1.9458, "step": 48000 }, { "epoch": 3.6070544090056287, "grad_norm": 0.4206481873989105, "learning_rate": 0.002828, "loss": 1.9523, "step": 48064 }, { "epoch": 3.611857410881801, "grad_norm": 0.40604767203330994, "learning_rate": 0.002828, "loss": 1.9511, "step": 48128 }, { "epoch": 3.616660412757974, "grad_norm": 0.43780919909477234, "learning_rate": 0.002828, "loss": 1.9409, "step": 48192 }, { "epoch": 3.6214634146341464, "grad_norm": 0.46581652760505676, "learning_rate": 0.002828, "loss": 1.9493, "step": 48256 }, { "epoch": 3.626266416510319, "grad_norm": 0.4305538237094879, "learning_rate": 0.002828, "loss": 1.9489, "step": 48320 }, { "epoch": 3.6310694183864918, "grad_norm": 0.4123769998550415, "learning_rate": 0.002828, "loss": 1.9512, "step": 48384 }, { "epoch": 3.635872420262664, "grad_norm": 0.41718190908432007, "learning_rate": 0.002828, "loss": 1.9449, "step": 48448 }, { "epoch": 3.6406754221388367, "grad_norm": 0.4468398988246918, "learning_rate": 0.002828, "loss": 1.9395, "step": 48512 }, { "epoch": 3.6454784240150095, "grad_norm": 0.47508302330970764, "learning_rate": 0.002828, "loss": 1.9455, "step": 48576 }, { "epoch": 3.650281425891182, "grad_norm": 0.4177429676055908, "learning_rate": 0.002828, "loss": 1.9431, "step": 48640 }, { "epoch": 3.6550844277673544, "grad_norm": 0.44048476219177246, "learning_rate": 0.002828, "loss": 1.9477, "step": 48704 }, { "epoch": 3.6598874296435273, "grad_norm": 0.41455012559890747, "learning_rate": 0.002828, "loss": 1.9436, "step": 48768 }, { "epoch": 3.6646904315196998, "grad_norm": 0.39578694105148315, "learning_rate": 0.002828, "loss": 1.9437, "step": 48832 }, { "epoch": 3.669493433395872, "grad_norm": 0.45000991225242615, "learning_rate": 0.002828, "loss": 1.9433, "step": 48896 }, { "epoch": 3.674296435272045, "grad_norm": 0.43240562081336975, "learning_rate": 0.002828, "loss": 1.9445, "step": 48960 }, { "epoch": 3.6790994371482175, "grad_norm": 0.4270322322845459, "learning_rate": 0.002828, "loss": 1.9489, "step": 49024 }, { "epoch": 3.6839024390243904, "grad_norm": 0.3854438066482544, "learning_rate": 0.002828, "loss": 1.9316, "step": 49088 }, { "epoch": 3.688705440900563, "grad_norm": 0.4212285280227661, "learning_rate": 0.002828, "loss": 1.941, "step": 49152 }, { "epoch": 3.6935084427767357, "grad_norm": 0.38058602809906006, "learning_rate": 0.002828, "loss": 1.948, "step": 49216 }, { "epoch": 3.698311444652908, "grad_norm": 0.4383929669857025, "learning_rate": 0.002828, "loss": 1.9357, "step": 49280 }, { "epoch": 3.7031144465290806, "grad_norm": 0.42744216322898865, "learning_rate": 0.002828, "loss": 1.9379, "step": 49344 }, { "epoch": 3.7079174484052535, "grad_norm": 0.43913623690605164, "learning_rate": 0.002828, "loss": 1.94, "step": 49408 }, { "epoch": 3.712720450281426, "grad_norm": 0.3816799521446228, "learning_rate": 0.002828, "loss": 1.9438, "step": 49472 }, { "epoch": 3.7175234521575984, "grad_norm": 0.4266246259212494, "learning_rate": 0.002828, "loss": 1.9346, "step": 49536 }, { "epoch": 3.7223264540337713, "grad_norm": 0.4960864186286926, "learning_rate": 0.002828, "loss": 1.9402, "step": 49600 }, { "epoch": 3.7271294559099437, "grad_norm": 0.4896080493927002, "learning_rate": 0.002828, "loss": 1.935, "step": 49664 }, { "epoch": 3.731932457786116, "grad_norm": 0.4197131395339966, "learning_rate": 0.002828, "loss": 1.9363, "step": 49728 }, { "epoch": 3.736735459662289, "grad_norm": 0.4813455045223236, "learning_rate": 0.002828, "loss": 1.9419, "step": 49792 }, { "epoch": 3.7415384615384615, "grad_norm": 0.4002421796321869, "learning_rate": 0.002828, "loss": 1.941, "step": 49856 }, { "epoch": 3.746341463414634, "grad_norm": 0.37192049622535706, "learning_rate": 0.002828, "loss": 1.9327, "step": 49920 }, { "epoch": 3.751144465290807, "grad_norm": 0.39583730697631836, "learning_rate": 0.002828, "loss": 1.9378, "step": 49984 }, { "epoch": 3.7559474671669792, "grad_norm": 0.43674999475479126, "learning_rate": 0.002828, "loss": 1.9378, "step": 50048 }, { "epoch": 3.760750469043152, "grad_norm": 0.5882846117019653, "learning_rate": 0.002828, "loss": 1.9322, "step": 50112 }, { "epoch": 3.7655534709193246, "grad_norm": 0.448281466960907, "learning_rate": 0.002828, "loss": 1.9353, "step": 50176 }, { "epoch": 3.7703564727954975, "grad_norm": 0.4727672338485718, "learning_rate": 0.002828, "loss": 1.9298, "step": 50240 }, { "epoch": 3.77515947467167, "grad_norm": 0.4881625175476074, "learning_rate": 0.002828, "loss": 1.9377, "step": 50304 }, { "epoch": 3.7799624765478423, "grad_norm": 0.5209890007972717, "learning_rate": 0.002828, "loss": 1.933, "step": 50368 }, { "epoch": 3.7847654784240152, "grad_norm": 0.38057464361190796, "learning_rate": 0.002828, "loss": 1.9348, "step": 50432 }, { "epoch": 3.7895684803001877, "grad_norm": 0.4636854827404022, "learning_rate": 0.002828, "loss": 1.9339, "step": 50496 }, { "epoch": 3.79437148217636, "grad_norm": 0.48334306478500366, "learning_rate": 0.002828, "loss": 1.9333, "step": 50560 }, { "epoch": 3.799174484052533, "grad_norm": 0.3957020938396454, "learning_rate": 0.002828, "loss": 1.9392, "step": 50624 }, { "epoch": 3.8039774859287054, "grad_norm": 0.3829587996006012, "learning_rate": 0.002828, "loss": 1.9245, "step": 50688 }, { "epoch": 3.808780487804878, "grad_norm": 0.38526973128318787, "learning_rate": 0.002828, "loss": 1.9269, "step": 50752 }, { "epoch": 3.8135834896810508, "grad_norm": 0.3830317258834839, "learning_rate": 0.002828, "loss": 1.9258, "step": 50816 }, { "epoch": 3.818386491557223, "grad_norm": 0.39473289251327515, "learning_rate": 0.002828, "loss": 1.9269, "step": 50880 }, { "epoch": 3.8231894934333956, "grad_norm": 0.4700523614883423, "learning_rate": 0.002828, "loss": 1.925, "step": 50944 }, { "epoch": 3.8279924953095685, "grad_norm": 0.42843738198280334, "learning_rate": 0.002828, "loss": 1.9293, "step": 51008 }, { "epoch": 3.832795497185741, "grad_norm": 0.47291210293769836, "learning_rate": 0.002828, "loss": 1.9304, "step": 51072 }, { "epoch": 3.837598499061914, "grad_norm": 0.46181681752204895, "learning_rate": 0.002828, "loss": 1.9339, "step": 51136 }, { "epoch": 3.8424015009380863, "grad_norm": 0.4250589609146118, "learning_rate": 0.002828, "loss": 1.9271, "step": 51200 }, { "epoch": 3.847204502814259, "grad_norm": 0.45093217492103577, "learning_rate": 0.002828, "loss": 1.9299, "step": 51264 }, { "epoch": 3.8520075046904316, "grad_norm": 0.40182143449783325, "learning_rate": 0.002828, "loss": 1.9321, "step": 51328 }, { "epoch": 3.856810506566604, "grad_norm": 0.43783003091812134, "learning_rate": 0.002828, "loss": 1.9267, "step": 51392 }, { "epoch": 3.861613508442777, "grad_norm": 0.3968484103679657, "learning_rate": 0.002828, "loss": 1.9245, "step": 51456 }, { "epoch": 3.8664165103189494, "grad_norm": 0.4145183563232422, "learning_rate": 0.002828, "loss": 1.9171, "step": 51520 }, { "epoch": 3.871219512195122, "grad_norm": 0.42234665155410767, "learning_rate": 0.002828, "loss": 1.9269, "step": 51584 }, { "epoch": 3.8760225140712947, "grad_norm": 0.414061963558197, "learning_rate": 0.002828, "loss": 1.931, "step": 51648 }, { "epoch": 3.880825515947467, "grad_norm": 0.36635687947273254, "learning_rate": 0.002828, "loss": 1.9259, "step": 51712 }, { "epoch": 3.8856285178236396, "grad_norm": 0.37826499342918396, "learning_rate": 0.002828, "loss": 1.9234, "step": 51776 }, { "epoch": 3.8904315196998125, "grad_norm": 0.39356598258018494, "learning_rate": 0.002828, "loss": 1.925, "step": 51840 }, { "epoch": 3.895234521575985, "grad_norm": 0.49679431319236755, "learning_rate": 0.002828, "loss": 1.9167, "step": 51904 }, { "epoch": 3.9000375234521574, "grad_norm": 0.4129243493080139, "learning_rate": 0.002828, "loss": 1.9151, "step": 51968 }, { "epoch": 3.9048405253283303, "grad_norm": 0.4656813442707062, "learning_rate": 0.002828, "loss": 1.9203, "step": 52032 }, { "epoch": 3.9096435272045027, "grad_norm": 0.4859287440776825, "learning_rate": 0.002828, "loss": 1.9345, "step": 52096 }, { "epoch": 3.9144465290806756, "grad_norm": 0.4260033965110779, "learning_rate": 0.002828, "loss": 1.9259, "step": 52160 }, { "epoch": 3.919249530956848, "grad_norm": 0.4332883059978485, "learning_rate": 0.002828, "loss": 1.9214, "step": 52224 }, { "epoch": 3.924052532833021, "grad_norm": 0.418282687664032, "learning_rate": 0.002828, "loss": 1.9226, "step": 52288 }, { "epoch": 3.9288555347091934, "grad_norm": 0.38305866718292236, "learning_rate": 0.002828, "loss": 1.918, "step": 52352 }, { "epoch": 3.933658536585366, "grad_norm": 0.4469663202762604, "learning_rate": 0.002828, "loss": 1.9224, "step": 52416 }, { "epoch": 3.9384615384615387, "grad_norm": 0.4243173599243164, "learning_rate": 0.002828, "loss": 1.9261, "step": 52480 }, { "epoch": 3.943264540337711, "grad_norm": 0.4377906918525696, "learning_rate": 0.002828, "loss": 1.9203, "step": 52544 }, { "epoch": 3.9480675422138836, "grad_norm": 0.4146629571914673, "learning_rate": 0.002828, "loss": 1.9228, "step": 52608 }, { "epoch": 3.9528705440900564, "grad_norm": 0.41136953234672546, "learning_rate": 0.002828, "loss": 1.9203, "step": 52672 }, { "epoch": 3.957673545966229, "grad_norm": 0.39297160506248474, "learning_rate": 0.002828, "loss": 1.9237, "step": 52736 }, { "epoch": 3.9624765478424013, "grad_norm": 0.39389610290527344, "learning_rate": 0.002828, "loss": 1.9248, "step": 52800 }, { "epoch": 3.967279549718574, "grad_norm": 0.45044317841529846, "learning_rate": 0.002828, "loss": 1.9186, "step": 52864 }, { "epoch": 3.9720825515947467, "grad_norm": 0.4556477665901184, "learning_rate": 0.002828, "loss": 1.9136, "step": 52928 }, { "epoch": 3.976885553470919, "grad_norm": 0.41699573397636414, "learning_rate": 0.002828, "loss": 1.916, "step": 52992 }, { "epoch": 3.981688555347092, "grad_norm": 0.45948272943496704, "learning_rate": 0.002828, "loss": 1.9189, "step": 53056 }, { "epoch": 3.9864915572232644, "grad_norm": 0.3985957205295563, "learning_rate": 0.002828, "loss": 1.9218, "step": 53120 }, { "epoch": 3.9912945590994373, "grad_norm": 0.3850821852684021, "learning_rate": 0.002828, "loss": 1.9249, "step": 53184 }, { "epoch": 3.9960975609756098, "grad_norm": 0.4339929521083832, "learning_rate": 0.002828, "loss": 1.9169, "step": 53248 }, { "epoch": 4.000900562851783, "grad_norm": 0.42089322209358215, "learning_rate": 0.002828, "loss": 1.9067, "step": 53312 }, { "epoch": 4.005703564727955, "grad_norm": 0.4298423230648041, "learning_rate": 0.002828, "loss": 1.8761, "step": 53376 }, { "epoch": 4.0105065666041275, "grad_norm": 0.46022525429725647, "learning_rate": 0.002828, "loss": 1.8828, "step": 53440 }, { "epoch": 4.0153095684803, "grad_norm": 0.39294660091400146, "learning_rate": 0.002828, "loss": 1.8795, "step": 53504 }, { "epoch": 4.020112570356472, "grad_norm": 0.4576168656349182, "learning_rate": 0.002828, "loss": 1.8856, "step": 53568 }, { "epoch": 4.024915572232645, "grad_norm": 0.47902917861938477, "learning_rate": 0.002828, "loss": 1.8745, "step": 53632 }, { "epoch": 4.029718574108818, "grad_norm": 0.4290916621685028, "learning_rate": 0.002828, "loss": 1.8673, "step": 53696 }, { "epoch": 4.034521575984991, "grad_norm": 0.508795976638794, "learning_rate": 0.002828, "loss": 1.8712, "step": 53760 }, { "epoch": 4.039324577861163, "grad_norm": 0.4904632866382599, "learning_rate": 0.002828, "loss": 1.8711, "step": 53824 }, { "epoch": 4.044127579737336, "grad_norm": 0.4192737936973572, "learning_rate": 0.002828, "loss": 1.8804, "step": 53888 }, { "epoch": 4.048930581613509, "grad_norm": 0.4394719898700714, "learning_rate": 0.002828, "loss": 1.8748, "step": 53952 }, { "epoch": 4.053733583489681, "grad_norm": 0.4148879051208496, "learning_rate": 0.002828, "loss": 1.8713, "step": 54016 }, { "epoch": 4.058536585365854, "grad_norm": 0.4630700349807739, "learning_rate": 0.002828, "loss": 1.8691, "step": 54080 }, { "epoch": 4.063339587242027, "grad_norm": 0.43519315123558044, "learning_rate": 0.002828, "loss": 1.8795, "step": 54144 }, { "epoch": 4.068142589118199, "grad_norm": 0.49164682626724243, "learning_rate": 0.002828, "loss": 1.8829, "step": 54208 }, { "epoch": 4.0729455909943715, "grad_norm": 0.4109393358230591, "learning_rate": 0.002828, "loss": 1.8735, "step": 54272 }, { "epoch": 4.077748592870544, "grad_norm": 0.5678685903549194, "learning_rate": 0.002828, "loss": 1.8681, "step": 54336 }, { "epoch": 4.082551594746716, "grad_norm": 0.461561918258667, "learning_rate": 0.002828, "loss": 1.8678, "step": 54400 }, { "epoch": 4.087354596622889, "grad_norm": 0.7234866619110107, "learning_rate": 0.002828, "loss": 1.8763, "step": 54464 }, { "epoch": 4.092157598499062, "grad_norm": 0.4788929522037506, "learning_rate": 0.002828, "loss": 1.8791, "step": 54528 }, { "epoch": 4.096960600375234, "grad_norm": 0.40153634548187256, "learning_rate": 0.002828, "loss": 1.8763, "step": 54592 }, { "epoch": 4.101763602251407, "grad_norm": 0.4675711691379547, "learning_rate": 0.002828, "loss": 1.8755, "step": 54656 }, { "epoch": 4.10656660412758, "grad_norm": 0.40922197699546814, "learning_rate": 0.002828, "loss": 1.8688, "step": 54720 }, { "epoch": 4.111369606003752, "grad_norm": 0.46092039346694946, "learning_rate": 0.002828, "loss": 1.8722, "step": 54784 }, { "epoch": 4.116172607879925, "grad_norm": 0.41091591119766235, "learning_rate": 0.002828, "loss": 1.8691, "step": 54848 }, { "epoch": 4.120975609756098, "grad_norm": 0.39091047644615173, "learning_rate": 0.002828, "loss": 1.8752, "step": 54912 }, { "epoch": 4.1257786116322706, "grad_norm": 0.379625141620636, "learning_rate": 0.002828, "loss": 1.8691, "step": 54976 }, { "epoch": 4.1305816135084426, "grad_norm": 0.44625696539878845, "learning_rate": 0.002828, "loss": 1.878, "step": 55040 }, { "epoch": 4.135384615384615, "grad_norm": 0.4220277965068817, "learning_rate": 0.002828, "loss": 1.8701, "step": 55104 }, { "epoch": 4.140187617260788, "grad_norm": 0.5710861682891846, "learning_rate": 0.002828, "loss": 1.874, "step": 55168 }, { "epoch": 4.14499061913696, "grad_norm": 0.39823463559150696, "learning_rate": 0.002828, "loss": 1.8695, "step": 55232 }, { "epoch": 4.149793621013133, "grad_norm": 0.38708966970443726, "learning_rate": 0.002828, "loss": 1.8693, "step": 55296 }, { "epoch": 4.154596622889306, "grad_norm": 0.4767194390296936, "learning_rate": 0.002828, "loss": 1.8725, "step": 55360 }, { "epoch": 4.159399624765478, "grad_norm": 0.42039304971694946, "learning_rate": 0.002828, "loss": 1.8705, "step": 55424 }, { "epoch": 4.164202626641651, "grad_norm": 0.4277998208999634, "learning_rate": 0.002828, "loss": 1.8793, "step": 55488 }, { "epoch": 4.169005628517824, "grad_norm": 0.41498157382011414, "learning_rate": 0.002828, "loss": 1.8763, "step": 55552 }, { "epoch": 4.173808630393996, "grad_norm": 0.6043418049812317, "learning_rate": 0.002828, "loss": 1.8711, "step": 55616 }, { "epoch": 4.178611632270169, "grad_norm": 0.4799351692199707, "learning_rate": 0.002828, "loss": 1.8747, "step": 55680 }, { "epoch": 4.183414634146342, "grad_norm": 0.40535420179367065, "learning_rate": 0.002828, "loss": 1.8722, "step": 55744 }, { "epoch": 4.1882176360225145, "grad_norm": 0.4478679597377777, "learning_rate": 0.002828, "loss": 1.8682, "step": 55808 }, { "epoch": 4.1930206378986865, "grad_norm": 0.4330839216709137, "learning_rate": 0.002828, "loss": 1.864, "step": 55872 }, { "epoch": 4.197823639774859, "grad_norm": 0.45498356223106384, "learning_rate": 0.002828, "loss": 1.8723, "step": 55936 }, { "epoch": 4.202626641651032, "grad_norm": 0.5293490290641785, "learning_rate": 0.002828, "loss": 1.8685, "step": 56000 }, { "epoch": 4.207429643527204, "grad_norm": 0.7796311974525452, "learning_rate": 0.002828, "loss": 1.8708, "step": 56064 }, { "epoch": 4.212232645403377, "grad_norm": 0.4882802963256836, "learning_rate": 0.002828, "loss": 1.8755, "step": 56128 }, { "epoch": 4.21703564727955, "grad_norm": 0.44128352403640747, "learning_rate": 0.002828, "loss": 1.8751, "step": 56192 }, { "epoch": 4.221838649155722, "grad_norm": 0.41939055919647217, "learning_rate": 0.002828, "loss": 1.8772, "step": 56256 }, { "epoch": 4.226641651031895, "grad_norm": 0.4133007228374481, "learning_rate": 0.002828, "loss": 1.8759, "step": 56320 }, { "epoch": 4.231444652908068, "grad_norm": 0.466492235660553, "learning_rate": 0.002828, "loss": 1.8758, "step": 56384 }, { "epoch": 4.23624765478424, "grad_norm": 0.43265125155448914, "learning_rate": 0.002828, "loss": 1.868, "step": 56448 }, { "epoch": 4.241050656660413, "grad_norm": 0.4278468191623688, "learning_rate": 0.002828, "loss": 1.8696, "step": 56512 }, { "epoch": 4.245853658536586, "grad_norm": 0.43418794870376587, "learning_rate": 0.002828, "loss": 1.8714, "step": 56576 }, { "epoch": 4.250656660412758, "grad_norm": 0.4910091459751129, "learning_rate": 0.002828, "loss": 1.8633, "step": 56640 }, { "epoch": 4.2554596622889305, "grad_norm": 0.7604688405990601, "learning_rate": 0.002828, "loss": 1.8744, "step": 56704 }, { "epoch": 4.260262664165103, "grad_norm": 0.44893890619277954, "learning_rate": 0.002828, "loss": 1.8705, "step": 56768 }, { "epoch": 4.265065666041275, "grad_norm": 0.4631795585155487, "learning_rate": 0.002828, "loss": 1.8638, "step": 56832 }, { "epoch": 4.269868667917448, "grad_norm": 0.4360625147819519, "learning_rate": 0.002828, "loss": 1.8621, "step": 56896 }, { "epoch": 4.274671669793621, "grad_norm": 0.40779635310173035, "learning_rate": 0.002828, "loss": 1.8536, "step": 56960 }, { "epoch": 4.279474671669794, "grad_norm": 0.4392165243625641, "learning_rate": 0.002828, "loss": 1.8534, "step": 57024 }, { "epoch": 4.284277673545966, "grad_norm": 0.49357831478118896, "learning_rate": 0.002828, "loss": 1.8607, "step": 57088 }, { "epoch": 4.289080675422139, "grad_norm": 0.4300871193408966, "learning_rate": 0.002828, "loss": 1.8587, "step": 57152 }, { "epoch": 4.293883677298312, "grad_norm": 0.4610048532485962, "learning_rate": 0.002828, "loss": 1.8677, "step": 57216 }, { "epoch": 4.298686679174484, "grad_norm": 0.4305998682975769, "learning_rate": 0.002828, "loss": 1.8646, "step": 57280 }, { "epoch": 4.303489681050657, "grad_norm": 0.4064194858074188, "learning_rate": 0.002828, "loss": 1.864, "step": 57344 }, { "epoch": 4.3082926829268295, "grad_norm": 0.3984262943267822, "learning_rate": 0.002828, "loss": 1.8621, "step": 57408 }, { "epoch": 4.3130956848030015, "grad_norm": 0.4508727192878723, "learning_rate": 0.002828, "loss": 1.8579, "step": 57472 }, { "epoch": 4.317898686679174, "grad_norm": 0.4348895251750946, "learning_rate": 0.002828, "loss": 1.8553, "step": 57536 }, { "epoch": 4.322701688555347, "grad_norm": 0.3835434317588806, "learning_rate": 0.002828, "loss": 1.8568, "step": 57600 }, { "epoch": 4.327504690431519, "grad_norm": 0.429548054933548, "learning_rate": 0.002828, "loss": 1.8619, "step": 57664 }, { "epoch": 4.332307692307692, "grad_norm": 0.43585842847824097, "learning_rate": 0.002828, "loss": 1.87, "step": 57728 }, { "epoch": 4.337110694183865, "grad_norm": 0.4420074224472046, "learning_rate": 0.002828, "loss": 1.858, "step": 57792 }, { "epoch": 4.341913696060038, "grad_norm": 0.4094654321670532, "learning_rate": 0.002828, "loss": 1.8586, "step": 57856 }, { "epoch": 4.34671669793621, "grad_norm": 0.40466663241386414, "learning_rate": 0.002828, "loss": 1.8472, "step": 57920 }, { "epoch": 4.351519699812383, "grad_norm": 0.4832182228565216, "learning_rate": 0.002828, "loss": 1.8575, "step": 57984 }, { "epoch": 4.356322701688556, "grad_norm": 0.4997383654117584, "learning_rate": 0.002828, "loss": 1.8655, "step": 58048 }, { "epoch": 4.361125703564728, "grad_norm": 0.4515133500099182, "learning_rate": 0.002828, "loss": 1.8639, "step": 58112 }, { "epoch": 4.365928705440901, "grad_norm": 0.4245718717575073, "learning_rate": 0.002828, "loss": 1.8611, "step": 58176 }, { "epoch": 4.3707317073170735, "grad_norm": 0.42741069197654724, "learning_rate": 0.002828, "loss": 1.8614, "step": 58240 }, { "epoch": 4.3755347091932455, "grad_norm": 0.49281513690948486, "learning_rate": 0.002828, "loss": 1.8527, "step": 58304 }, { "epoch": 4.380337711069418, "grad_norm": 0.4547692537307739, "learning_rate": 0.002828, "loss": 1.8618, "step": 58368 }, { "epoch": 4.385140712945591, "grad_norm": 0.4504334628582001, "learning_rate": 0.002828, "loss": 1.8611, "step": 58432 }, { "epoch": 4.389943714821763, "grad_norm": 0.39381441473960876, "learning_rate": 0.002828, "loss": 1.8503, "step": 58496 }, { "epoch": 4.394746716697936, "grad_norm": 0.60453861951828, "learning_rate": 0.002828, "loss": 1.8597, "step": 58560 }, { "epoch": 4.399549718574109, "grad_norm": 0.45758405327796936, "learning_rate": 0.002828, "loss": 1.8675, "step": 58624 }, { "epoch": 4.404352720450281, "grad_norm": 0.3902152478694916, "learning_rate": 0.002828, "loss": 1.8561, "step": 58688 }, { "epoch": 4.409155722326454, "grad_norm": 0.457916796207428, "learning_rate": 0.002828, "loss": 1.8589, "step": 58752 }, { "epoch": 4.413958724202627, "grad_norm": 0.424445241689682, "learning_rate": 0.002828, "loss": 1.8548, "step": 58816 }, { "epoch": 4.418761726078799, "grad_norm": 0.4457383453845978, "learning_rate": 0.002828, "loss": 1.8553, "step": 58880 }, { "epoch": 4.423564727954972, "grad_norm": 1.47513747215271, "learning_rate": 0.002828, "loss": 1.8599, "step": 58944 }, { "epoch": 4.428367729831145, "grad_norm": 0.40880438685417175, "learning_rate": 0.002828, "loss": 1.8554, "step": 59008 }, { "epoch": 4.4331707317073175, "grad_norm": 0.4844193756580353, "learning_rate": 0.002828, "loss": 1.8558, "step": 59072 }, { "epoch": 4.4379737335834895, "grad_norm": 0.3893410861492157, "learning_rate": 0.002828, "loss": 1.8615, "step": 59136 }, { "epoch": 4.442776735459662, "grad_norm": 0.43397217988967896, "learning_rate": 0.002828, "loss": 1.8617, "step": 59200 }, { "epoch": 4.447579737335835, "grad_norm": 0.40611037611961365, "learning_rate": 0.002828, "loss": 1.86, "step": 59264 }, { "epoch": 4.452382739212007, "grad_norm": 0.42537084221839905, "learning_rate": 0.002828, "loss": 1.8555, "step": 59328 }, { "epoch": 4.45718574108818, "grad_norm": 0.39853808283805847, "learning_rate": 0.002828, "loss": 1.8637, "step": 59392 }, { "epoch": 4.461988742964353, "grad_norm": 0.43848931789398193, "learning_rate": 0.002828, "loss": 1.8505, "step": 59456 }, { "epoch": 4.466791744840525, "grad_norm": 0.4788341522216797, "learning_rate": 0.002828, "loss": 1.8477, "step": 59520 }, { "epoch": 4.471594746716698, "grad_norm": 0.42831316590309143, "learning_rate": 0.002828, "loss": 1.8514, "step": 59584 }, { "epoch": 4.476397748592871, "grad_norm": 0.51185542345047, "learning_rate": 0.002828, "loss": 1.8575, "step": 59648 }, { "epoch": 4.481200750469043, "grad_norm": 0.4338521361351013, "learning_rate": 0.002828, "loss": 1.8546, "step": 59712 }, { "epoch": 4.486003752345216, "grad_norm": 0.40350961685180664, "learning_rate": 0.002828, "loss": 1.8528, "step": 59776 }, { "epoch": 4.4908067542213885, "grad_norm": 0.433065801858902, "learning_rate": 0.002828, "loss": 1.8479, "step": 59840 }, { "epoch": 4.495609756097561, "grad_norm": 0.44782695174217224, "learning_rate": 0.002828, "loss": 1.8568, "step": 59904 }, { "epoch": 4.500412757973733, "grad_norm": 0.4756898283958435, "learning_rate": 0.002828, "loss": 1.8544, "step": 59968 }, { "epoch": 4.505215759849906, "grad_norm": 0.3805321455001831, "learning_rate": 0.002828, "loss": 1.8506, "step": 60032 }, { "epoch": 4.510018761726079, "grad_norm": 0.4259619414806366, "learning_rate": 0.002828, "loss": 1.8482, "step": 60096 }, { "epoch": 4.514821763602251, "grad_norm": 0.4335617125034332, "learning_rate": 0.002828, "loss": 1.8547, "step": 60160 }, { "epoch": 4.519624765478424, "grad_norm": 0.39122387766838074, "learning_rate": 0.002828, "loss": 1.8506, "step": 60224 }, { "epoch": 4.524427767354597, "grad_norm": 0.4266170859336853, "learning_rate": 0.002828, "loss": 1.8502, "step": 60288 }, { "epoch": 4.529230769230769, "grad_norm": 0.439899206161499, "learning_rate": 0.002828, "loss": 1.8572, "step": 60352 }, { "epoch": 4.534033771106942, "grad_norm": 0.4882529675960541, "learning_rate": 0.002828, "loss": 1.8493, "step": 60416 }, { "epoch": 4.538836772983115, "grad_norm": 0.44590744376182556, "learning_rate": 0.002828, "loss": 1.8406, "step": 60480 }, { "epoch": 4.543639774859287, "grad_norm": 0.4067928194999695, "learning_rate": 0.002828, "loss": 1.851, "step": 60544 }, { "epoch": 4.54844277673546, "grad_norm": 0.4017028510570526, "learning_rate": 0.002828, "loss": 1.8473, "step": 60608 }, { "epoch": 4.5532457786116325, "grad_norm": 0.42601388692855835, "learning_rate": 0.002828, "loss": 1.8501, "step": 60672 }, { "epoch": 4.558048780487805, "grad_norm": 0.43437841534614563, "learning_rate": 0.002828, "loss": 1.847, "step": 60736 }, { "epoch": 4.562851782363977, "grad_norm": 0.42863306403160095, "learning_rate": 0.002828, "loss": 1.8462, "step": 60800 }, { "epoch": 4.56765478424015, "grad_norm": 0.43381252884864807, "learning_rate": 0.002828, "loss": 1.8448, "step": 60864 }, { "epoch": 4.572457786116322, "grad_norm": 0.46240073442459106, "learning_rate": 0.002828, "loss": 1.852, "step": 60928 }, { "epoch": 4.577260787992495, "grad_norm": 0.4556284546852112, "learning_rate": 0.002828, "loss": 1.8522, "step": 60992 }, { "epoch": 4.582063789868668, "grad_norm": 0.41591572761535645, "learning_rate": 0.002828, "loss": 1.8448, "step": 61056 }, { "epoch": 4.586866791744841, "grad_norm": 0.44293197989463806, "learning_rate": 0.002828, "loss": 1.8434, "step": 61120 }, { "epoch": 4.591669793621013, "grad_norm": 0.42343223094940186, "learning_rate": 0.002828, "loss": 1.8484, "step": 61184 }, { "epoch": 4.596472795497186, "grad_norm": 0.3981548547744751, "learning_rate": 0.002828, "loss": 1.8507, "step": 61248 }, { "epoch": 4.601275797373359, "grad_norm": 0.47165608406066895, "learning_rate": 0.002828, "loss": 1.8514, "step": 61312 }, { "epoch": 4.606078799249531, "grad_norm": 0.4086002707481384, "learning_rate": 0.002828, "loss": 1.8517, "step": 61376 }, { "epoch": 4.610881801125704, "grad_norm": 0.5088809728622437, "learning_rate": 0.002828, "loss": 1.8492, "step": 61440 }, { "epoch": 4.6156848030018764, "grad_norm": 0.5022900104522705, "learning_rate": 0.002828, "loss": 1.8445, "step": 61504 }, { "epoch": 4.620487804878048, "grad_norm": 0.4150559604167938, "learning_rate": 0.002828, "loss": 1.8515, "step": 61568 }, { "epoch": 4.625290806754221, "grad_norm": 0.4678759276866913, "learning_rate": 0.002828, "loss": 1.8392, "step": 61632 }, { "epoch": 4.630093808630394, "grad_norm": 0.5109483003616333, "learning_rate": 0.002828, "loss": 1.8465, "step": 61696 }, { "epoch": 4.634896810506566, "grad_norm": 0.4642334580421448, "learning_rate": 0.002828, "loss": 1.8472, "step": 61760 }, { "epoch": 4.639699812382739, "grad_norm": 0.49772489070892334, "learning_rate": 0.002828, "loss": 1.8409, "step": 61824 }, { "epoch": 4.644502814258912, "grad_norm": 0.3974035680294037, "learning_rate": 0.002828, "loss": 1.8481, "step": 61888 }, { "epoch": 4.649305816135085, "grad_norm": 0.5001426935195923, "learning_rate": 0.002828, "loss": 1.843, "step": 61952 }, { "epoch": 4.654108818011257, "grad_norm": 0.44995561242103577, "learning_rate": 0.002828, "loss": 1.8444, "step": 62016 }, { "epoch": 4.65891181988743, "grad_norm": 0.42114222049713135, "learning_rate": 0.002828, "loss": 1.8455, "step": 62080 }, { "epoch": 4.663714821763603, "grad_norm": 0.4493741989135742, "learning_rate": 0.002828, "loss": 1.8366, "step": 62144 }, { "epoch": 4.668517823639775, "grad_norm": 0.48146486282348633, "learning_rate": 0.002828, "loss": 1.8477, "step": 62208 }, { "epoch": 4.6733208255159475, "grad_norm": 0.40148022770881653, "learning_rate": 0.002828, "loss": 1.8408, "step": 62272 }, { "epoch": 4.67812382739212, "grad_norm": 0.4373031556606293, "learning_rate": 0.002828, "loss": 1.8406, "step": 62336 }, { "epoch": 4.682926829268292, "grad_norm": 0.4787302315235138, "learning_rate": 0.002828, "loss": 1.8398, "step": 62400 }, { "epoch": 4.687729831144465, "grad_norm": 0.5098510384559631, "learning_rate": 0.002828, "loss": 1.847, "step": 62464 }, { "epoch": 4.692532833020638, "grad_norm": 0.5322007536888123, "learning_rate": 0.002828, "loss": 1.8393, "step": 62528 }, { "epoch": 4.69733583489681, "grad_norm": 0.4712188243865967, "learning_rate": 0.002828, "loss": 1.84, "step": 62592 }, { "epoch": 4.702138836772983, "grad_norm": 0.42479485273361206, "learning_rate": 0.002828, "loss": 1.8411, "step": 62656 }, { "epoch": 4.706941838649156, "grad_norm": 0.47006043791770935, "learning_rate": 0.002828, "loss": 1.8337, "step": 62720 }, { "epoch": 4.711744840525329, "grad_norm": 0.46571770310401917, "learning_rate": 0.002828, "loss": 1.8417, "step": 62784 }, { "epoch": 4.716547842401501, "grad_norm": 0.4172969162464142, "learning_rate": 0.002828, "loss": 1.838, "step": 62848 }, { "epoch": 4.721350844277674, "grad_norm": 0.4751930236816406, "learning_rate": 0.002828, "loss": 1.8304, "step": 62912 }, { "epoch": 4.726153846153846, "grad_norm": 0.42527517676353455, "learning_rate": 0.002828, "loss": 1.8387, "step": 62976 }, { "epoch": 4.730956848030019, "grad_norm": 0.43444591760635376, "learning_rate": 0.002828, "loss": 1.834, "step": 63040 }, { "epoch": 4.7357598499061915, "grad_norm": 0.4354824125766754, "learning_rate": 0.002828, "loss": 1.8398, "step": 63104 }, { "epoch": 4.740562851782364, "grad_norm": 0.4284718334674835, "learning_rate": 0.002828, "loss": 1.8385, "step": 63168 }, { "epoch": 4.745365853658536, "grad_norm": 0.5047585964202881, "learning_rate": 0.002828, "loss": 1.8361, "step": 63232 }, { "epoch": 4.750168855534709, "grad_norm": 0.41910332441329956, "learning_rate": 0.002828, "loss": 1.842, "step": 63296 }, { "epoch": 4.754971857410882, "grad_norm": 0.41034504771232605, "learning_rate": 0.002828, "loss": 1.8323, "step": 63360 }, { "epoch": 4.759774859287054, "grad_norm": 0.389961302280426, "learning_rate": 0.002828, "loss": 1.8378, "step": 63424 }, { "epoch": 4.764577861163227, "grad_norm": 0.4235137701034546, "learning_rate": 0.002828, "loss": 1.8403, "step": 63488 }, { "epoch": 4.7693808630394, "grad_norm": 0.4341602623462677, "learning_rate": 0.002828, "loss": 1.8316, "step": 63552 }, { "epoch": 4.774183864915572, "grad_norm": 0.42686939239501953, "learning_rate": 0.002828, "loss": 1.8335, "step": 63616 }, { "epoch": 4.778986866791745, "grad_norm": 0.41930991411209106, "learning_rate": 0.002828, "loss": 1.8312, "step": 63680 }, { "epoch": 4.783789868667918, "grad_norm": 0.4320056736469269, "learning_rate": 0.002828, "loss": 1.8255, "step": 63744 }, { "epoch": 4.78859287054409, "grad_norm": 0.4222176671028137, "learning_rate": 0.002828, "loss": 1.8367, "step": 63808 }, { "epoch": 4.7933958724202625, "grad_norm": 0.39090004563331604, "learning_rate": 0.002828, "loss": 1.836, "step": 63872 }, { "epoch": 4.798198874296435, "grad_norm": 0.5152971148490906, "learning_rate": 0.002828, "loss": 1.8315, "step": 63936 }, { "epoch": 4.803001876172608, "grad_norm": 0.3895571231842041, "learning_rate": 0.002828, "loss": 1.8388, "step": 64000 }, { "epoch": 4.80780487804878, "grad_norm": 0.4033324420452118, "learning_rate": 0.002828, "loss": 1.8329, "step": 64064 }, { "epoch": 4.812607879924953, "grad_norm": 0.43192601203918457, "learning_rate": 0.002828, "loss": 1.8303, "step": 64128 }, { "epoch": 4.817410881801126, "grad_norm": 0.4123119115829468, "learning_rate": 0.002828, "loss": 1.8365, "step": 64192 }, { "epoch": 4.822213883677298, "grad_norm": 0.394336998462677, "learning_rate": 0.002828, "loss": 1.8332, "step": 64256 }, { "epoch": 4.827016885553471, "grad_norm": 0.4631196856498718, "learning_rate": 0.002828, "loss": 1.8314, "step": 64320 }, { "epoch": 4.831819887429644, "grad_norm": 0.4436214566230774, "learning_rate": 0.002828, "loss": 1.8335, "step": 64384 }, { "epoch": 4.836622889305816, "grad_norm": 0.4646340608596802, "learning_rate": 0.002828, "loss": 1.8257, "step": 64448 }, { "epoch": 4.841425891181989, "grad_norm": 0.488765686750412, "learning_rate": 0.002828, "loss": 1.8328, "step": 64512 }, { "epoch": 4.846228893058162, "grad_norm": 0.5807002186775208, "learning_rate": 0.002828, "loss": 1.8302, "step": 64576 }, { "epoch": 4.851031894934334, "grad_norm": 0.41557422280311584, "learning_rate": 0.002828, "loss": 1.8274, "step": 64640 }, { "epoch": 4.8558348968105065, "grad_norm": 0.40899091958999634, "learning_rate": 0.002828, "loss": 1.8294, "step": 64704 }, { "epoch": 4.860637898686679, "grad_norm": 0.41941386461257935, "learning_rate": 0.002828, "loss": 1.8314, "step": 64768 }, { "epoch": 4.865440900562852, "grad_norm": 0.4324967861175537, "learning_rate": 0.002828, "loss": 1.8242, "step": 64832 }, { "epoch": 4.870243902439024, "grad_norm": 0.5122097730636597, "learning_rate": 0.002828, "loss": 1.8284, "step": 64896 }, { "epoch": 4.875046904315197, "grad_norm": 0.4794155955314636, "learning_rate": 0.002828, "loss": 1.8306, "step": 64960 }, { "epoch": 4.879849906191369, "grad_norm": 0.41596829891204834, "learning_rate": 0.002828, "loss": 1.8332, "step": 65024 }, { "epoch": 4.884652908067542, "grad_norm": 0.47452154755592346, "learning_rate": 0.002828, "loss": 1.8215, "step": 65088 }, { "epoch": 4.889455909943715, "grad_norm": 0.46592849493026733, "learning_rate": 0.002828, "loss": 1.8254, "step": 65152 }, { "epoch": 4.894258911819888, "grad_norm": 0.400258868932724, "learning_rate": 0.002828, "loss": 1.832, "step": 65216 }, { "epoch": 4.89906191369606, "grad_norm": 0.4501512050628662, "learning_rate": 0.002828, "loss": 1.8272, "step": 65280 }, { "epoch": 4.903864915572233, "grad_norm": 0.5389344096183777, "learning_rate": 0.002828, "loss": 1.8258, "step": 65344 }, { "epoch": 4.908667917448406, "grad_norm": 0.4736655354499817, "learning_rate": 0.002828, "loss": 1.8277, "step": 65408 }, { "epoch": 4.913470919324578, "grad_norm": 0.42256057262420654, "learning_rate": 0.002828, "loss": 1.8259, "step": 65472 }, { "epoch": 4.9182739212007505, "grad_norm": 0.45916080474853516, "learning_rate": 0.002828, "loss": 1.8317, "step": 65536 }, { "epoch": 4.923076923076923, "grad_norm": 0.3935365080833435, "learning_rate": 0.002828, "loss": 1.8253, "step": 65600 }, { "epoch": 4.927879924953095, "grad_norm": 0.4702571928501129, "learning_rate": 0.002828, "loss": 1.8194, "step": 65664 }, { "epoch": 4.932682926829268, "grad_norm": 0.4381246864795685, "learning_rate": 0.002828, "loss": 1.822, "step": 65728 }, { "epoch": 4.937485928705441, "grad_norm": 0.4881627857685089, "learning_rate": 0.002828, "loss": 1.8266, "step": 65792 }, { "epoch": 4.942288930581613, "grad_norm": 0.5015894770622253, "learning_rate": 0.002828, "loss": 1.8221, "step": 65856 }, { "epoch": 4.947091932457786, "grad_norm": 0.4536570608615875, "learning_rate": 0.002828, "loss": 1.8246, "step": 65920 }, { "epoch": 4.951894934333959, "grad_norm": 0.43508243560791016, "learning_rate": 0.002828, "loss": 1.8188, "step": 65984 }, { "epoch": 4.956697936210132, "grad_norm": 0.42308923602104187, "learning_rate": 0.002828, "loss": 1.8306, "step": 66048 }, { "epoch": 4.961500938086304, "grad_norm": 0.44210103154182434, "learning_rate": 0.002828, "loss": 1.8244, "step": 66112 }, { "epoch": 4.966303939962477, "grad_norm": 0.47712111473083496, "learning_rate": 0.002828, "loss": 1.8265, "step": 66176 }, { "epoch": 4.9711069418386495, "grad_norm": 0.4795370399951935, "learning_rate": 0.002828, "loss": 1.825, "step": 66240 }, { "epoch": 4.9759099437148215, "grad_norm": 0.3904446065425873, "learning_rate": 0.002828, "loss": 1.8251, "step": 66304 }, { "epoch": 4.980712945590994, "grad_norm": 0.4810478389263153, "learning_rate": 0.002828, "loss": 1.8258, "step": 66368 }, { "epoch": 4.985515947467167, "grad_norm": 0.39784982800483704, "learning_rate": 0.002828, "loss": 1.8232, "step": 66432 }, { "epoch": 4.990318949343339, "grad_norm": 0.4552132189273834, "learning_rate": 0.002828, "loss": 1.8203, "step": 66496 }, { "epoch": 4.995121951219512, "grad_norm": 0.44738858938217163, "learning_rate": 0.002828, "loss": 1.8241, "step": 66560 }, { "epoch": 4.999924953095685, "grad_norm": 0.41392263770103455, "learning_rate": 0.002828, "loss": 1.8214, "step": 66624 }, { "epoch": 5.004727954971857, "grad_norm": 0.4698507487773895, "learning_rate": 0.002828, "loss": 1.7802, "step": 66688 }, { "epoch": 5.00953095684803, "grad_norm": 0.45044583082199097, "learning_rate": 0.002828, "loss": 1.779, "step": 66752 }, { "epoch": 5.014333958724203, "grad_norm": 0.4659009277820587, "learning_rate": 0.002828, "loss": 1.7693, "step": 66816 }, { "epoch": 5.019136960600375, "grad_norm": 0.45445555448532104, "learning_rate": 0.002828, "loss": 1.7778, "step": 66880 }, { "epoch": 5.023939962476548, "grad_norm": 0.5755953192710876, "learning_rate": 0.002828, "loss": 1.783, "step": 66944 }, { "epoch": 5.028742964352721, "grad_norm": 0.46172744035720825, "learning_rate": 0.002828, "loss": 1.7779, "step": 67008 }, { "epoch": 5.0335459662288935, "grad_norm": 0.42822253704071045, "learning_rate": 0.002828, "loss": 1.7759, "step": 67072 }, { "epoch": 5.0383489681050655, "grad_norm": 0.40689054131507874, "learning_rate": 0.002828, "loss": 1.7818, "step": 67136 }, { "epoch": 5.043151969981238, "grad_norm": 0.4451557993888855, "learning_rate": 0.002828, "loss": 1.7802, "step": 67200 }, { "epoch": 5.047954971857411, "grad_norm": 0.4525567889213562, "learning_rate": 0.002828, "loss": 1.7752, "step": 67264 }, { "epoch": 5.052757973733583, "grad_norm": 0.40108218789100647, "learning_rate": 0.002828, "loss": 1.7732, "step": 67328 }, { "epoch": 5.057560975609756, "grad_norm": 0.42633527517318726, "learning_rate": 0.002828, "loss": 1.7771, "step": 67392 }, { "epoch": 5.062363977485929, "grad_norm": 0.46593084931373596, "learning_rate": 0.002828, "loss": 1.7815, "step": 67456 }, { "epoch": 5.067166979362101, "grad_norm": 0.5002214312553406, "learning_rate": 0.002828, "loss": 1.7734, "step": 67520 }, { "epoch": 5.071969981238274, "grad_norm": 0.5098074674606323, "learning_rate": 0.002828, "loss": 1.7714, "step": 67584 }, { "epoch": 5.076772983114447, "grad_norm": 0.5381320714950562, "learning_rate": 0.002828, "loss": 1.7808, "step": 67648 }, { "epoch": 5.081575984990619, "grad_norm": 0.43003377318382263, "learning_rate": 0.002828, "loss": 1.7773, "step": 67712 }, { "epoch": 5.086378986866792, "grad_norm": 0.41460487246513367, "learning_rate": 0.002828, "loss": 1.7736, "step": 67776 }, { "epoch": 5.091181988742965, "grad_norm": 0.42502009868621826, "learning_rate": 0.002828, "loss": 1.7799, "step": 67840 }, { "epoch": 5.095984990619137, "grad_norm": 0.40019145607948303, "learning_rate": 0.002828, "loss": 1.7843, "step": 67904 }, { "epoch": 5.1007879924953095, "grad_norm": 0.4934079349040985, "learning_rate": 0.002828, "loss": 1.7727, "step": 67968 }, { "epoch": 5.105590994371482, "grad_norm": 0.42615658044815063, "learning_rate": 0.002828, "loss": 1.7815, "step": 68032 }, { "epoch": 5.110393996247655, "grad_norm": 0.4133114814758301, "learning_rate": 0.002828, "loss": 1.7799, "step": 68096 }, { "epoch": 5.115196998123827, "grad_norm": 0.4989236891269684, "learning_rate": 0.002828, "loss": 1.7834, "step": 68160 }, { "epoch": 5.12, "grad_norm": 0.39801713824272156, "learning_rate": 0.002828, "loss": 1.7713, "step": 68224 }, { "epoch": 5.124803001876173, "grad_norm": 0.47219210863113403, "learning_rate": 0.002828, "loss": 1.7784, "step": 68288 }, { "epoch": 5.129606003752345, "grad_norm": 0.4931895434856415, "learning_rate": 0.002828, "loss": 1.775, "step": 68352 }, { "epoch": 5.134409005628518, "grad_norm": 0.4328326880931854, "learning_rate": 0.002828, "loss": 1.7913, "step": 68416 }, { "epoch": 5.139212007504691, "grad_norm": 0.5637125372886658, "learning_rate": 0.002828, "loss": 1.7767, "step": 68480 }, { "epoch": 5.144015009380863, "grad_norm": 0.4656321704387665, "learning_rate": 0.002828, "loss": 1.7729, "step": 68544 }, { "epoch": 5.148818011257036, "grad_norm": 0.46301645040512085, "learning_rate": 0.002828, "loss": 1.7714, "step": 68608 }, { "epoch": 5.1536210131332085, "grad_norm": 0.407530277967453, "learning_rate": 0.002828, "loss": 1.7778, "step": 68672 }, { "epoch": 5.1584240150093805, "grad_norm": 0.48896515369415283, "learning_rate": 0.002828, "loss": 1.773, "step": 68736 }, { "epoch": 5.163227016885553, "grad_norm": 0.5092155933380127, "learning_rate": 0.002828, "loss": 1.7818, "step": 68800 }, { "epoch": 5.168030018761726, "grad_norm": 0.40807560086250305, "learning_rate": 0.002828, "loss": 1.7747, "step": 68864 }, { "epoch": 5.172833020637898, "grad_norm": 0.4265471398830414, "learning_rate": 0.002828, "loss": 1.7801, "step": 68928 }, { "epoch": 5.177636022514071, "grad_norm": 0.4193212389945984, "learning_rate": 0.002828, "loss": 1.7779, "step": 68992 }, { "epoch": 5.182439024390244, "grad_norm": 0.4063675105571747, "learning_rate": 0.002828, "loss": 1.7769, "step": 69056 }, { "epoch": 5.187242026266416, "grad_norm": 0.4742323160171509, "learning_rate": 0.002828, "loss": 1.7748, "step": 69120 }, { "epoch": 5.192045028142589, "grad_norm": 0.4574708342552185, "learning_rate": 0.002828, "loss": 1.7728, "step": 69184 }, { "epoch": 5.196848030018762, "grad_norm": 0.3898203670978546, "learning_rate": 0.002828, "loss": 1.7788, "step": 69248 }, { "epoch": 5.201651031894935, "grad_norm": 0.48600926995277405, "learning_rate": 0.002828, "loss": 1.7767, "step": 69312 }, { "epoch": 5.206454033771107, "grad_norm": 0.5444703102111816, "learning_rate": 0.002828, "loss": 1.7778, "step": 69376 }, { "epoch": 5.21125703564728, "grad_norm": 0.54710453748703, "learning_rate": 0.002828, "loss": 1.7764, "step": 69440 }, { "epoch": 5.2160600375234525, "grad_norm": 0.5190069675445557, "learning_rate": 0.002828, "loss": 1.7731, "step": 69504 }, { "epoch": 5.2208630393996245, "grad_norm": 0.5375124216079712, "learning_rate": 0.002828, "loss": 1.7779, "step": 69568 }, { "epoch": 5.225666041275797, "grad_norm": 0.4914718568325043, "learning_rate": 0.002828, "loss": 1.7735, "step": 69632 }, { "epoch": 5.23046904315197, "grad_norm": 0.44196775555610657, "learning_rate": 0.002828, "loss": 1.7765, "step": 69696 }, { "epoch": 5.235272045028142, "grad_norm": 0.43454575538635254, "learning_rate": 0.002828, "loss": 1.7814, "step": 69760 }, { "epoch": 5.240075046904315, "grad_norm": 0.5103824138641357, "learning_rate": 0.002828, "loss": 1.779, "step": 69824 }, { "epoch": 5.244878048780488, "grad_norm": 0.396705687046051, "learning_rate": 0.002828, "loss": 1.7799, "step": 69888 }, { "epoch": 5.24968105065666, "grad_norm": 0.4350598156452179, "learning_rate": 0.002828, "loss": 1.7798, "step": 69952 }, { "epoch": 5.254484052532833, "grad_norm": 0.4352000653743744, "learning_rate": 0.002828, "loss": 1.7764, "step": 70016 }, { "epoch": 5.259287054409006, "grad_norm": 0.43230265378952026, "learning_rate": 0.002828, "loss": 1.7754, "step": 70080 }, { "epoch": 5.264090056285179, "grad_norm": 0.4460284113883972, "learning_rate": 0.002828, "loss": 1.7759, "step": 70144 }, { "epoch": 5.268893058161351, "grad_norm": 0.5028729438781738, "learning_rate": 0.002828, "loss": 1.7774, "step": 70208 }, { "epoch": 5.273696060037524, "grad_norm": 0.4400043189525604, "learning_rate": 0.002828, "loss": 1.7824, "step": 70272 }, { "epoch": 5.278499061913696, "grad_norm": 0.4092008173465729, "learning_rate": 0.002828, "loss": 1.7757, "step": 70336 }, { "epoch": 5.283302063789868, "grad_norm": 0.49051231145858765, "learning_rate": 0.002828, "loss": 1.7836, "step": 70400 }, { "epoch": 5.288105065666041, "grad_norm": 0.4183790981769562, "learning_rate": 0.002828, "loss": 1.7734, "step": 70464 }, { "epoch": 5.292908067542214, "grad_norm": 0.4172358512878418, "learning_rate": 0.002828, "loss": 1.7709, "step": 70528 }, { "epoch": 5.297711069418386, "grad_norm": 0.38371556997299194, "learning_rate": 0.002828, "loss": 1.773, "step": 70592 }, { "epoch": 5.302514071294559, "grad_norm": 0.47471094131469727, "learning_rate": 0.002828, "loss": 1.7812, "step": 70656 }, { "epoch": 5.307317073170732, "grad_norm": 0.4047795534133911, "learning_rate": 0.002828, "loss": 1.7621, "step": 70720 }, { "epoch": 5.312120075046904, "grad_norm": 0.4711602032184601, "learning_rate": 0.002828, "loss": 1.7662, "step": 70784 }, { "epoch": 5.316923076923077, "grad_norm": 0.41445788741111755, "learning_rate": 0.002828, "loss": 1.7776, "step": 70848 }, { "epoch": 5.32172607879925, "grad_norm": 0.40588343143463135, "learning_rate": 0.002828, "loss": 1.7704, "step": 70912 }, { "epoch": 5.326529080675422, "grad_norm": 0.4976954162120819, "learning_rate": 0.002828, "loss": 1.7772, "step": 70976 }, { "epoch": 5.331332082551595, "grad_norm": 0.47486379742622375, "learning_rate": 0.002828, "loss": 1.775, "step": 71040 }, { "epoch": 5.3361350844277675, "grad_norm": 0.41308653354644775, "learning_rate": 0.002828, "loss": 1.7709, "step": 71104 }, { "epoch": 5.3409380863039395, "grad_norm": 0.4700968861579895, "learning_rate": 0.002828, "loss": 1.7704, "step": 71168 }, { "epoch": 5.345741088180112, "grad_norm": 0.4668005108833313, "learning_rate": 0.002828, "loss": 1.7687, "step": 71232 }, { "epoch": 5.350544090056285, "grad_norm": 0.4304405152797699, "learning_rate": 0.002828, "loss": 1.7713, "step": 71296 }, { "epoch": 5.355347091932458, "grad_norm": 0.50732421875, "learning_rate": 0.002828, "loss": 1.7648, "step": 71360 }, { "epoch": 5.36015009380863, "grad_norm": 0.4894561469554901, "learning_rate": 0.002828, "loss": 1.7704, "step": 71424 }, { "epoch": 5.364953095684803, "grad_norm": 0.4268856942653656, "learning_rate": 0.002828, "loss": 1.7709, "step": 71488 }, { "epoch": 5.369756097560976, "grad_norm": 0.37423834204673767, "learning_rate": 0.002828, "loss": 1.7742, "step": 71552 }, { "epoch": 5.374559099437148, "grad_norm": 0.4616115987300873, "learning_rate": 0.002828, "loss": 1.7804, "step": 71616 }, { "epoch": 5.379362101313321, "grad_norm": 0.3891536593437195, "learning_rate": 0.002828, "loss": 1.7745, "step": 71680 }, { "epoch": 5.384165103189494, "grad_norm": 0.4057512581348419, "learning_rate": 0.002828, "loss": 1.7774, "step": 71744 }, { "epoch": 5.388968105065666, "grad_norm": 0.4673733115196228, "learning_rate": 0.002828, "loss": 1.7751, "step": 71808 }, { "epoch": 5.393771106941839, "grad_norm": 0.38505420088768005, "learning_rate": 0.002828, "loss": 1.7713, "step": 71872 }, { "epoch": 5.3985741088180115, "grad_norm": 0.4115614593029022, "learning_rate": 0.002828, "loss": 1.7743, "step": 71936 }, { "epoch": 5.4033771106941835, "grad_norm": 0.42474934458732605, "learning_rate": 0.002828, "loss": 1.7748, "step": 72000 }, { "epoch": 5.408180112570356, "grad_norm": 0.44842562079429626, "learning_rate": 0.002828, "loss": 1.7753, "step": 72064 }, { "epoch": 5.412983114446529, "grad_norm": 0.4199115037918091, "learning_rate": 0.002828, "loss": 1.7719, "step": 72128 }, { "epoch": 5.417786116322702, "grad_norm": 0.40089020133018494, "learning_rate": 0.002828, "loss": 1.7723, "step": 72192 }, { "epoch": 5.422589118198874, "grad_norm": 0.5181447863578796, "learning_rate": 0.002828, "loss": 1.7673, "step": 72256 }, { "epoch": 5.427392120075047, "grad_norm": 0.4892163574695587, "learning_rate": 0.002828, "loss": 1.7742, "step": 72320 }, { "epoch": 5.43219512195122, "grad_norm": 0.41054004430770874, "learning_rate": 0.002828, "loss": 1.7701, "step": 72384 }, { "epoch": 5.436998123827392, "grad_norm": 0.4574233293533325, "learning_rate": 0.002828, "loss": 1.7648, "step": 72448 }, { "epoch": 5.441801125703565, "grad_norm": 0.4245249629020691, "learning_rate": 0.002828, "loss": 1.7762, "step": 72512 }, { "epoch": 5.446604127579738, "grad_norm": 0.43858468532562256, "learning_rate": 0.002828, "loss": 1.7692, "step": 72576 }, { "epoch": 5.45140712945591, "grad_norm": 0.477273553609848, "learning_rate": 0.002828, "loss": 1.7723, "step": 72640 }, { "epoch": 5.4562101313320825, "grad_norm": 0.5352038145065308, "learning_rate": 0.002828, "loss": 1.7735, "step": 72704 }, { "epoch": 5.461013133208255, "grad_norm": 0.4089607894420624, "learning_rate": 0.002828, "loss": 1.7744, "step": 72768 }, { "epoch": 5.465816135084427, "grad_norm": 0.4176151752471924, "learning_rate": 0.002828, "loss": 1.7725, "step": 72832 }, { "epoch": 5.4706191369606, "grad_norm": 0.44598644971847534, "learning_rate": 0.002828, "loss": 1.7747, "step": 72896 }, { "epoch": 5.475422138836773, "grad_norm": 0.3903334438800812, "learning_rate": 0.002828, "loss": 1.7687, "step": 72960 }, { "epoch": 5.480225140712945, "grad_norm": 0.4536639451980591, "learning_rate": 0.002828, "loss": 1.7726, "step": 73024 }, { "epoch": 5.485028142589118, "grad_norm": 0.42450371384620667, "learning_rate": 0.002828, "loss": 1.7757, "step": 73088 }, { "epoch": 5.489831144465291, "grad_norm": 0.49686697125434875, "learning_rate": 0.002828, "loss": 1.7723, "step": 73152 }, { "epoch": 5.494634146341463, "grad_norm": 0.4569362699985504, "learning_rate": 0.002828, "loss": 1.7665, "step": 73216 }, { "epoch": 5.499437148217636, "grad_norm": 0.47700774669647217, "learning_rate": 0.002828, "loss": 1.7733, "step": 73280 }, { "epoch": 5.504240150093809, "grad_norm": 0.4036250412464142, "learning_rate": 0.002828, "loss": 1.7719, "step": 73344 }, { "epoch": 5.509043151969982, "grad_norm": 0.4487983286380768, "learning_rate": 0.002828, "loss": 1.766, "step": 73408 }, { "epoch": 5.513846153846154, "grad_norm": 0.519871175289154, "learning_rate": 0.002828, "loss": 1.7709, "step": 73472 }, { "epoch": 5.5186491557223265, "grad_norm": 0.4376335144042969, "learning_rate": 0.002828, "loss": 1.769, "step": 73536 }, { "epoch": 5.523452157598499, "grad_norm": 0.45950847864151, "learning_rate": 0.002828, "loss": 1.7753, "step": 73600 }, { "epoch": 5.528255159474671, "grad_norm": 0.47752419114112854, "learning_rate": 0.002828, "loss": 1.7612, "step": 73664 }, { "epoch": 5.533058161350844, "grad_norm": 0.4177493453025818, "learning_rate": 0.002828, "loss": 1.7689, "step": 73728 }, { "epoch": 5.537861163227017, "grad_norm": 0.427636981010437, "learning_rate": 0.002828, "loss": 1.7645, "step": 73792 }, { "epoch": 5.542664165103189, "grad_norm": 0.46148422360420227, "learning_rate": 0.002828, "loss": 1.7697, "step": 73856 }, { "epoch": 5.547467166979362, "grad_norm": 0.4212297797203064, "learning_rate": 0.002828, "loss": 1.7689, "step": 73920 }, { "epoch": 5.552270168855535, "grad_norm": 0.46947184205055237, "learning_rate": 0.002828, "loss": 1.7663, "step": 73984 }, { "epoch": 5.557073170731707, "grad_norm": 0.4970608353614807, "learning_rate": 0.002828, "loss": 1.7706, "step": 74048 }, { "epoch": 5.56187617260788, "grad_norm": 0.43422332406044006, "learning_rate": 0.002828, "loss": 1.7674, "step": 74112 }, { "epoch": 5.566679174484053, "grad_norm": 0.4593464732170105, "learning_rate": 0.002828, "loss": 1.7651, "step": 74176 }, { "epoch": 5.571482176360226, "grad_norm": 0.5521155595779419, "learning_rate": 0.002828, "loss": 1.7626, "step": 74240 }, { "epoch": 5.576285178236398, "grad_norm": 0.44747570157051086, "learning_rate": 0.002828, "loss": 1.7628, "step": 74304 }, { "epoch": 5.5810881801125705, "grad_norm": 0.4199005365371704, "learning_rate": 0.002828, "loss": 1.7654, "step": 74368 }, { "epoch": 5.585891181988743, "grad_norm": 0.4620073735713959, "learning_rate": 0.002828, "loss": 1.7596, "step": 74432 }, { "epoch": 5.590694183864915, "grad_norm": 0.4747791886329651, "learning_rate": 0.002828, "loss": 1.7681, "step": 74496 }, { "epoch": 5.595497185741088, "grad_norm": 0.39181622862815857, "learning_rate": 0.002828, "loss": 1.7659, "step": 74560 }, { "epoch": 5.600300187617261, "grad_norm": 0.48630768060684204, "learning_rate": 0.002828, "loss": 1.761, "step": 74624 }, { "epoch": 5.605103189493433, "grad_norm": 0.41863974928855896, "learning_rate": 0.002828, "loss": 1.764, "step": 74688 }, { "epoch": 5.609906191369606, "grad_norm": 0.4605071544647217, "learning_rate": 0.002828, "loss": 1.7615, "step": 74752 }, { "epoch": 5.614709193245779, "grad_norm": 0.4120197594165802, "learning_rate": 0.002828, "loss": 1.765, "step": 74816 }, { "epoch": 5.619512195121951, "grad_norm": 0.49022045731544495, "learning_rate": 0.002828, "loss": 1.7649, "step": 74880 }, { "epoch": 5.624315196998124, "grad_norm": 0.5220785140991211, "learning_rate": 0.002828, "loss": 1.7689, "step": 74944 }, { "epoch": 5.629118198874297, "grad_norm": 0.5221869945526123, "learning_rate": 0.002828, "loss": 1.7584, "step": 75008 }, { "epoch": 5.6339212007504695, "grad_norm": 0.44775456190109253, "learning_rate": 0.002828, "loss": 1.7608, "step": 75072 }, { "epoch": 5.6387242026266415, "grad_norm": 0.4067821502685547, "learning_rate": 0.002828, "loss": 1.7664, "step": 75136 }, { "epoch": 5.643527204502814, "grad_norm": 0.411039263010025, "learning_rate": 0.002828, "loss": 1.7673, "step": 75200 }, { "epoch": 5.648330206378986, "grad_norm": 0.45366838574409485, "learning_rate": 0.002828, "loss": 1.7668, "step": 75264 }, { "epoch": 5.653133208255159, "grad_norm": 0.4279555380344391, "learning_rate": 0.002828, "loss": 1.7585, "step": 75328 }, { "epoch": 5.657936210131332, "grad_norm": 0.41225770115852356, "learning_rate": 0.002828, "loss": 1.7647, "step": 75392 }, { "epoch": 5.662739212007505, "grad_norm": 0.4502100944519043, "learning_rate": 0.002828, "loss": 1.7556, "step": 75456 }, { "epoch": 5.667542213883677, "grad_norm": 0.4367251992225647, "learning_rate": 0.002828, "loss": 1.7626, "step": 75520 }, { "epoch": 5.67234521575985, "grad_norm": 0.3955664038658142, "learning_rate": 0.002828, "loss": 1.7611, "step": 75584 }, { "epoch": 5.677148217636023, "grad_norm": 0.4193713963031769, "learning_rate": 0.002828, "loss": 1.7591, "step": 75648 }, { "epoch": 5.681951219512195, "grad_norm": 0.4191085696220398, "learning_rate": 0.002828, "loss": 1.7553, "step": 75712 }, { "epoch": 5.686754221388368, "grad_norm": 0.47768324613571167, "learning_rate": 0.002828, "loss": 1.7623, "step": 75776 }, { "epoch": 5.691557223264541, "grad_norm": 0.46289485692977905, "learning_rate": 0.002828, "loss": 1.7692, "step": 75840 }, { "epoch": 5.696360225140713, "grad_norm": 0.45249009132385254, "learning_rate": 0.002828, "loss": 1.7575, "step": 75904 }, { "epoch": 5.7011632270168855, "grad_norm": 0.4678303003311157, "learning_rate": 0.002828, "loss": 1.755, "step": 75968 }, { "epoch": 5.705966228893058, "grad_norm": 0.49041491746902466, "learning_rate": 0.002828, "loss": 1.7617, "step": 76032 }, { "epoch": 5.71076923076923, "grad_norm": 0.47408047318458557, "learning_rate": 0.002828, "loss": 1.7593, "step": 76096 }, { "epoch": 5.715572232645403, "grad_norm": 0.5664355754852295, "learning_rate": 0.002828, "loss": 1.7649, "step": 76160 }, { "epoch": 5.720375234521576, "grad_norm": 0.4134531319141388, "learning_rate": 0.002828, "loss": 1.7619, "step": 76224 }, { "epoch": 5.725178236397749, "grad_norm": 0.4312486946582794, "learning_rate": 0.002828, "loss": 1.7547, "step": 76288 }, { "epoch": 5.729981238273921, "grad_norm": 0.41487306356430054, "learning_rate": 0.002828, "loss": 1.7501, "step": 76352 }, { "epoch": 5.734784240150094, "grad_norm": 0.4305776059627533, "learning_rate": 0.002828, "loss": 1.7502, "step": 76416 }, { "epoch": 5.739587242026267, "grad_norm": 0.4429924190044403, "learning_rate": 0.002828, "loss": 1.7554, "step": 76480 }, { "epoch": 5.744390243902439, "grad_norm": 0.47873786091804504, "learning_rate": 0.002828, "loss": 1.7535, "step": 76544 }, { "epoch": 5.749193245778612, "grad_norm": 0.4853677749633789, "learning_rate": 0.002828, "loss": 1.7514, "step": 76608 }, { "epoch": 5.753996247654785, "grad_norm": 0.43151888251304626, "learning_rate": 0.002828, "loss": 1.7548, "step": 76672 }, { "epoch": 5.758799249530957, "grad_norm": 0.5341268181800842, "learning_rate": 0.002828, "loss": 1.7626, "step": 76736 }, { "epoch": 5.7636022514071295, "grad_norm": 0.5068721175193787, "learning_rate": 0.002828, "loss": 1.7559, "step": 76800 }, { "epoch": 5.768405253283302, "grad_norm": 0.4304395318031311, "learning_rate": 0.002828, "loss": 1.7604, "step": 76864 }, { "epoch": 5.773208255159474, "grad_norm": 0.44748765230178833, "learning_rate": 0.002828, "loss": 1.7568, "step": 76928 }, { "epoch": 5.778011257035647, "grad_norm": 0.40714704990386963, "learning_rate": 0.002828, "loss": 1.7553, "step": 76992 }, { "epoch": 5.78281425891182, "grad_norm": 0.46348631381988525, "learning_rate": 0.002828, "loss": 1.7536, "step": 77056 }, { "epoch": 5.787617260787993, "grad_norm": 0.5729508996009827, "learning_rate": 0.002828, "loss": 1.7513, "step": 77120 }, { "epoch": 5.792420262664165, "grad_norm": 0.4666328728199005, "learning_rate": 0.002828, "loss": 1.7535, "step": 77184 }, { "epoch": 5.797223264540338, "grad_norm": 0.5445581674575806, "learning_rate": 0.002828, "loss": 1.7589, "step": 77248 }, { "epoch": 5.80202626641651, "grad_norm": 0.3767351508140564, "learning_rate": 0.002828, "loss": 1.7585, "step": 77312 }, { "epoch": 5.806829268292683, "grad_norm": 0.4318215548992157, "learning_rate": 0.002828, "loss": 1.76, "step": 77376 }, { "epoch": 5.811632270168856, "grad_norm": 0.43348705768585205, "learning_rate": 0.002828, "loss": 1.7468, "step": 77440 }, { "epoch": 5.8164352720450285, "grad_norm": 0.4296760559082031, "learning_rate": 0.002828, "loss": 1.7599, "step": 77504 }, { "epoch": 5.8212382739212005, "grad_norm": 0.42506301403045654, "learning_rate": 0.002828, "loss": 1.7577, "step": 77568 }, { "epoch": 5.826041275797373, "grad_norm": 0.4925733506679535, "learning_rate": 0.002828, "loss": 1.7571, "step": 77632 }, { "epoch": 5.830844277673546, "grad_norm": 0.4275703728199005, "learning_rate": 0.002828, "loss": 1.7515, "step": 77696 }, { "epoch": 5.835647279549718, "grad_norm": 0.4526989459991455, "learning_rate": 0.002828, "loss": 1.7631, "step": 77760 }, { "epoch": 5.840450281425891, "grad_norm": 0.39497390389442444, "learning_rate": 0.002828, "loss": 1.7548, "step": 77824 }, { "epoch": 5.845253283302064, "grad_norm": 0.39526093006134033, "learning_rate": 0.002828, "loss": 1.7547, "step": 77888 }, { "epoch": 5.850056285178236, "grad_norm": 0.4767382740974426, "learning_rate": 0.002828, "loss": 1.7515, "step": 77952 }, { "epoch": 5.854859287054409, "grad_norm": 0.36858007311820984, "learning_rate": 0.002828, "loss": 1.7564, "step": 78016 }, { "epoch": 5.859662288930582, "grad_norm": 0.5229625701904297, "learning_rate": 0.002828, "loss": 1.7523, "step": 78080 }, { "epoch": 5.864465290806754, "grad_norm": 0.46653860807418823, "learning_rate": 0.002828, "loss": 1.7581, "step": 78144 }, { "epoch": 5.869268292682927, "grad_norm": 0.4254682660102844, "learning_rate": 0.002828, "loss": 1.7524, "step": 78208 }, { "epoch": 5.8740712945591, "grad_norm": 0.4454876482486725, "learning_rate": 0.002828, "loss": 1.7572, "step": 78272 }, { "epoch": 5.8788742964352725, "grad_norm": 0.41046759486198425, "learning_rate": 0.002828, "loss": 1.7523, "step": 78336 }, { "epoch": 5.8836772983114445, "grad_norm": 0.4965238869190216, "learning_rate": 0.002828, "loss": 1.7567, "step": 78400 }, { "epoch": 5.888480300187617, "grad_norm": 0.4154772460460663, "learning_rate": 0.002828, "loss": 1.758, "step": 78464 }, { "epoch": 5.89328330206379, "grad_norm": 0.46626389026641846, "learning_rate": 0.002828, "loss": 1.7572, "step": 78528 }, { "epoch": 5.898086303939962, "grad_norm": 0.4979894161224365, "learning_rate": 0.002828, "loss": 1.7583, "step": 78592 }, { "epoch": 5.902889305816135, "grad_norm": 0.4381833076477051, "learning_rate": 0.002828, "loss": 1.7547, "step": 78656 }, { "epoch": 5.907692307692308, "grad_norm": 0.40586981177330017, "learning_rate": 0.002828, "loss": 1.7521, "step": 78720 }, { "epoch": 5.91249530956848, "grad_norm": 0.3989832401275635, "learning_rate": 0.002828, "loss": 1.7526, "step": 78784 }, { "epoch": 5.917298311444653, "grad_norm": 0.473142147064209, "learning_rate": 0.002828, "loss": 1.7523, "step": 78848 }, { "epoch": 5.922101313320826, "grad_norm": 0.4312800168991089, "learning_rate": 0.002828, "loss": 1.7458, "step": 78912 }, { "epoch": 5.926904315196998, "grad_norm": 0.48550522327423096, "learning_rate": 0.002828, "loss": 1.7527, "step": 78976 }, { "epoch": 5.931707317073171, "grad_norm": 0.41313230991363525, "learning_rate": 0.002828, "loss": 1.7584, "step": 79040 }, { "epoch": 5.936510318949344, "grad_norm": 0.4612511396408081, "learning_rate": 0.002828, "loss": 1.7542, "step": 79104 }, { "epoch": 5.941313320825516, "grad_norm": 0.46551233530044556, "learning_rate": 0.002828, "loss": 1.7577, "step": 79168 }, { "epoch": 5.946116322701688, "grad_norm": 0.4390702247619629, "learning_rate": 0.002828, "loss": 1.7494, "step": 79232 }, { "epoch": 5.950919324577861, "grad_norm": 0.4530523717403412, "learning_rate": 0.002828, "loss": 1.7448, "step": 79296 }, { "epoch": 5.955722326454033, "grad_norm": 0.5139921307563782, "learning_rate": 0.002828, "loss": 1.7472, "step": 79360 }, { "epoch": 5.960525328330206, "grad_norm": 0.4357989430427551, "learning_rate": 0.002828, "loss": 1.7522, "step": 79424 }, { "epoch": 5.965328330206379, "grad_norm": 0.46577298641204834, "learning_rate": 0.002828, "loss": 1.7445, "step": 79488 }, { "epoch": 5.970131332082552, "grad_norm": 0.5673984885215759, "learning_rate": 0.002828, "loss": 1.7479, "step": 79552 }, { "epoch": 5.974934333958724, "grad_norm": 0.41843244433403015, "learning_rate": 0.002828, "loss": 1.7528, "step": 79616 }, { "epoch": 5.979737335834897, "grad_norm": 0.5128591656684875, "learning_rate": 0.002828, "loss": 1.7487, "step": 79680 }, { "epoch": 5.98454033771107, "grad_norm": 0.4667825698852539, "learning_rate": 0.002828, "loss": 1.7429, "step": 79744 }, { "epoch": 5.989343339587242, "grad_norm": 0.41787078976631165, "learning_rate": 0.002828, "loss": 1.7463, "step": 79808 }, { "epoch": 5.994146341463415, "grad_norm": 0.43411746621131897, "learning_rate": 0.002828, "loss": 1.7477, "step": 79872 }, { "epoch": 5.9989493433395875, "grad_norm": 0.4941996932029724, "learning_rate": 0.002828, "loss": 1.7558, "step": 79936 }, { "epoch": 6.0037523452157595, "grad_norm": 0.48769477009773254, "learning_rate": 0.002828, "loss": 1.7094, "step": 80000 }, { "epoch": 6.008555347091932, "grad_norm": 0.47256094217300415, "learning_rate": 0.002828, "loss": 1.7016, "step": 80064 }, { "epoch": 6.013358348968105, "grad_norm": 0.4429653286933899, "learning_rate": 0.002828, "loss": 1.698, "step": 80128 }, { "epoch": 6.018161350844277, "grad_norm": 0.44992125034332275, "learning_rate": 0.002828, "loss": 1.697, "step": 80192 }, { "epoch": 6.02296435272045, "grad_norm": 0.4283198416233063, "learning_rate": 0.002828, "loss": 1.7042, "step": 80256 }, { "epoch": 6.027767354596623, "grad_norm": 0.41068553924560547, "learning_rate": 0.002828, "loss": 1.7081, "step": 80320 }, { "epoch": 6.032570356472796, "grad_norm": 0.4621987044811249, "learning_rate": 0.002828, "loss": 1.7038, "step": 80384 }, { "epoch": 6.037373358348968, "grad_norm": 0.43681883811950684, "learning_rate": 0.002828, "loss": 1.707, "step": 80448 }, { "epoch": 6.042176360225141, "grad_norm": 0.43538138270378113, "learning_rate": 0.002828, "loss": 1.7031, "step": 80512 }, { "epoch": 6.046979362101314, "grad_norm": 0.488596647977829, "learning_rate": 0.002828, "loss": 1.7071, "step": 80576 }, { "epoch": 6.051782363977486, "grad_norm": 0.48277080059051514, "learning_rate": 0.002828, "loss": 1.7054, "step": 80640 }, { "epoch": 6.056585365853659, "grad_norm": 0.5193591713905334, "learning_rate": 0.002828, "loss": 1.705, "step": 80704 }, { "epoch": 6.0613883677298315, "grad_norm": 0.42429229617118835, "learning_rate": 0.002828, "loss": 1.7042, "step": 80768 }, { "epoch": 6.0661913696060035, "grad_norm": 0.42066484689712524, "learning_rate": 0.002828, "loss": 1.705, "step": 80832 }, { "epoch": 6.070994371482176, "grad_norm": 0.5428970456123352, "learning_rate": 0.002828, "loss": 1.705, "step": 80896 }, { "epoch": 6.075797373358349, "grad_norm": 0.4695224463939667, "learning_rate": 0.002828, "loss": 1.7107, "step": 80960 }, { "epoch": 6.080600375234521, "grad_norm": 0.5089054703712463, "learning_rate": 0.002828, "loss": 1.7038, "step": 81024 }, { "epoch": 6.085403377110694, "grad_norm": 0.4825155735015869, "learning_rate": 0.002828, "loss": 1.7107, "step": 81088 }, { "epoch": 6.090206378986867, "grad_norm": 0.5433881282806396, "learning_rate": 0.002828, "loss": 1.7076, "step": 81152 }, { "epoch": 6.095009380863039, "grad_norm": 0.563622236251831, "learning_rate": 0.002828, "loss": 1.7025, "step": 81216 }, { "epoch": 6.099812382739212, "grad_norm": 0.4570015072822571, "learning_rate": 0.002828, "loss": 1.7039, "step": 81280 }, { "epoch": 6.104615384615385, "grad_norm": 0.6059426069259644, "learning_rate": 0.002828, "loss": 1.7065, "step": 81344 }, { "epoch": 6.109418386491557, "grad_norm": 0.46249598264694214, "learning_rate": 0.002828, "loss": 1.7057, "step": 81408 }, { "epoch": 6.11422138836773, "grad_norm": 0.4301151931285858, "learning_rate": 0.002828, "loss": 1.7054, "step": 81472 }, { "epoch": 6.1190243902439025, "grad_norm": 0.5160223245620728, "learning_rate": 0.002828, "loss": 1.7087, "step": 81536 }, { "epoch": 6.123827392120075, "grad_norm": 0.48462730646133423, "learning_rate": 0.002828, "loss": 1.7093, "step": 81600 }, { "epoch": 6.128630393996247, "grad_norm": 0.5068091750144958, "learning_rate": 0.002828, "loss": 1.7085, "step": 81664 }, { "epoch": 6.13343339587242, "grad_norm": 0.4427911341190338, "learning_rate": 0.002828, "loss": 1.6977, "step": 81728 }, { "epoch": 6.138236397748593, "grad_norm": 0.4816674292087555, "learning_rate": 0.002828, "loss": 1.7067, "step": 81792 }, { "epoch": 6.143039399624765, "grad_norm": 0.48784971237182617, "learning_rate": 0.002828, "loss": 1.7099, "step": 81856 }, { "epoch": 6.147842401500938, "grad_norm": 0.4686130881309509, "learning_rate": 0.002828, "loss": 1.7072, "step": 81920 }, { "epoch": 6.152645403377111, "grad_norm": 0.5386856198310852, "learning_rate": 0.002828, "loss": 1.7106, "step": 81984 }, { "epoch": 6.157448405253283, "grad_norm": 0.44840648770332336, "learning_rate": 0.002828, "loss": 1.7064, "step": 82048 }, { "epoch": 6.162251407129456, "grad_norm": 0.4645565450191498, "learning_rate": 0.002828, "loss": 1.706, "step": 82112 }, { "epoch": 6.167054409005629, "grad_norm": 0.6145454049110413, "learning_rate": 0.002828, "loss": 1.7072, "step": 82176 }, { "epoch": 6.171857410881801, "grad_norm": 0.526295006275177, "learning_rate": 0.002828, "loss": 1.7054, "step": 82240 }, { "epoch": 6.176660412757974, "grad_norm": 0.4715871810913086, "learning_rate": 0.002828, "loss": 1.7063, "step": 82304 }, { "epoch": 6.1814634146341465, "grad_norm": 0.48130881786346436, "learning_rate": 0.002828, "loss": 1.7061, "step": 82368 }, { "epoch": 6.186266416510319, "grad_norm": 0.4163391590118408, "learning_rate": 0.002828, "loss": 1.7079, "step": 82432 }, { "epoch": 6.191069418386491, "grad_norm": 0.509125292301178, "learning_rate": 0.002828, "loss": 1.7078, "step": 82496 }, { "epoch": 6.195872420262664, "grad_norm": 0.454570472240448, "learning_rate": 0.002828, "loss": 1.7058, "step": 82560 }, { "epoch": 6.200675422138837, "grad_norm": 0.40525802969932556, "learning_rate": 0.002828, "loss": 1.7054, "step": 82624 }, { "epoch": 6.205478424015009, "grad_norm": 0.44681093096733093, "learning_rate": 0.002828, "loss": 1.7154, "step": 82688 }, { "epoch": 6.210281425891182, "grad_norm": 0.4531172811985016, "learning_rate": 0.002828, "loss": 1.7073, "step": 82752 }, { "epoch": 6.215084427767355, "grad_norm": 0.42768582701683044, "learning_rate": 0.002828, "loss": 1.7033, "step": 82816 }, { "epoch": 6.219887429643527, "grad_norm": 0.49657461047172546, "learning_rate": 0.002828, "loss": 1.7067, "step": 82880 }, { "epoch": 6.2246904315197, "grad_norm": 0.45019227266311646, "learning_rate": 0.002828, "loss": 1.6968, "step": 82944 }, { "epoch": 6.229493433395873, "grad_norm": 0.41543009877204895, "learning_rate": 0.002828, "loss": 1.7031, "step": 83008 }, { "epoch": 6.234296435272045, "grad_norm": 0.44448015093803406, "learning_rate": 0.002828, "loss": 1.7034, "step": 83072 }, { "epoch": 6.239099437148218, "grad_norm": 0.49361652135849, "learning_rate": 0.002828, "loss": 1.7072, "step": 83136 }, { "epoch": 6.2439024390243905, "grad_norm": 0.4431740939617157, "learning_rate": 0.002828, "loss": 1.6989, "step": 83200 }, { "epoch": 6.2487054409005625, "grad_norm": 0.43959954380989075, "learning_rate": 0.002828, "loss": 1.7078, "step": 83264 }, { "epoch": 6.253508442776735, "grad_norm": 0.4439740777015686, "learning_rate": 0.002828, "loss": 1.7065, "step": 83328 }, { "epoch": 6.258311444652908, "grad_norm": 0.5072445273399353, "learning_rate": 0.002828, "loss": 1.7058, "step": 83392 }, { "epoch": 6.26311444652908, "grad_norm": 0.6075462102890015, "learning_rate": 0.002828, "loss": 1.7105, "step": 83456 }, { "epoch": 6.267917448405253, "grad_norm": 0.47057533264160156, "learning_rate": 0.002828, "loss": 1.7081, "step": 83520 }, { "epoch": 6.272720450281426, "grad_norm": 0.48063352704048157, "learning_rate": 0.002828, "loss": 1.7027, "step": 83584 }, { "epoch": 6.277523452157599, "grad_norm": 0.5260996222496033, "learning_rate": 0.002828, "loss": 1.7072, "step": 83648 }, { "epoch": 6.282326454033771, "grad_norm": 0.45026955008506775, "learning_rate": 0.002828, "loss": 1.7023, "step": 83712 }, { "epoch": 6.287129455909944, "grad_norm": 0.4766295552253723, "learning_rate": 0.002828, "loss": 1.7048, "step": 83776 }, { "epoch": 6.291932457786117, "grad_norm": 0.5029019713401794, "learning_rate": 0.002828, "loss": 1.7103, "step": 83840 }, { "epoch": 6.296735459662289, "grad_norm": 0.4043557643890381, "learning_rate": 0.002828, "loss": 1.7081, "step": 83904 }, { "epoch": 6.3015384615384615, "grad_norm": 0.4448949694633484, "learning_rate": 0.002828, "loss": 1.7056, "step": 83968 }, { "epoch": 6.306341463414634, "grad_norm": 0.44328081607818604, "learning_rate": 0.002828, "loss": 1.7021, "step": 84032 }, { "epoch": 6.311144465290806, "grad_norm": 0.5088468194007874, "learning_rate": 0.002828, "loss": 1.7025, "step": 84096 }, { "epoch": 6.315947467166979, "grad_norm": 0.5326889753341675, "learning_rate": 0.002828, "loss": 1.714, "step": 84160 }, { "epoch": 6.320750469043152, "grad_norm": 0.4468030631542206, "learning_rate": 0.002828, "loss": 1.7006, "step": 84224 }, { "epoch": 6.325553470919324, "grad_norm": 0.6114151477813721, "learning_rate": 0.002828, "loss": 1.7012, "step": 84288 }, { "epoch": 6.330356472795497, "grad_norm": 0.44562435150146484, "learning_rate": 0.002828, "loss": 1.7079, "step": 84352 }, { "epoch": 6.33515947467167, "grad_norm": 0.4240759313106537, "learning_rate": 0.002828, "loss": 1.7092, "step": 84416 }, { "epoch": 6.339962476547843, "grad_norm": 0.5037844777107239, "learning_rate": 0.002828, "loss": 1.7017, "step": 84480 }, { "epoch": 6.344765478424015, "grad_norm": 0.4932270348072052, "learning_rate": 0.002828, "loss": 1.7058, "step": 84544 }, { "epoch": 6.349568480300188, "grad_norm": 0.5098112225532532, "learning_rate": 0.002828, "loss": 1.7089, "step": 84608 }, { "epoch": 6.354371482176361, "grad_norm": 0.4815918207168579, "learning_rate": 0.002828, "loss": 1.6983, "step": 84672 }, { "epoch": 6.359174484052533, "grad_norm": 0.42424315214157104, "learning_rate": 0.002828, "loss": 1.7111, "step": 84736 }, { "epoch": 6.3639774859287055, "grad_norm": 0.4818706214427948, "learning_rate": 0.002828, "loss": 1.7007, "step": 84800 }, { "epoch": 6.368780487804878, "grad_norm": 0.4532407522201538, "learning_rate": 0.002828, "loss": 1.7026, "step": 84864 }, { "epoch": 6.37358348968105, "grad_norm": 0.4606661796569824, "learning_rate": 0.002828, "loss": 1.7033, "step": 84928 }, { "epoch": 6.378386491557223, "grad_norm": 0.5225577354431152, "learning_rate": 0.002828, "loss": 1.7058, "step": 84992 }, { "epoch": 6.383189493433396, "grad_norm": 0.6367613673210144, "learning_rate": 0.002828, "loss": 1.7025, "step": 85056 }, { "epoch": 6.387992495309568, "grad_norm": 0.48196592926979065, "learning_rate": 0.002828, "loss": 1.7055, "step": 85120 }, { "epoch": 6.392795497185741, "grad_norm": 0.48065000772476196, "learning_rate": 0.002828, "loss": 1.6962, "step": 85184 }, { "epoch": 6.397598499061914, "grad_norm": 0.5992358326911926, "learning_rate": 0.002828, "loss": 1.7008, "step": 85248 }, { "epoch": 6.402401500938086, "grad_norm": 0.4605671763420105, "learning_rate": 0.002828, "loss": 1.7023, "step": 85312 }, { "epoch": 6.407204502814259, "grad_norm": 0.42417392134666443, "learning_rate": 0.002828, "loss": 1.7101, "step": 85376 }, { "epoch": 6.412007504690432, "grad_norm": 0.48525306582450867, "learning_rate": 0.002828, "loss": 1.7131, "step": 85440 }, { "epoch": 6.416810506566604, "grad_norm": 0.4923460781574249, "learning_rate": 0.002828, "loss": 1.7043, "step": 85504 }, { "epoch": 6.421613508442777, "grad_norm": 0.47997698187828064, "learning_rate": 0.002828, "loss": 1.6941, "step": 85568 }, { "epoch": 6.4264165103189494, "grad_norm": 0.47466325759887695, "learning_rate": 0.002828, "loss": 1.7028, "step": 85632 }, { "epoch": 6.431219512195122, "grad_norm": 0.4309011399745941, "learning_rate": 0.002828, "loss": 1.703, "step": 85696 }, { "epoch": 6.436022514071294, "grad_norm": 0.44139745831489563, "learning_rate": 0.002828, "loss": 1.7037, "step": 85760 }, { "epoch": 6.440825515947467, "grad_norm": 0.47135940194129944, "learning_rate": 0.002828, "loss": 1.7003, "step": 85824 }, { "epoch": 6.44562851782364, "grad_norm": 0.40643376111984253, "learning_rate": 0.002828, "loss": 1.6999, "step": 85888 }, { "epoch": 6.450431519699812, "grad_norm": 0.4392285943031311, "learning_rate": 0.002828, "loss": 1.7047, "step": 85952 }, { "epoch": 6.455234521575985, "grad_norm": 0.4400002062320709, "learning_rate": 0.002828, "loss": 1.7026, "step": 86016 }, { "epoch": 6.460037523452158, "grad_norm": 0.45881539583206177, "learning_rate": 0.002828, "loss": 1.7121, "step": 86080 }, { "epoch": 6.46484052532833, "grad_norm": 0.47242626547813416, "learning_rate": 0.002828, "loss": 1.7069, "step": 86144 }, { "epoch": 6.469643527204503, "grad_norm": 0.4134158790111542, "learning_rate": 0.002828, "loss": 1.6968, "step": 86208 }, { "epoch": 6.474446529080676, "grad_norm": 0.45994988083839417, "learning_rate": 0.002828, "loss": 1.7053, "step": 86272 }, { "epoch": 6.479249530956848, "grad_norm": 0.43823009729385376, "learning_rate": 0.002828, "loss": 1.7046, "step": 86336 }, { "epoch": 6.4840525328330205, "grad_norm": 0.40943190455436707, "learning_rate": 0.002828, "loss": 1.6995, "step": 86400 }, { "epoch": 6.488855534709193, "grad_norm": 0.4266324043273926, "learning_rate": 0.002828, "loss": 1.6967, "step": 86464 }, { "epoch": 6.493658536585366, "grad_norm": 0.4427225887775421, "learning_rate": 0.002828, "loss": 1.7075, "step": 86528 }, { "epoch": 6.498461538461538, "grad_norm": 0.44521722197532654, "learning_rate": 0.002828, "loss": 1.7057, "step": 86592 }, { "epoch": 6.503264540337711, "grad_norm": 0.45472028851509094, "learning_rate": 0.002828, "loss": 1.7096, "step": 86656 }, { "epoch": 6.508067542213884, "grad_norm": 0.5230434536933899, "learning_rate": 0.002828, "loss": 1.7025, "step": 86720 }, { "epoch": 6.512870544090056, "grad_norm": 0.5306261777877808, "learning_rate": 0.002828, "loss": 1.698, "step": 86784 }, { "epoch": 6.517673545966229, "grad_norm": 0.427950918674469, "learning_rate": 0.002828, "loss": 1.7068, "step": 86848 }, { "epoch": 6.522476547842402, "grad_norm": 0.5199810862541199, "learning_rate": 0.002828, "loss": 1.6962, "step": 86912 }, { "epoch": 6.527279549718574, "grad_norm": 0.4530502259731293, "learning_rate": 0.002828, "loss": 1.698, "step": 86976 }, { "epoch": 6.532082551594747, "grad_norm": 0.5144789218902588, "learning_rate": 0.002828, "loss": 1.7048, "step": 87040 }, { "epoch": 6.53688555347092, "grad_norm": 0.47851622104644775, "learning_rate": 0.002828, "loss": 1.7021, "step": 87104 }, { "epoch": 6.541688555347092, "grad_norm": 0.47514185309410095, "learning_rate": 0.002828, "loss": 1.703, "step": 87168 }, { "epoch": 6.5464915572232645, "grad_norm": 0.5364302396774292, "learning_rate": 0.002828, "loss": 1.703, "step": 87232 }, { "epoch": 6.551294559099437, "grad_norm": 0.4827858507633209, "learning_rate": 0.002828, "loss": 1.6982, "step": 87296 }, { "epoch": 6.55609756097561, "grad_norm": 0.4004391133785248, "learning_rate": 0.002828, "loss": 1.6977, "step": 87360 }, { "epoch": 6.560900562851782, "grad_norm": 0.4460011422634125, "learning_rate": 0.002828, "loss": 1.6986, "step": 87424 }, { "epoch": 6.565703564727955, "grad_norm": 0.4100639820098877, "learning_rate": 0.002828, "loss": 1.6983, "step": 87488 }, { "epoch": 6.570506566604127, "grad_norm": 0.4702662527561188, "learning_rate": 0.002828, "loss": 1.7032, "step": 87552 }, { "epoch": 6.5753095684803, "grad_norm": 0.4798121750354767, "learning_rate": 0.002828, "loss": 1.7067, "step": 87616 }, { "epoch": 6.580112570356473, "grad_norm": 0.4707585871219635, "learning_rate": 0.002828, "loss": 1.698, "step": 87680 }, { "epoch": 6.584915572232646, "grad_norm": 0.4406650960445404, "learning_rate": 0.002828, "loss": 1.7022, "step": 87744 }, { "epoch": 6.589718574108818, "grad_norm": 0.48812413215637207, "learning_rate": 0.002828, "loss": 1.7042, "step": 87808 }, { "epoch": 6.594521575984991, "grad_norm": 0.41487032175064087, "learning_rate": 0.002828, "loss": 1.7009, "step": 87872 }, { "epoch": 6.5993245778611636, "grad_norm": 0.4759446382522583, "learning_rate": 0.002828, "loss": 1.7, "step": 87936 }, { "epoch": 6.6041275797373356, "grad_norm": 0.47563308477401733, "learning_rate": 0.002828, "loss": 1.7023, "step": 88000 }, { "epoch": 6.608930581613508, "grad_norm": 0.43401914834976196, "learning_rate": 0.002828, "loss": 1.6924, "step": 88064 }, { "epoch": 6.613733583489681, "grad_norm": 0.621225118637085, "learning_rate": 0.002828, "loss": 1.7028, "step": 88128 }, { "epoch": 6.618536585365853, "grad_norm": 0.4878978729248047, "learning_rate": 0.002828, "loss": 1.7029, "step": 88192 }, { "epoch": 6.623339587242026, "grad_norm": 0.4743395745754242, "learning_rate": 0.002828, "loss": 1.6918, "step": 88256 }, { "epoch": 6.628142589118199, "grad_norm": 0.4858582317829132, "learning_rate": 0.002828, "loss": 1.7026, "step": 88320 }, { "epoch": 6.632945590994371, "grad_norm": 0.4205172657966614, "learning_rate": 0.002828, "loss": 1.6956, "step": 88384 }, { "epoch": 6.637748592870544, "grad_norm": 0.4614770710468292, "learning_rate": 0.002828, "loss": 1.6934, "step": 88448 }, { "epoch": 6.642551594746717, "grad_norm": 0.42253434658050537, "learning_rate": 0.002828, "loss": 1.6968, "step": 88512 }, { "epoch": 6.64735459662289, "grad_norm": 0.4911728799343109, "learning_rate": 0.002828, "loss": 1.6969, "step": 88576 }, { "epoch": 6.652157598499062, "grad_norm": 0.48026782274246216, "learning_rate": 0.002828, "loss": 1.6965, "step": 88640 }, { "epoch": 6.656960600375235, "grad_norm": 0.449324369430542, "learning_rate": 0.002828, "loss": 1.6888, "step": 88704 }, { "epoch": 6.6617636022514075, "grad_norm": 0.4832703769207001, "learning_rate": 0.002828, "loss": 1.6987, "step": 88768 }, { "epoch": 6.6665666041275795, "grad_norm": 0.5017644166946411, "learning_rate": 0.002828, "loss": 1.7053, "step": 88832 }, { "epoch": 6.671369606003752, "grad_norm": 0.5028623342514038, "learning_rate": 0.002828, "loss": 1.6902, "step": 88896 }, { "epoch": 6.676172607879925, "grad_norm": 0.4900693893432617, "learning_rate": 0.002828, "loss": 1.6945, "step": 88960 }, { "epoch": 6.680975609756097, "grad_norm": 0.5036649703979492, "learning_rate": 0.002828, "loss": 1.6938, "step": 89024 }, { "epoch": 6.68577861163227, "grad_norm": 0.5400296449661255, "learning_rate": 0.002828, "loss": 1.6996, "step": 89088 }, { "epoch": 6.690581613508443, "grad_norm": 0.4391849935054779, "learning_rate": 0.002828, "loss": 1.6955, "step": 89152 }, { "epoch": 6.695384615384615, "grad_norm": 0.46695676445961, "learning_rate": 0.002828, "loss": 1.6952, "step": 89216 }, { "epoch": 6.700187617260788, "grad_norm": 0.5176795125007629, "learning_rate": 0.002828, "loss": 1.6982, "step": 89280 }, { "epoch": 6.704990619136961, "grad_norm": 0.4465230703353882, "learning_rate": 0.002828, "loss": 1.6922, "step": 89344 }, { "epoch": 6.709793621013134, "grad_norm": 0.7129762172698975, "learning_rate": 0.002828, "loss": 1.699, "step": 89408 }, { "epoch": 6.714596622889306, "grad_norm": 0.5269412994384766, "learning_rate": 0.002828, "loss": 1.6941, "step": 89472 }, { "epoch": 6.719399624765479, "grad_norm": 0.5359578728675842, "learning_rate": 0.002828, "loss": 1.6965, "step": 89536 }, { "epoch": 6.724202626641651, "grad_norm": 0.4400736093521118, "learning_rate": 0.002828, "loss": 1.6972, "step": 89600 }, { "epoch": 6.7290056285178235, "grad_norm": 0.4457342326641083, "learning_rate": 0.002828, "loss": 1.7058, "step": 89664 }, { "epoch": 6.733808630393996, "grad_norm": 0.5111649632453918, "learning_rate": 0.002828, "loss": 1.697, "step": 89728 }, { "epoch": 6.738611632270169, "grad_norm": 0.4815118908882141, "learning_rate": 0.002828, "loss": 1.6908, "step": 89792 }, { "epoch": 6.743414634146341, "grad_norm": 0.4404054284095764, "learning_rate": 0.002828, "loss": 1.6954, "step": 89856 }, { "epoch": 6.748217636022514, "grad_norm": 0.3914402425289154, "learning_rate": 0.002828, "loss": 1.6932, "step": 89920 }, { "epoch": 6.753020637898687, "grad_norm": 0.43506091833114624, "learning_rate": 0.002828, "loss": 1.6954, "step": 89984 }, { "epoch": 6.757823639774859, "grad_norm": 0.46764346957206726, "learning_rate": 0.002828, "loss": 1.6915, "step": 90048 }, { "epoch": 6.762626641651032, "grad_norm": 0.4379426836967468, "learning_rate": 0.002828, "loss": 1.6941, "step": 90112 }, { "epoch": 6.767429643527205, "grad_norm": 0.45644304156303406, "learning_rate": 0.002828, "loss": 1.6888, "step": 90176 }, { "epoch": 6.772232645403377, "grad_norm": 0.45382314920425415, "learning_rate": 0.002828, "loss": 1.6948, "step": 90240 }, { "epoch": 6.77703564727955, "grad_norm": 0.43029579520225525, "learning_rate": 0.002828, "loss": 1.6922, "step": 90304 }, { "epoch": 6.7818386491557225, "grad_norm": 0.47476446628570557, "learning_rate": 0.002828, "loss": 1.6933, "step": 90368 }, { "epoch": 6.7866416510318945, "grad_norm": 0.5024870038032532, "learning_rate": 0.002828, "loss": 1.6939, "step": 90432 }, { "epoch": 6.791444652908067, "grad_norm": 0.483139306306839, "learning_rate": 0.002828, "loss": 1.6956, "step": 90496 }, { "epoch": 6.79624765478424, "grad_norm": 0.4529098868370056, "learning_rate": 0.002828, "loss": 1.6897, "step": 90560 }, { "epoch": 6.801050656660413, "grad_norm": 0.4902280271053314, "learning_rate": 0.002828, "loss": 1.694, "step": 90624 }, { "epoch": 6.805853658536585, "grad_norm": 0.46275779604911804, "learning_rate": 0.002828, "loss": 1.6938, "step": 90688 }, { "epoch": 6.810656660412758, "grad_norm": 0.44121673703193665, "learning_rate": 0.002828, "loss": 1.6957, "step": 90752 }, { "epoch": 6.815459662288931, "grad_norm": 0.3859396278858185, "learning_rate": 0.002828, "loss": 1.6918, "step": 90816 }, { "epoch": 6.820262664165103, "grad_norm": 0.43591949343681335, "learning_rate": 0.002828, "loss": 1.6939, "step": 90880 }, { "epoch": 6.825065666041276, "grad_norm": 0.43440181016921997, "learning_rate": 0.002828, "loss": 1.6919, "step": 90944 }, { "epoch": 6.829868667917449, "grad_norm": 0.43394795060157776, "learning_rate": 0.002828, "loss": 1.6877, "step": 91008 }, { "epoch": 6.834671669793621, "grad_norm": 0.46919190883636475, "learning_rate": 0.002828, "loss": 1.687, "step": 91072 }, { "epoch": 6.839474671669794, "grad_norm": 0.4598900377750397, "learning_rate": 0.002828, "loss": 1.6905, "step": 91136 }, { "epoch": 6.8442776735459665, "grad_norm": 0.5242931246757507, "learning_rate": 0.002828, "loss": 1.6806, "step": 91200 }, { "epoch": 6.8490806754221385, "grad_norm": 0.43550997972488403, "learning_rate": 0.002828, "loss": 1.6966, "step": 91264 }, { "epoch": 6.853883677298311, "grad_norm": 0.5565023422241211, "learning_rate": 0.002828, "loss": 1.6903, "step": 91328 }, { "epoch": 6.858686679174484, "grad_norm": 0.4555063545703888, "learning_rate": 0.002828, "loss": 1.686, "step": 91392 }, { "epoch": 6.863489681050657, "grad_norm": 0.4962327480316162, "learning_rate": 0.002828, "loss": 1.69, "step": 91456 }, { "epoch": 6.868292682926829, "grad_norm": 0.3948613107204437, "learning_rate": 0.002828, "loss": 1.6863, "step": 91520 }, { "epoch": 6.873095684803002, "grad_norm": 0.4701470732688904, "learning_rate": 0.002828, "loss": 1.6849, "step": 91584 }, { "epoch": 6.877898686679174, "grad_norm": 0.5295090079307556, "learning_rate": 0.002828, "loss": 1.6957, "step": 91648 }, { "epoch": 6.882701688555347, "grad_norm": 0.5115393996238708, "learning_rate": 0.002828, "loss": 1.6951, "step": 91712 }, { "epoch": 6.88750469043152, "grad_norm": 0.4638088047504425, "learning_rate": 0.002828, "loss": 1.6871, "step": 91776 }, { "epoch": 6.892307692307693, "grad_norm": 0.46743154525756836, "learning_rate": 0.002828, "loss": 1.684, "step": 91840 }, { "epoch": 6.897110694183865, "grad_norm": 0.6080755591392517, "learning_rate": 0.002828, "loss": 1.6914, "step": 91904 }, { "epoch": 6.901913696060038, "grad_norm": 0.42485353350639343, "learning_rate": 0.002828, "loss": 1.6949, "step": 91968 }, { "epoch": 6.9067166979362105, "grad_norm": 0.45448240637779236, "learning_rate": 0.002828, "loss": 1.6943, "step": 92032 }, { "epoch": 6.9115196998123825, "grad_norm": 0.4901677072048187, "learning_rate": 0.002828, "loss": 1.6899, "step": 92096 }, { "epoch": 6.916322701688555, "grad_norm": 0.44569453597068787, "learning_rate": 0.002828, "loss": 1.6897, "step": 92160 }, { "epoch": 6.921125703564728, "grad_norm": 0.47087913751602173, "learning_rate": 0.002828, "loss": 1.6911, "step": 92224 }, { "epoch": 6.9259287054409, "grad_norm": 0.4655872881412506, "learning_rate": 0.002828, "loss": 1.6844, "step": 92288 }, { "epoch": 6.930731707317073, "grad_norm": 0.41938281059265137, "learning_rate": 0.002828, "loss": 1.689, "step": 92352 }, { "epoch": 6.935534709193246, "grad_norm": 0.4859105050563812, "learning_rate": 0.002828, "loss": 1.6864, "step": 92416 }, { "epoch": 6.940337711069418, "grad_norm": 0.5081743597984314, "learning_rate": 0.002828, "loss": 1.6937, "step": 92480 }, { "epoch": 6.945140712945591, "grad_norm": 0.5057895183563232, "learning_rate": 0.002828, "loss": 1.6844, "step": 92544 }, { "epoch": 6.949943714821764, "grad_norm": 0.4937455952167511, "learning_rate": 0.002828, "loss": 1.6875, "step": 92608 }, { "epoch": 6.954746716697937, "grad_norm": 0.4866175353527069, "learning_rate": 0.002828, "loss": 1.6933, "step": 92672 }, { "epoch": 6.959549718574109, "grad_norm": 0.5158609747886658, "learning_rate": 0.002828, "loss": 1.6874, "step": 92736 }, { "epoch": 6.9643527204502815, "grad_norm": 0.5342572331428528, "learning_rate": 0.002828, "loss": 1.6811, "step": 92800 }, { "epoch": 6.969155722326454, "grad_norm": 0.45014888048171997, "learning_rate": 0.002828, "loss": 1.6901, "step": 92864 }, { "epoch": 6.973958724202626, "grad_norm": 0.43926528096199036, "learning_rate": 0.002828, "loss": 1.6906, "step": 92928 }, { "epoch": 6.978761726078799, "grad_norm": 0.4542122781276703, "learning_rate": 0.002828, "loss": 1.6865, "step": 92992 }, { "epoch": 6.983564727954972, "grad_norm": 0.44546231627464294, "learning_rate": 0.002828, "loss": 1.6787, "step": 93056 }, { "epoch": 6.988367729831144, "grad_norm": 0.45137014985084534, "learning_rate": 0.002828, "loss": 1.6883, "step": 93120 }, { "epoch": 6.993170731707317, "grad_norm": 0.49241000413894653, "learning_rate": 0.002828, "loss": 1.6953, "step": 93184 }, { "epoch": 6.99797373358349, "grad_norm": 0.4922092854976654, "learning_rate": 0.002828, "loss": 1.6848, "step": 93248 }, { "epoch": 7.002776735459662, "grad_norm": 0.5105228424072266, "learning_rate": 0.002828, "loss": 1.6573, "step": 93312 }, { "epoch": 7.007579737335835, "grad_norm": 0.42724743485450745, "learning_rate": 0.002828, "loss": 1.6412, "step": 93376 }, { "epoch": 7.012382739212008, "grad_norm": 0.5028865933418274, "learning_rate": 0.002828, "loss": 1.6333, "step": 93440 }, { "epoch": 7.01718574108818, "grad_norm": 0.44145479798316956, "learning_rate": 0.002828, "loss": 1.6397, "step": 93504 }, { "epoch": 7.021988742964353, "grad_norm": 0.40729594230651855, "learning_rate": 0.002828, "loss": 1.6481, "step": 93568 }, { "epoch": 7.0267917448405255, "grad_norm": 0.4236595332622528, "learning_rate": 0.002828, "loss": 1.6464, "step": 93632 }, { "epoch": 7.031594746716698, "grad_norm": 0.4586251378059387, "learning_rate": 0.002828, "loss": 1.6449, "step": 93696 }, { "epoch": 7.03639774859287, "grad_norm": 0.44536247849464417, "learning_rate": 0.002828, "loss": 1.6446, "step": 93760 }, { "epoch": 7.041200750469043, "grad_norm": 0.49832186102867126, "learning_rate": 0.002828, "loss": 1.6446, "step": 93824 }, { "epoch": 7.046003752345216, "grad_norm": 0.49344515800476074, "learning_rate": 0.002828, "loss": 1.6442, "step": 93888 }, { "epoch": 7.050806754221388, "grad_norm": 0.4440404176712036, "learning_rate": 0.002828, "loss": 1.6487, "step": 93952 }, { "epoch": 7.055609756097561, "grad_norm": 0.5434808135032654, "learning_rate": 0.002828, "loss": 1.6508, "step": 94016 }, { "epoch": 7.060412757973734, "grad_norm": 0.4643784463405609, "learning_rate": 0.002828, "loss": 1.645, "step": 94080 }, { "epoch": 7.065215759849906, "grad_norm": 0.4471110999584198, "learning_rate": 0.002828, "loss": 1.6423, "step": 94144 }, { "epoch": 7.070018761726079, "grad_norm": 0.5672070980072021, "learning_rate": 0.002828, "loss": 1.6407, "step": 94208 }, { "epoch": 7.074821763602252, "grad_norm": 0.4709682762622833, "learning_rate": 0.002828, "loss": 1.6361, "step": 94272 }, { "epoch": 7.079624765478424, "grad_norm": 0.4074195921421051, "learning_rate": 0.002828, "loss": 1.6451, "step": 94336 }, { "epoch": 7.084427767354597, "grad_norm": 0.43681713938713074, "learning_rate": 0.002828, "loss": 1.6538, "step": 94400 }, { "epoch": 7.0892307692307694, "grad_norm": 0.47196078300476074, "learning_rate": 0.002828, "loss": 1.6479, "step": 94464 }, { "epoch": 7.094033771106941, "grad_norm": 0.3886224031448364, "learning_rate": 0.002828, "loss": 1.6552, "step": 94528 }, { "epoch": 7.098836772983114, "grad_norm": 0.47881880402565, "learning_rate": 0.002828, "loss": 1.6468, "step": 94592 }, { "epoch": 7.103639774859287, "grad_norm": 0.47139066457748413, "learning_rate": 0.002828, "loss": 1.6474, "step": 94656 }, { "epoch": 7.10844277673546, "grad_norm": 0.3880135118961334, "learning_rate": 0.002828, "loss": 1.6494, "step": 94720 }, { "epoch": 7.113245778611632, "grad_norm": 0.44806167483329773, "learning_rate": 0.002828, "loss": 1.6406, "step": 94784 }, { "epoch": 7.118048780487805, "grad_norm": 0.5234540104866028, "learning_rate": 0.002828, "loss": 1.6466, "step": 94848 }, { "epoch": 7.122851782363978, "grad_norm": 0.4456152021884918, "learning_rate": 0.002828, "loss": 1.6489, "step": 94912 }, { "epoch": 7.12765478424015, "grad_norm": 0.43684425950050354, "learning_rate": 0.002828, "loss": 1.6517, "step": 94976 }, { "epoch": 7.132457786116323, "grad_norm": 0.4596727788448334, "learning_rate": 0.002828, "loss": 1.6462, "step": 95040 }, { "epoch": 7.137260787992496, "grad_norm": 0.44558820128440857, "learning_rate": 0.002828, "loss": 1.6452, "step": 95104 }, { "epoch": 7.142063789868668, "grad_norm": 0.4541861116886139, "learning_rate": 0.002828, "loss": 1.6423, "step": 95168 }, { "epoch": 7.1468667917448405, "grad_norm": 0.6683451533317566, "learning_rate": 0.002828, "loss": 1.6444, "step": 95232 }, { "epoch": 7.151669793621013, "grad_norm": 0.5007615089416504, "learning_rate": 0.002828, "loss": 1.6455, "step": 95296 }, { "epoch": 7.156472795497185, "grad_norm": 0.5515764355659485, "learning_rate": 0.002828, "loss": 1.6496, "step": 95360 }, { "epoch": 7.161275797373358, "grad_norm": 0.460638165473938, "learning_rate": 0.002828, "loss": 1.6431, "step": 95424 }, { "epoch": 7.166078799249531, "grad_norm": 0.4183456003665924, "learning_rate": 0.002828, "loss": 1.6367, "step": 95488 }, { "epoch": 7.170881801125703, "grad_norm": 0.5132945775985718, "learning_rate": 0.002828, "loss": 1.6471, "step": 95552 }, { "epoch": 7.175684803001876, "grad_norm": 0.5141153931617737, "learning_rate": 0.002828, "loss": 1.6483, "step": 95616 }, { "epoch": 7.180487804878049, "grad_norm": 0.5110387802124023, "learning_rate": 0.002828, "loss": 1.647, "step": 95680 }, { "epoch": 7.185290806754221, "grad_norm": 0.5410206317901611, "learning_rate": 0.002828, "loss": 1.6478, "step": 95744 }, { "epoch": 7.190093808630394, "grad_norm": 0.43306368589401245, "learning_rate": 0.002828, "loss": 1.6453, "step": 95808 }, { "epoch": 7.194896810506567, "grad_norm": 0.4982653856277466, "learning_rate": 0.002828, "loss": 1.6492, "step": 95872 }, { "epoch": 7.19969981238274, "grad_norm": 0.47771182656288147, "learning_rate": 0.002828, "loss": 1.6522, "step": 95936 }, { "epoch": 7.204502814258912, "grad_norm": 0.5106894373893738, "learning_rate": 0.002828, "loss": 1.6449, "step": 96000 }, { "epoch": 7.2093058161350845, "grad_norm": 0.5163542628288269, "learning_rate": 0.002828, "loss": 1.646, "step": 96064 }, { "epoch": 7.214108818011257, "grad_norm": 0.48441585898399353, "learning_rate": 0.002828, "loss": 1.6508, "step": 96128 }, { "epoch": 7.218911819887429, "grad_norm": 0.4799892008304596, "learning_rate": 0.002828, "loss": 1.6429, "step": 96192 }, { "epoch": 7.223714821763602, "grad_norm": 0.48237788677215576, "learning_rate": 0.002828, "loss": 1.6502, "step": 96256 }, { "epoch": 7.228517823639775, "grad_norm": 0.43436750769615173, "learning_rate": 0.002828, "loss": 1.6486, "step": 96320 }, { "epoch": 7.233320825515947, "grad_norm": 0.48235654830932617, "learning_rate": 0.002828, "loss": 1.6516, "step": 96384 }, { "epoch": 7.23812382739212, "grad_norm": 0.4596686065196991, "learning_rate": 0.002828, "loss": 1.6482, "step": 96448 }, { "epoch": 7.242926829268293, "grad_norm": 0.509095311164856, "learning_rate": 0.002828, "loss": 1.6528, "step": 96512 }, { "epoch": 7.247729831144465, "grad_norm": 0.5236621499061584, "learning_rate": 0.002828, "loss": 1.6522, "step": 96576 }, { "epoch": 7.252532833020638, "grad_norm": 0.46608901023864746, "learning_rate": 0.002828, "loss": 1.6456, "step": 96640 }, { "epoch": 7.257335834896811, "grad_norm": 0.40260711312294006, "learning_rate": 0.002828, "loss": 1.6442, "step": 96704 }, { "epoch": 7.2621388367729836, "grad_norm": 0.4557930529117584, "learning_rate": 0.002828, "loss": 1.6447, "step": 96768 }, { "epoch": 7.2669418386491555, "grad_norm": 0.4857008457183838, "learning_rate": 0.002828, "loss": 1.645, "step": 96832 }, { "epoch": 7.271744840525328, "grad_norm": 0.47859156131744385, "learning_rate": 0.002828, "loss": 1.6446, "step": 96896 }, { "epoch": 7.276547842401501, "grad_norm": 0.4860116243362427, "learning_rate": 0.002828, "loss": 1.6451, "step": 96960 }, { "epoch": 7.281350844277673, "grad_norm": 0.47068920731544495, "learning_rate": 0.002828, "loss": 1.643, "step": 97024 }, { "epoch": 7.286153846153846, "grad_norm": 0.4627678394317627, "learning_rate": 0.002828, "loss": 1.6464, "step": 97088 }, { "epoch": 7.290956848030019, "grad_norm": 0.43063241243362427, "learning_rate": 0.002828, "loss": 1.6455, "step": 97152 }, { "epoch": 7.295759849906191, "grad_norm": 0.5483308434486389, "learning_rate": 0.002828, "loss": 1.6477, "step": 97216 }, { "epoch": 7.300562851782364, "grad_norm": 0.41557881236076355, "learning_rate": 0.002828, "loss": 1.6518, "step": 97280 }, { "epoch": 7.305365853658537, "grad_norm": 0.4560103416442871, "learning_rate": 0.002828, "loss": 1.6494, "step": 97344 }, { "epoch": 7.310168855534709, "grad_norm": 0.41694557666778564, "learning_rate": 0.002828, "loss": 1.6485, "step": 97408 }, { "epoch": 7.314971857410882, "grad_norm": 0.393020898103714, "learning_rate": 0.002828, "loss": 1.6483, "step": 97472 }, { "epoch": 7.319774859287055, "grad_norm": 0.4669319987297058, "learning_rate": 0.002828, "loss": 1.6502, "step": 97536 }, { "epoch": 7.324577861163227, "grad_norm": 0.4242900609970093, "learning_rate": 0.002828, "loss": 1.644, "step": 97600 }, { "epoch": 7.3293808630393995, "grad_norm": 0.4920247197151184, "learning_rate": 0.002828, "loss": 1.6453, "step": 97664 }, { "epoch": 7.334183864915572, "grad_norm": 0.46534204483032227, "learning_rate": 0.002828, "loss": 1.6506, "step": 97728 }, { "epoch": 7.338986866791744, "grad_norm": 0.4295295476913452, "learning_rate": 0.002828, "loss": 1.6429, "step": 97792 }, { "epoch": 7.343789868667917, "grad_norm": 0.6084052324295044, "learning_rate": 0.002828, "loss": 1.6414, "step": 97856 }, { "epoch": 7.34859287054409, "grad_norm": 0.5155935883522034, "learning_rate": 0.002828, "loss": 1.649, "step": 97920 }, { "epoch": 7.353395872420263, "grad_norm": 0.5151852369308472, "learning_rate": 0.002828, "loss": 1.6484, "step": 97984 }, { "epoch": 7.358198874296435, "grad_norm": 0.4509267210960388, "learning_rate": 0.002828, "loss": 1.6504, "step": 98048 }, { "epoch": 7.363001876172608, "grad_norm": 0.4989111125469208, "learning_rate": 0.002828, "loss": 1.6384, "step": 98112 }, { "epoch": 7.367804878048781, "grad_norm": 0.5407853722572327, "learning_rate": 0.002828, "loss": 1.6462, "step": 98176 }, { "epoch": 7.372607879924953, "grad_norm": 0.48210641741752625, "learning_rate": 0.002828, "loss": 1.6422, "step": 98240 }, { "epoch": 7.377410881801126, "grad_norm": 0.4497006833553314, "learning_rate": 0.002828, "loss": 1.6476, "step": 98304 }, { "epoch": 7.382213883677299, "grad_norm": 0.5384613871574402, "learning_rate": 0.002828, "loss": 1.65, "step": 98368 }, { "epoch": 7.387016885553471, "grad_norm": 0.45183315873146057, "learning_rate": 0.002828, "loss": 1.6489, "step": 98432 }, { "epoch": 7.3918198874296435, "grad_norm": 0.5084384083747864, "learning_rate": 0.002828, "loss": 1.647, "step": 98496 }, { "epoch": 7.396622889305816, "grad_norm": 0.4499737620353699, "learning_rate": 0.002828, "loss": 1.6491, "step": 98560 }, { "epoch": 7.401425891181988, "grad_norm": 0.46009787917137146, "learning_rate": 0.002828, "loss": 1.6419, "step": 98624 }, { "epoch": 7.406228893058161, "grad_norm": 0.45367369055747986, "learning_rate": 0.002828, "loss": 1.651, "step": 98688 }, { "epoch": 7.411031894934334, "grad_norm": 0.44495901465415955, "learning_rate": 0.002828, "loss": 1.647, "step": 98752 }, { "epoch": 7.415834896810507, "grad_norm": 0.5723180770874023, "learning_rate": 0.002828, "loss": 1.6486, "step": 98816 }, { "epoch": 7.420637898686679, "grad_norm": 0.4274926483631134, "learning_rate": 0.002828, "loss": 1.6558, "step": 98880 }, { "epoch": 7.425440900562852, "grad_norm": 0.4769154489040375, "learning_rate": 0.002828, "loss": 1.6476, "step": 98944 }, { "epoch": 7.430243902439025, "grad_norm": 0.46415236592292786, "learning_rate": 0.002828, "loss": 1.6446, "step": 99008 }, { "epoch": 7.435046904315197, "grad_norm": 0.4672410488128662, "learning_rate": 0.002828, "loss": 1.6532, "step": 99072 }, { "epoch": 7.43984990619137, "grad_norm": 0.4316319227218628, "learning_rate": 0.002828, "loss": 1.6459, "step": 99136 }, { "epoch": 7.4446529080675425, "grad_norm": 0.4864663779735565, "learning_rate": 0.002828, "loss": 1.6427, "step": 99200 }, { "epoch": 7.4494559099437145, "grad_norm": 0.4411129951477051, "learning_rate": 0.002828, "loss": 1.6439, "step": 99264 }, { "epoch": 7.454258911819887, "grad_norm": 0.4902966618537903, "learning_rate": 0.002828, "loss": 1.6392, "step": 99328 }, { "epoch": 7.45906191369606, "grad_norm": 0.5182162523269653, "learning_rate": 0.002828, "loss": 1.6546, "step": 99392 }, { "epoch": 7.463864915572232, "grad_norm": 0.44750234484672546, "learning_rate": 0.002828, "loss": 1.641, "step": 99456 }, { "epoch": 7.468667917448405, "grad_norm": 0.646702229976654, "learning_rate": 0.002828, "loss": 1.65, "step": 99520 }, { "epoch": 7.473470919324578, "grad_norm": 0.4905090630054474, "learning_rate": 0.002828, "loss": 1.6444, "step": 99584 }, { "epoch": 7.47827392120075, "grad_norm": 0.5883878469467163, "learning_rate": 0.002828, "loss": 1.6424, "step": 99648 }, { "epoch": 7.483076923076923, "grad_norm": 0.5606529116630554, "learning_rate": 0.002828, "loss": 1.6474, "step": 99712 }, { "epoch": 7.487879924953096, "grad_norm": 0.47397708892822266, "learning_rate": 0.002828, "loss": 1.6432, "step": 99776 }, { "epoch": 7.492682926829268, "grad_norm": 0.47367072105407715, "learning_rate": 0.002828, "loss": 1.6422, "step": 99840 }, { "epoch": 7.497485928705441, "grad_norm": 0.4972234070301056, "learning_rate": 0.002828, "loss": 1.6483, "step": 99904 }, { "epoch": 7.502288930581614, "grad_norm": 0.5520415306091309, "learning_rate": 0.002828, "loss": 1.6409, "step": 99968 }, { "epoch": 7.5070919324577865, "grad_norm": 0.403537392616272, "learning_rate": 0.002828, "loss": 1.6456, "step": 100032 }, { "epoch": 7.5118949343339585, "grad_norm": 0.5851362347602844, "learning_rate": 0.002828, "loss": 1.6429, "step": 100096 }, { "epoch": 7.516697936210131, "grad_norm": 0.4804452359676361, "learning_rate": 0.002828, "loss": 1.6398, "step": 100160 }, { "epoch": 7.521500938086304, "grad_norm": 0.4467880427837372, "learning_rate": 0.002828, "loss": 1.6457, "step": 100224 }, { "epoch": 7.526303939962476, "grad_norm": 0.5438836216926575, "learning_rate": 0.002828, "loss": 1.6468, "step": 100288 }, { "epoch": 7.531106941838649, "grad_norm": 0.4456494450569153, "learning_rate": 0.002828, "loss": 1.6462, "step": 100352 }, { "epoch": 7.535909943714822, "grad_norm": 0.4625204801559448, "learning_rate": 0.002828, "loss": 1.6462, "step": 100416 }, { "epoch": 7.540712945590994, "grad_norm": 0.45541369915008545, "learning_rate": 0.002828, "loss": 1.6545, "step": 100480 }, { "epoch": 7.545515947467167, "grad_norm": 0.4680885374546051, "learning_rate": 0.002828, "loss": 1.6479, "step": 100544 }, { "epoch": 7.55031894934334, "grad_norm": 0.5363187789916992, "learning_rate": 0.002828, "loss": 1.6457, "step": 100608 }, { "epoch": 7.555121951219512, "grad_norm": 0.4436447024345398, "learning_rate": 0.002828, "loss": 1.6489, "step": 100672 }, { "epoch": 7.559924953095685, "grad_norm": 0.4831717014312744, "learning_rate": 0.002828, "loss": 1.6452, "step": 100736 }, { "epoch": 7.564727954971858, "grad_norm": 0.587341845035553, "learning_rate": 0.002828, "loss": 1.6511, "step": 100800 }, { "epoch": 7.5695309568480305, "grad_norm": 0.5343487858772278, "learning_rate": 0.002828, "loss": 1.6419, "step": 100864 }, { "epoch": 7.5743339587242025, "grad_norm": 0.5156888365745544, "learning_rate": 0.002828, "loss": 1.6443, "step": 100928 }, { "epoch": 7.579136960600375, "grad_norm": 0.4895589053630829, "learning_rate": 0.002828, "loss": 1.6489, "step": 100992 }, { "epoch": 7.583939962476548, "grad_norm": 0.4382045269012451, "learning_rate": 0.002828, "loss": 1.6381, "step": 101056 }, { "epoch": 7.58874296435272, "grad_norm": 0.5956616401672363, "learning_rate": 0.002828, "loss": 1.643, "step": 101120 }, { "epoch": 7.593545966228893, "grad_norm": 0.496944785118103, "learning_rate": 0.002828, "loss": 1.6496, "step": 101184 }, { "epoch": 7.598348968105066, "grad_norm": 0.5628007054328918, "learning_rate": 0.002828, "loss": 1.644, "step": 101248 }, { "epoch": 7.603151969981238, "grad_norm": 0.45873934030532837, "learning_rate": 0.002828, "loss": 1.6426, "step": 101312 }, { "epoch": 7.607954971857411, "grad_norm": 0.5054008960723877, "learning_rate": 0.002828, "loss": 1.6446, "step": 101376 }, { "epoch": 7.612757973733584, "grad_norm": 0.45213744044303894, "learning_rate": 0.002828, "loss": 1.6423, "step": 101440 }, { "epoch": 7.617560975609756, "grad_norm": 0.45042672753334045, "learning_rate": 0.002828, "loss": 1.6449, "step": 101504 }, { "epoch": 7.622363977485929, "grad_norm": 0.4785134494304657, "learning_rate": 0.002828, "loss": 1.6434, "step": 101568 }, { "epoch": 7.6271669793621015, "grad_norm": 0.47091761231422424, "learning_rate": 0.002828, "loss": 1.6417, "step": 101632 }, { "epoch": 7.631969981238274, "grad_norm": 0.5239785313606262, "learning_rate": 0.002828, "loss": 1.642, "step": 101696 }, { "epoch": 7.636772983114446, "grad_norm": 0.47115275263786316, "learning_rate": 0.002828, "loss": 1.6402, "step": 101760 }, { "epoch": 7.641575984990619, "grad_norm": 0.4137537181377411, "learning_rate": 0.002828, "loss": 1.645, "step": 101824 }, { "epoch": 7.646378986866791, "grad_norm": 0.45282185077667236, "learning_rate": 0.002828, "loss": 1.6422, "step": 101888 }, { "epoch": 7.651181988742964, "grad_norm": 0.47771942615509033, "learning_rate": 0.002828, "loss": 1.6416, "step": 101952 }, { "epoch": 7.655984990619137, "grad_norm": 0.4466339945793152, "learning_rate": 0.002828, "loss": 1.6432, "step": 102016 }, { "epoch": 7.66078799249531, "grad_norm": 0.5430694818496704, "learning_rate": 0.002828, "loss": 1.6379, "step": 102080 }, { "epoch": 7.665590994371482, "grad_norm": 0.6273244619369507, "learning_rate": 0.002828, "loss": 1.6511, "step": 102144 }, { "epoch": 7.670393996247655, "grad_norm": 0.5001136660575867, "learning_rate": 0.002828, "loss": 1.6378, "step": 102208 }, { "epoch": 7.675196998123828, "grad_norm": 0.48289889097213745, "learning_rate": 0.002828, "loss": 1.6464, "step": 102272 }, { "epoch": 7.68, "grad_norm": 0.38933464884757996, "learning_rate": 0.002828, "loss": 1.64, "step": 102336 }, { "epoch": 7.684803001876173, "grad_norm": 0.4248032867908478, "learning_rate": 0.002828, "loss": 1.6437, "step": 102400 }, { "epoch": 7.6896060037523455, "grad_norm": 0.4562476575374603, "learning_rate": 0.002828, "loss": 1.6469, "step": 102464 }, { "epoch": 7.6944090056285175, "grad_norm": 0.44395706057548523, "learning_rate": 0.002828, "loss": 1.6411, "step": 102528 }, { "epoch": 7.69921200750469, "grad_norm": 0.43343424797058105, "learning_rate": 0.002828, "loss": 1.6431, "step": 102592 }, { "epoch": 7.704015009380863, "grad_norm": 0.5182087421417236, "learning_rate": 0.002828, "loss": 1.6481, "step": 102656 }, { "epoch": 7.708818011257035, "grad_norm": 0.4748702943325043, "learning_rate": 0.002828, "loss": 1.6454, "step": 102720 }, { "epoch": 7.713621013133208, "grad_norm": 0.46305951476097107, "learning_rate": 0.002828, "loss": 1.6428, "step": 102784 }, { "epoch": 7.718424015009381, "grad_norm": 0.4430948495864868, "learning_rate": 0.002828, "loss": 1.6378, "step": 102848 }, { "epoch": 7.723227016885554, "grad_norm": 0.5415197610855103, "learning_rate": 0.002828, "loss": 1.6456, "step": 102912 }, { "epoch": 7.728030018761726, "grad_norm": 0.4879511594772339, "learning_rate": 0.002828, "loss": 1.6418, "step": 102976 }, { "epoch": 7.732833020637899, "grad_norm": 0.4881740212440491, "learning_rate": 0.002828, "loss": 1.6409, "step": 103040 }, { "epoch": 7.737636022514072, "grad_norm": 0.4493964910507202, "learning_rate": 0.002828, "loss": 1.6496, "step": 103104 }, { "epoch": 7.742439024390244, "grad_norm": 0.44141024351119995, "learning_rate": 0.002828, "loss": 1.6432, "step": 103168 }, { "epoch": 7.747242026266417, "grad_norm": 0.5163127183914185, "learning_rate": 0.002828, "loss": 1.6386, "step": 103232 }, { "epoch": 7.7520450281425894, "grad_norm": 0.46502265334129333, "learning_rate": 0.002828, "loss": 1.6484, "step": 103296 }, { "epoch": 7.756848030018761, "grad_norm": 0.4692935347557068, "learning_rate": 0.002828, "loss": 1.6417, "step": 103360 }, { "epoch": 7.761651031894934, "grad_norm": 0.5229098796844482, "learning_rate": 0.002828, "loss": 1.6402, "step": 103424 }, { "epoch": 7.766454033771107, "grad_norm": 0.4577805697917938, "learning_rate": 0.002828, "loss": 1.6413, "step": 103488 }, { "epoch": 7.771257035647279, "grad_norm": 0.4625105559825897, "learning_rate": 0.002828, "loss": 1.64, "step": 103552 }, { "epoch": 7.776060037523452, "grad_norm": 0.4869496524333954, "learning_rate": 0.002828, "loss": 1.631, "step": 103616 }, { "epoch": 7.780863039399625, "grad_norm": 0.5049871802330017, "learning_rate": 0.002828, "loss": 1.637, "step": 103680 }, { "epoch": 7.785666041275798, "grad_norm": 0.4739224910736084, "learning_rate": 0.002828, "loss": 1.6349, "step": 103744 }, { "epoch": 7.79046904315197, "grad_norm": 0.5248587727546692, "learning_rate": 0.002828, "loss": 1.6416, "step": 103808 }, { "epoch": 7.795272045028143, "grad_norm": 0.5122642517089844, "learning_rate": 0.002828, "loss": 1.6353, "step": 103872 }, { "epoch": 7.800075046904315, "grad_norm": 0.5079776048660278, "learning_rate": 0.002828, "loss": 1.6377, "step": 103936 }, { "epoch": 7.804878048780488, "grad_norm": 0.5826370120048523, "learning_rate": 0.002828, "loss": 1.6399, "step": 104000 }, { "epoch": 7.8096810506566605, "grad_norm": 0.4368886351585388, "learning_rate": 0.002828, "loss": 1.641, "step": 104064 }, { "epoch": 7.814484052532833, "grad_norm": 0.4432503581047058, "learning_rate": 0.002828, "loss": 1.6411, "step": 104128 }, { "epoch": 7.819287054409005, "grad_norm": 0.4679797291755676, "learning_rate": 0.002828, "loss": 1.639, "step": 104192 }, { "epoch": 7.824090056285178, "grad_norm": 0.44275757670402527, "learning_rate": 0.002828, "loss": 1.6344, "step": 104256 }, { "epoch": 7.828893058161351, "grad_norm": 0.5065768957138062, "learning_rate": 0.002828, "loss": 1.6364, "step": 104320 }, { "epoch": 7.833696060037523, "grad_norm": 0.5551817417144775, "learning_rate": 0.002828, "loss": 1.6428, "step": 104384 }, { "epoch": 7.838499061913696, "grad_norm": 0.5234085321426392, "learning_rate": 0.002828, "loss": 1.6422, "step": 104448 }, { "epoch": 7.843302063789869, "grad_norm": 0.4474858045578003, "learning_rate": 0.002828, "loss": 1.6349, "step": 104512 }, { "epoch": 7.848105065666041, "grad_norm": 0.41089287400245667, "learning_rate": 0.002828, "loss": 1.6374, "step": 104576 }, { "epoch": 7.852908067542214, "grad_norm": 0.5417289733886719, "learning_rate": 0.002828, "loss": 1.6416, "step": 104640 }, { "epoch": 7.857711069418387, "grad_norm": 0.492750883102417, "learning_rate": 0.002828, "loss": 1.636, "step": 104704 }, { "epoch": 7.862514071294559, "grad_norm": 0.5007115602493286, "learning_rate": 0.002828, "loss": 1.637, "step": 104768 }, { "epoch": 7.867317073170732, "grad_norm": 0.42121168971061707, "learning_rate": 0.002828, "loss": 1.64, "step": 104832 }, { "epoch": 7.8721200750469045, "grad_norm": 0.4751914143562317, "learning_rate": 0.002828, "loss": 1.643, "step": 104896 }, { "epoch": 7.876923076923077, "grad_norm": 0.4676034450531006, "learning_rate": 0.002828, "loss": 1.6444, "step": 104960 }, { "epoch": 7.881726078799249, "grad_norm": 0.42461201548576355, "learning_rate": 0.002828, "loss": 1.6382, "step": 105024 }, { "epoch": 7.886529080675422, "grad_norm": 0.41529977321624756, "learning_rate": 0.002828, "loss": 1.6321, "step": 105088 }, { "epoch": 7.891332082551595, "grad_norm": 0.5157459378242493, "learning_rate": 0.002828, "loss": 1.6278, "step": 105152 }, { "epoch": 7.896135084427767, "grad_norm": 0.386042058467865, "learning_rate": 0.002828, "loss": 1.6296, "step": 105216 }, { "epoch": 7.90093808630394, "grad_norm": 0.5516283512115479, "learning_rate": 0.002828, "loss": 1.6367, "step": 105280 }, { "epoch": 7.905741088180113, "grad_norm": 0.4407348930835724, "learning_rate": 0.002828, "loss": 1.632, "step": 105344 }, { "epoch": 7.910544090056285, "grad_norm": 0.511715829372406, "learning_rate": 0.002828, "loss": 1.6401, "step": 105408 }, { "epoch": 7.915347091932458, "grad_norm": 0.5613473057746887, "learning_rate": 0.002828, "loss": 1.6327, "step": 105472 }, { "epoch": 7.920150093808631, "grad_norm": 0.47922655940055847, "learning_rate": 0.002828, "loss": 1.6299, "step": 105536 }, { "epoch": 7.924953095684803, "grad_norm": 0.5997971296310425, "learning_rate": 0.002828, "loss": 1.6349, "step": 105600 }, { "epoch": 7.9297560975609755, "grad_norm": 0.4698721766471863, "learning_rate": 0.002828, "loss": 1.6397, "step": 105664 }, { "epoch": 7.934559099437148, "grad_norm": 0.42611223459243774, "learning_rate": 0.002828, "loss": 1.6408, "step": 105728 }, { "epoch": 7.939362101313321, "grad_norm": 0.5407405495643616, "learning_rate": 0.002828, "loss": 1.6364, "step": 105792 }, { "epoch": 7.944165103189493, "grad_norm": 0.4046322703361511, "learning_rate": 0.002828, "loss": 1.6384, "step": 105856 }, { "epoch": 7.948968105065666, "grad_norm": 0.4765555262565613, "learning_rate": 0.002828, "loss": 1.6422, "step": 105920 }, { "epoch": 7.953771106941838, "grad_norm": 0.4602759778499603, "learning_rate": 0.002828, "loss": 1.6348, "step": 105984 }, { "epoch": 7.958574108818011, "grad_norm": 0.42289867997169495, "learning_rate": 0.002828, "loss": 1.6329, "step": 106048 }, { "epoch": 7.963377110694184, "grad_norm": 0.4758918285369873, "learning_rate": 0.002828, "loss": 1.6373, "step": 106112 }, { "epoch": 7.968180112570357, "grad_norm": 0.543738603591919, "learning_rate": 0.002828, "loss": 1.6276, "step": 106176 }, { "epoch": 7.972983114446529, "grad_norm": 0.5426182150840759, "learning_rate": 0.002828, "loss": 1.6377, "step": 106240 }, { "epoch": 7.977786116322702, "grad_norm": 0.3883894681930542, "learning_rate": 0.002828, "loss": 1.6428, "step": 106304 }, { "epoch": 7.982589118198875, "grad_norm": 0.49923983216285706, "learning_rate": 0.002828, "loss": 1.6389, "step": 106368 }, { "epoch": 7.987392120075047, "grad_norm": 0.5695561766624451, "learning_rate": 0.002828, "loss": 1.6374, "step": 106432 }, { "epoch": 7.9921951219512195, "grad_norm": 0.5229542255401611, "learning_rate": 0.002828, "loss": 1.6402, "step": 106496 }, { "epoch": 7.996998123827392, "grad_norm": 0.5376026630401611, "learning_rate": 0.002828, "loss": 1.6382, "step": 106560 }, { "epoch": 8.001801125703565, "grad_norm": 0.4897994101047516, "learning_rate": 0.002828, "loss": 1.6212, "step": 106624 }, { "epoch": 8.006604127579738, "grad_norm": 0.5158925652503967, "learning_rate": 0.002828, "loss": 1.5909, "step": 106688 }, { "epoch": 8.01140712945591, "grad_norm": 0.4795593023300171, "learning_rate": 0.002828, "loss": 1.5891, "step": 106752 }, { "epoch": 8.016210131332082, "grad_norm": 0.4325263500213623, "learning_rate": 0.002828, "loss": 1.591, "step": 106816 }, { "epoch": 8.021013133208255, "grad_norm": 0.41282597184181213, "learning_rate": 0.002828, "loss": 1.59, "step": 106880 }, { "epoch": 8.025816135084428, "grad_norm": 0.5297383666038513, "learning_rate": 0.002828, "loss": 1.5954, "step": 106944 }, { "epoch": 8.0306191369606, "grad_norm": 0.5734117031097412, "learning_rate": 0.002828, "loss": 1.5855, "step": 107008 }, { "epoch": 8.035422138836774, "grad_norm": 0.49331584572792053, "learning_rate": 0.002828, "loss": 1.591, "step": 107072 }, { "epoch": 8.040225140712945, "grad_norm": 0.4974007308483124, "learning_rate": 0.002828, "loss": 1.5948, "step": 107136 }, { "epoch": 8.045028142589118, "grad_norm": 0.5249443650245667, "learning_rate": 0.002828, "loss": 1.5982, "step": 107200 }, { "epoch": 8.04983114446529, "grad_norm": 0.42286255955696106, "learning_rate": 0.002828, "loss": 1.5985, "step": 107264 }, { "epoch": 8.054634146341463, "grad_norm": 0.4468154311180115, "learning_rate": 0.002828, "loss": 1.5902, "step": 107328 }, { "epoch": 8.059437148217636, "grad_norm": 0.483468621969223, "learning_rate": 0.002828, "loss": 1.5882, "step": 107392 }, { "epoch": 8.06424015009381, "grad_norm": 0.43714627623558044, "learning_rate": 0.002828, "loss": 1.59, "step": 107456 }, { "epoch": 8.069043151969982, "grad_norm": 0.46889758110046387, "learning_rate": 0.002828, "loss": 1.5907, "step": 107520 }, { "epoch": 8.073846153846153, "grad_norm": 0.43405240774154663, "learning_rate": 0.002828, "loss": 1.593, "step": 107584 }, { "epoch": 8.078649155722326, "grad_norm": 0.45629021525382996, "learning_rate": 0.002828, "loss": 1.5964, "step": 107648 }, { "epoch": 8.083452157598499, "grad_norm": 0.46646854281425476, "learning_rate": 0.002828, "loss": 1.5921, "step": 107712 }, { "epoch": 8.088255159474672, "grad_norm": 0.4769529104232788, "learning_rate": 0.002828, "loss": 1.5984, "step": 107776 }, { "epoch": 8.093058161350845, "grad_norm": 0.4490543305873871, "learning_rate": 0.002828, "loss": 1.6025, "step": 107840 }, { "epoch": 8.097861163227018, "grad_norm": 0.5665892958641052, "learning_rate": 0.002828, "loss": 1.5988, "step": 107904 }, { "epoch": 8.102664165103189, "grad_norm": 0.4945269227027893, "learning_rate": 0.002828, "loss": 1.5919, "step": 107968 }, { "epoch": 8.107467166979362, "grad_norm": 0.579629123210907, "learning_rate": 0.002828, "loss": 1.594, "step": 108032 }, { "epoch": 8.112270168855535, "grad_norm": 0.5322190523147583, "learning_rate": 0.002828, "loss": 1.5918, "step": 108096 }, { "epoch": 8.117073170731707, "grad_norm": 0.4702974259853363, "learning_rate": 0.002828, "loss": 1.5897, "step": 108160 }, { "epoch": 8.12187617260788, "grad_norm": 0.5027482509613037, "learning_rate": 0.002828, "loss": 1.5974, "step": 108224 }, { "epoch": 8.126679174484053, "grad_norm": 0.43558311462402344, "learning_rate": 0.002828, "loss": 1.5913, "step": 108288 }, { "epoch": 8.131482176360224, "grad_norm": 0.5288829803466797, "learning_rate": 0.002828, "loss": 1.5986, "step": 108352 }, { "epoch": 8.136285178236397, "grad_norm": 0.4630937874317169, "learning_rate": 0.002828, "loss": 1.599, "step": 108416 }, { "epoch": 8.14108818011257, "grad_norm": 0.44643405079841614, "learning_rate": 0.002828, "loss": 1.6024, "step": 108480 }, { "epoch": 8.145891181988743, "grad_norm": 0.4901682436466217, "learning_rate": 0.002828, "loss": 1.599, "step": 108544 }, { "epoch": 8.150694183864916, "grad_norm": 0.4672674536705017, "learning_rate": 0.002828, "loss": 1.5951, "step": 108608 }, { "epoch": 8.155497185741089, "grad_norm": 0.5036239624023438, "learning_rate": 0.002828, "loss": 1.5997, "step": 108672 }, { "epoch": 8.160300187617262, "grad_norm": 0.46045413613319397, "learning_rate": 0.002828, "loss": 1.5918, "step": 108736 }, { "epoch": 8.165103189493433, "grad_norm": 0.45920488238334656, "learning_rate": 0.002828, "loss": 1.597, "step": 108800 }, { "epoch": 8.169906191369606, "grad_norm": 0.5360845327377319, "learning_rate": 0.002828, "loss": 1.588, "step": 108864 }, { "epoch": 8.174709193245778, "grad_norm": 0.5119756460189819, "learning_rate": 0.002828, "loss": 1.5874, "step": 108928 }, { "epoch": 8.179512195121951, "grad_norm": 0.4600110948085785, "learning_rate": 0.002828, "loss": 1.6008, "step": 108992 }, { "epoch": 8.184315196998124, "grad_norm": 0.47242122888565063, "learning_rate": 0.002828, "loss": 1.6017, "step": 109056 }, { "epoch": 8.189118198874297, "grad_norm": 0.5298404097557068, "learning_rate": 0.002828, "loss": 1.595, "step": 109120 }, { "epoch": 8.193921200750468, "grad_norm": 0.5570024251937866, "learning_rate": 0.002828, "loss": 1.5949, "step": 109184 }, { "epoch": 8.198724202626641, "grad_norm": 0.46537068486213684, "learning_rate": 0.002828, "loss": 1.5901, "step": 109248 }, { "epoch": 8.203527204502814, "grad_norm": 0.5504530668258667, "learning_rate": 0.002828, "loss": 1.5961, "step": 109312 }, { "epoch": 8.208330206378987, "grad_norm": 0.5117718577384949, "learning_rate": 0.002828, "loss": 1.5986, "step": 109376 }, { "epoch": 8.21313320825516, "grad_norm": 0.45419740676879883, "learning_rate": 0.002828, "loss": 1.5995, "step": 109440 }, { "epoch": 8.217936210131333, "grad_norm": 0.5005888938903809, "learning_rate": 0.002828, "loss": 1.6025, "step": 109504 }, { "epoch": 8.222739212007504, "grad_norm": 0.5024480223655701, "learning_rate": 0.002828, "loss": 1.5961, "step": 109568 }, { "epoch": 8.227542213883677, "grad_norm": 0.5216082334518433, "learning_rate": 0.002828, "loss": 1.6023, "step": 109632 }, { "epoch": 8.23234521575985, "grad_norm": 0.4577321708202362, "learning_rate": 0.002828, "loss": 1.5976, "step": 109696 }, { "epoch": 8.237148217636022, "grad_norm": 0.5149946808815002, "learning_rate": 0.002828, "loss": 1.6038, "step": 109760 }, { "epoch": 8.241951219512195, "grad_norm": 0.43793120980262756, "learning_rate": 0.002828, "loss": 1.5975, "step": 109824 }, { "epoch": 8.246754221388368, "grad_norm": 0.45768973231315613, "learning_rate": 0.002828, "loss": 1.6025, "step": 109888 }, { "epoch": 8.251557223264541, "grad_norm": 0.5439128875732422, "learning_rate": 0.002828, "loss": 1.592, "step": 109952 }, { "epoch": 8.256360225140712, "grad_norm": 0.47533735632896423, "learning_rate": 0.002828, "loss": 1.6008, "step": 110016 }, { "epoch": 8.261163227016885, "grad_norm": 0.4820833206176758, "learning_rate": 0.002828, "loss": 1.5972, "step": 110080 }, { "epoch": 8.265966228893058, "grad_norm": 0.5155043005943298, "learning_rate": 0.002828, "loss": 1.5898, "step": 110144 }, { "epoch": 8.27076923076923, "grad_norm": 0.5212211012840271, "learning_rate": 0.002828, "loss": 1.5986, "step": 110208 }, { "epoch": 8.275572232645404, "grad_norm": 0.42652082443237305, "learning_rate": 0.002828, "loss": 1.6001, "step": 110272 }, { "epoch": 8.280375234521577, "grad_norm": 0.4728452265262604, "learning_rate": 0.002828, "loss": 1.5949, "step": 110336 }, { "epoch": 8.285178236397748, "grad_norm": 0.496060311794281, "learning_rate": 0.002828, "loss": 1.6013, "step": 110400 }, { "epoch": 8.28998123827392, "grad_norm": 0.4925978481769562, "learning_rate": 0.002828, "loss": 1.5957, "step": 110464 }, { "epoch": 8.294784240150094, "grad_norm": 0.4829269051551819, "learning_rate": 0.002828, "loss": 1.5999, "step": 110528 }, { "epoch": 8.299587242026266, "grad_norm": 0.509156346321106, "learning_rate": 0.002828, "loss": 1.5939, "step": 110592 }, { "epoch": 8.30439024390244, "grad_norm": 0.4902212619781494, "learning_rate": 0.002828, "loss": 1.5992, "step": 110656 }, { "epoch": 8.309193245778612, "grad_norm": 0.50597083568573, "learning_rate": 0.002828, "loss": 1.6017, "step": 110720 }, { "epoch": 8.313996247654785, "grad_norm": 0.519462525844574, "learning_rate": 0.002828, "loss": 1.5987, "step": 110784 }, { "epoch": 8.318799249530956, "grad_norm": 0.5620601773262024, "learning_rate": 0.002828, "loss": 1.5972, "step": 110848 }, { "epoch": 8.323602251407129, "grad_norm": 0.5269021391868591, "learning_rate": 0.002828, "loss": 1.6045, "step": 110912 }, { "epoch": 8.328405253283302, "grad_norm": 0.4557552635669708, "learning_rate": 0.002828, "loss": 1.5982, "step": 110976 }, { "epoch": 8.333208255159475, "grad_norm": 0.4556206464767456, "learning_rate": 0.002828, "loss": 1.5969, "step": 111040 }, { "epoch": 8.338011257035648, "grad_norm": 0.434209942817688, "learning_rate": 0.002828, "loss": 1.5939, "step": 111104 }, { "epoch": 8.34281425891182, "grad_norm": 0.44927170872688293, "learning_rate": 0.002828, "loss": 1.6014, "step": 111168 }, { "epoch": 8.347617260787992, "grad_norm": 0.5330567359924316, "learning_rate": 0.002828, "loss": 1.601, "step": 111232 }, { "epoch": 8.352420262664165, "grad_norm": 0.4359879195690155, "learning_rate": 0.002828, "loss": 1.6015, "step": 111296 }, { "epoch": 8.357223264540337, "grad_norm": 0.5834143161773682, "learning_rate": 0.002828, "loss": 1.5954, "step": 111360 }, { "epoch": 8.36202626641651, "grad_norm": 0.46430498361587524, "learning_rate": 0.002828, "loss": 1.5949, "step": 111424 }, { "epoch": 8.366829268292683, "grad_norm": 0.4292107820510864, "learning_rate": 0.002828, "loss": 1.5992, "step": 111488 }, { "epoch": 8.371632270168856, "grad_norm": 0.49093276262283325, "learning_rate": 0.002828, "loss": 1.5946, "step": 111552 }, { "epoch": 8.376435272045029, "grad_norm": 0.4749935567378998, "learning_rate": 0.002828, "loss": 1.5964, "step": 111616 }, { "epoch": 8.3812382739212, "grad_norm": 0.5645825862884521, "learning_rate": 0.002828, "loss": 1.5981, "step": 111680 }, { "epoch": 8.386041275797373, "grad_norm": 0.5568863749504089, "learning_rate": 0.002828, "loss": 1.6017, "step": 111744 }, { "epoch": 8.390844277673546, "grad_norm": 0.5183071494102478, "learning_rate": 0.002828, "loss": 1.5968, "step": 111808 }, { "epoch": 8.395647279549719, "grad_norm": 0.4693017601966858, "learning_rate": 0.002828, "loss": 1.5932, "step": 111872 }, { "epoch": 8.400450281425892, "grad_norm": 0.6434198617935181, "learning_rate": 0.002828, "loss": 1.5973, "step": 111936 }, { "epoch": 8.405253283302065, "grad_norm": 0.45251399278640747, "learning_rate": 0.002828, "loss": 1.5982, "step": 112000 }, { "epoch": 8.410056285178236, "grad_norm": 0.5282191634178162, "learning_rate": 0.002828, "loss": 1.5974, "step": 112064 }, { "epoch": 8.414859287054409, "grad_norm": 0.5637361407279968, "learning_rate": 0.002828, "loss": 1.6016, "step": 112128 }, { "epoch": 8.419662288930581, "grad_norm": 0.5421961545944214, "learning_rate": 0.002828, "loss": 1.5968, "step": 112192 }, { "epoch": 8.424465290806754, "grad_norm": 0.45800650119781494, "learning_rate": 0.002828, "loss": 1.6019, "step": 112256 }, { "epoch": 8.429268292682927, "grad_norm": 0.5147725939750671, "learning_rate": 0.002828, "loss": 1.5966, "step": 112320 }, { "epoch": 8.4340712945591, "grad_norm": 0.5175865888595581, "learning_rate": 0.002828, "loss": 1.6023, "step": 112384 }, { "epoch": 8.438874296435271, "grad_norm": 0.48384368419647217, "learning_rate": 0.002828, "loss": 1.5909, "step": 112448 }, { "epoch": 8.443677298311444, "grad_norm": 0.49091124534606934, "learning_rate": 0.002828, "loss": 1.5965, "step": 112512 }, { "epoch": 8.448480300187617, "grad_norm": 0.5016306042671204, "learning_rate": 0.002828, "loss": 1.592, "step": 112576 }, { "epoch": 8.45328330206379, "grad_norm": 0.49617043137550354, "learning_rate": 0.002828, "loss": 1.6, "step": 112640 }, { "epoch": 8.458086303939963, "grad_norm": 0.5295305848121643, "learning_rate": 0.002828, "loss": 1.6006, "step": 112704 }, { "epoch": 8.462889305816136, "grad_norm": 0.48764100670814514, "learning_rate": 0.002828, "loss": 1.5977, "step": 112768 }, { "epoch": 8.467692307692309, "grad_norm": 0.5215612053871155, "learning_rate": 0.002828, "loss": 1.5955, "step": 112832 }, { "epoch": 8.47249530956848, "grad_norm": 0.4538777768611908, "learning_rate": 0.002828, "loss": 1.5948, "step": 112896 }, { "epoch": 8.477298311444653, "grad_norm": 0.44952964782714844, "learning_rate": 0.002828, "loss": 1.5979, "step": 112960 }, { "epoch": 8.482101313320825, "grad_norm": 0.4811982214450836, "learning_rate": 0.002828, "loss": 1.594, "step": 113024 }, { "epoch": 8.486904315196998, "grad_norm": 0.4790002703666687, "learning_rate": 0.002828, "loss": 1.6019, "step": 113088 }, { "epoch": 8.491707317073171, "grad_norm": 0.475006639957428, "learning_rate": 0.002828, "loss": 1.597, "step": 113152 }, { "epoch": 8.496510318949344, "grad_norm": 0.5283963680267334, "learning_rate": 0.002828, "loss": 1.6016, "step": 113216 }, { "epoch": 8.501313320825515, "grad_norm": 0.4370746612548828, "learning_rate": 0.002828, "loss": 1.5985, "step": 113280 }, { "epoch": 8.506116322701688, "grad_norm": 0.4338639974594116, "learning_rate": 0.002828, "loss": 1.5985, "step": 113344 }, { "epoch": 8.510919324577861, "grad_norm": 0.47683221101760864, "learning_rate": 0.002828, "loss": 1.6004, "step": 113408 }, { "epoch": 8.515722326454034, "grad_norm": 0.5058532357215881, "learning_rate": 0.002828, "loss": 1.5979, "step": 113472 }, { "epoch": 8.520525328330207, "grad_norm": 0.5377301573753357, "learning_rate": 0.002828, "loss": 1.6062, "step": 113536 }, { "epoch": 8.52532833020638, "grad_norm": 0.4185563921928406, "learning_rate": 0.002828, "loss": 1.5993, "step": 113600 }, { "epoch": 8.53013133208255, "grad_norm": 0.48849865794181824, "learning_rate": 0.002828, "loss": 1.5992, "step": 113664 }, { "epoch": 8.534934333958724, "grad_norm": 0.5468032956123352, "learning_rate": 0.002828, "loss": 1.5979, "step": 113728 }, { "epoch": 8.539737335834896, "grad_norm": 0.45104995369911194, "learning_rate": 0.002828, "loss": 1.5952, "step": 113792 }, { "epoch": 8.54454033771107, "grad_norm": 0.4612830877304077, "learning_rate": 0.002828, "loss": 1.602, "step": 113856 }, { "epoch": 8.549343339587242, "grad_norm": 0.5112800598144531, "learning_rate": 0.002828, "loss": 1.5954, "step": 113920 }, { "epoch": 8.554146341463415, "grad_norm": 0.4625680446624756, "learning_rate": 0.002828, "loss": 1.5951, "step": 113984 }, { "epoch": 8.558949343339588, "grad_norm": 0.4750332236289978, "learning_rate": 0.002828, "loss": 1.5939, "step": 114048 }, { "epoch": 8.56375234521576, "grad_norm": 0.5651158690452576, "learning_rate": 0.002828, "loss": 1.5946, "step": 114112 }, { "epoch": 8.568555347091932, "grad_norm": 0.5241173505783081, "learning_rate": 0.002828, "loss": 1.5867, "step": 114176 }, { "epoch": 8.573358348968105, "grad_norm": 0.42227280139923096, "learning_rate": 0.002828, "loss": 1.5895, "step": 114240 }, { "epoch": 8.578161350844278, "grad_norm": 0.511060893535614, "learning_rate": 0.002828, "loss": 1.5994, "step": 114304 }, { "epoch": 8.58296435272045, "grad_norm": 0.49274709820747375, "learning_rate": 0.002828, "loss": 1.5916, "step": 114368 }, { "epoch": 8.587767354596624, "grad_norm": 0.5100725293159485, "learning_rate": 0.002828, "loss": 1.5998, "step": 114432 }, { "epoch": 8.592570356472795, "grad_norm": 0.5600299835205078, "learning_rate": 0.002828, "loss": 1.5997, "step": 114496 }, { "epoch": 8.597373358348968, "grad_norm": 0.4198225736618042, "learning_rate": 0.002828, "loss": 1.5999, "step": 114560 }, { "epoch": 8.60217636022514, "grad_norm": 0.520063579082489, "learning_rate": 0.002828, "loss": 1.595, "step": 114624 }, { "epoch": 8.606979362101313, "grad_norm": 0.41708284616470337, "learning_rate": 0.002828, "loss": 1.5995, "step": 114688 }, { "epoch": 8.611782363977486, "grad_norm": 0.4914540648460388, "learning_rate": 0.002828, "loss": 1.5882, "step": 114752 }, { "epoch": 8.616585365853659, "grad_norm": 0.451637327671051, "learning_rate": 0.002828, "loss": 1.5981, "step": 114816 }, { "epoch": 8.621388367729832, "grad_norm": 0.42104360461235046, "learning_rate": 0.002828, "loss": 1.5989, "step": 114880 }, { "epoch": 8.626191369606003, "grad_norm": 0.46855536103248596, "learning_rate": 0.002828, "loss": 1.5929, "step": 114944 }, { "epoch": 8.630994371482176, "grad_norm": 0.5319945216178894, "learning_rate": 0.002828, "loss": 1.5986, "step": 115008 }, { "epoch": 8.635797373358349, "grad_norm": 0.5827271342277527, "learning_rate": 0.002828, "loss": 1.5942, "step": 115072 }, { "epoch": 8.640600375234522, "grad_norm": 0.4484277069568634, "learning_rate": 0.002828, "loss": 1.5949, "step": 115136 }, { "epoch": 8.645403377110695, "grad_norm": 0.46110737323760986, "learning_rate": 0.002828, "loss": 1.6058, "step": 115200 }, { "epoch": 8.650206378986868, "grad_norm": 0.4264478385448456, "learning_rate": 0.002828, "loss": 1.5897, "step": 115264 }, { "epoch": 8.655009380863039, "grad_norm": 0.46964988112449646, "learning_rate": 0.002828, "loss": 1.6006, "step": 115328 }, { "epoch": 8.659812382739212, "grad_norm": 0.5994449853897095, "learning_rate": 0.002828, "loss": 1.5972, "step": 115392 }, { "epoch": 8.664615384615384, "grad_norm": 0.5398836135864258, "learning_rate": 0.002828, "loss": 1.5993, "step": 115456 }, { "epoch": 8.669418386491557, "grad_norm": 0.4390712082386017, "learning_rate": 0.002828, "loss": 1.5967, "step": 115520 }, { "epoch": 8.67422138836773, "grad_norm": 0.656513512134552, "learning_rate": 0.002828, "loss": 1.5974, "step": 115584 }, { "epoch": 8.679024390243903, "grad_norm": 0.4846341013908386, "learning_rate": 0.002828, "loss": 1.5978, "step": 115648 }, { "epoch": 8.683827392120076, "grad_norm": 0.5119456052780151, "learning_rate": 0.002828, "loss": 1.5852, "step": 115712 }, { "epoch": 8.688630393996247, "grad_norm": 0.40535804629325867, "learning_rate": 0.002828, "loss": 1.5917, "step": 115776 }, { "epoch": 8.69343339587242, "grad_norm": 0.49570637941360474, "learning_rate": 0.002828, "loss": 1.592, "step": 115840 }, { "epoch": 8.698236397748593, "grad_norm": 0.5806848406791687, "learning_rate": 0.002828, "loss": 1.5978, "step": 115904 }, { "epoch": 8.703039399624766, "grad_norm": 0.375515341758728, "learning_rate": 0.002828, "loss": 1.5944, "step": 115968 }, { "epoch": 8.707842401500939, "grad_norm": 0.4328213334083557, "learning_rate": 0.002828, "loss": 1.5898, "step": 116032 }, { "epoch": 8.712645403377111, "grad_norm": 0.4718143343925476, "learning_rate": 0.002828, "loss": 1.5986, "step": 116096 }, { "epoch": 8.717448405253283, "grad_norm": 0.5862587094306946, "learning_rate": 0.002828, "loss": 1.5922, "step": 116160 }, { "epoch": 8.722251407129455, "grad_norm": 0.4656484127044678, "learning_rate": 0.002828, "loss": 1.5934, "step": 116224 }, { "epoch": 8.727054409005628, "grad_norm": 0.541840672492981, "learning_rate": 0.002828, "loss": 1.5985, "step": 116288 }, { "epoch": 8.731857410881801, "grad_norm": 0.5951374769210815, "learning_rate": 0.002828, "loss": 1.5975, "step": 116352 }, { "epoch": 8.736660412757974, "grad_norm": 0.47397860884666443, "learning_rate": 0.002828, "loss": 1.5932, "step": 116416 }, { "epoch": 8.741463414634147, "grad_norm": 0.4907262325286865, "learning_rate": 0.002828, "loss": 1.5945, "step": 116480 }, { "epoch": 8.74626641651032, "grad_norm": 0.48710212111473083, "learning_rate": 0.002828, "loss": 1.5972, "step": 116544 }, { "epoch": 8.751069418386491, "grad_norm": 0.5130937099456787, "learning_rate": 0.002828, "loss": 1.5977, "step": 116608 }, { "epoch": 8.755872420262664, "grad_norm": 0.45630306005477905, "learning_rate": 0.002828, "loss": 1.597, "step": 116672 }, { "epoch": 8.760675422138837, "grad_norm": 0.44322487711906433, "learning_rate": 0.002828, "loss": 1.5982, "step": 116736 }, { "epoch": 8.76547842401501, "grad_norm": 0.7029752135276794, "learning_rate": 0.002828, "loss": 1.5926, "step": 116800 }, { "epoch": 8.770281425891183, "grad_norm": 0.5284256935119629, "learning_rate": 0.002828, "loss": 1.5958, "step": 116864 }, { "epoch": 8.775084427767355, "grad_norm": 0.5001572966575623, "learning_rate": 0.002828, "loss": 1.5955, "step": 116928 }, { "epoch": 8.779887429643527, "grad_norm": 0.5075249075889587, "learning_rate": 0.002828, "loss": 1.5963, "step": 116992 }, { "epoch": 8.7846904315197, "grad_norm": 0.4319472312927246, "learning_rate": 0.002828, "loss": 1.5997, "step": 117056 }, { "epoch": 8.789493433395872, "grad_norm": 0.501660168170929, "learning_rate": 0.002828, "loss": 1.5913, "step": 117120 }, { "epoch": 8.794296435272045, "grad_norm": 0.44797807931900024, "learning_rate": 0.002828, "loss": 1.5995, "step": 117184 }, { "epoch": 8.799099437148218, "grad_norm": 0.47948718070983887, "learning_rate": 0.002828, "loss": 1.5882, "step": 117248 }, { "epoch": 8.803902439024391, "grad_norm": 0.4581029713153839, "learning_rate": 0.002828, "loss": 1.5953, "step": 117312 }, { "epoch": 8.808705440900562, "grad_norm": 0.44964277744293213, "learning_rate": 0.002828, "loss": 1.5922, "step": 117376 }, { "epoch": 8.813508442776735, "grad_norm": 0.49619776010513306, "learning_rate": 0.002828, "loss": 1.5883, "step": 117440 }, { "epoch": 8.818311444652908, "grad_norm": 0.5036442279815674, "learning_rate": 0.002828, "loss": 1.5942, "step": 117504 }, { "epoch": 8.82311444652908, "grad_norm": 0.5327112078666687, "learning_rate": 0.002828, "loss": 1.5956, "step": 117568 }, { "epoch": 8.827917448405254, "grad_norm": 0.49304166436195374, "learning_rate": 0.002828, "loss": 1.5938, "step": 117632 }, { "epoch": 8.832720450281426, "grad_norm": 0.4130309820175171, "learning_rate": 0.002828, "loss": 1.5984, "step": 117696 }, { "epoch": 8.837523452157598, "grad_norm": 0.4643678367137909, "learning_rate": 0.002828, "loss": 1.6014, "step": 117760 }, { "epoch": 8.84232645403377, "grad_norm": 0.4973071813583374, "learning_rate": 0.002828, "loss": 1.5914, "step": 117824 }, { "epoch": 8.847129455909943, "grad_norm": 0.4967636466026306, "learning_rate": 0.002828, "loss": 1.5935, "step": 117888 }, { "epoch": 8.851932457786116, "grad_norm": 0.44560450315475464, "learning_rate": 0.002828, "loss": 1.5917, "step": 117952 }, { "epoch": 8.85673545966229, "grad_norm": 0.46391820907592773, "learning_rate": 0.002828, "loss": 1.5944, "step": 118016 }, { "epoch": 8.861538461538462, "grad_norm": 0.5275543928146362, "learning_rate": 0.002828, "loss": 1.6014, "step": 118080 }, { "epoch": 8.866341463414635, "grad_norm": 0.45606210827827454, "learning_rate": 0.002828, "loss": 1.5921, "step": 118144 }, { "epoch": 8.871144465290806, "grad_norm": 0.4854564964771271, "learning_rate": 0.002828, "loss": 1.5954, "step": 118208 }, { "epoch": 8.875947467166979, "grad_norm": 0.4536757171154022, "learning_rate": 0.002828, "loss": 1.5932, "step": 118272 }, { "epoch": 8.880750469043152, "grad_norm": 0.47313639521598816, "learning_rate": 0.002828, "loss": 1.5925, "step": 118336 }, { "epoch": 8.885553470919325, "grad_norm": 0.5030401945114136, "learning_rate": 0.002828, "loss": 1.5933, "step": 118400 }, { "epoch": 8.890356472795498, "grad_norm": 0.5216529369354248, "learning_rate": 0.002828, "loss": 1.5942, "step": 118464 }, { "epoch": 8.89515947467167, "grad_norm": 0.4898476302623749, "learning_rate": 0.002828, "loss": 1.5958, "step": 118528 }, { "epoch": 8.899962476547842, "grad_norm": 0.5011880397796631, "learning_rate": 0.002828, "loss": 1.5913, "step": 118592 }, { "epoch": 8.904765478424014, "grad_norm": 0.5021920800209045, "learning_rate": 0.002828, "loss": 1.59, "step": 118656 }, { "epoch": 8.909568480300187, "grad_norm": 0.48647958040237427, "learning_rate": 0.002828, "loss": 1.5982, "step": 118720 }, { "epoch": 8.91437148217636, "grad_norm": 0.5826468467712402, "learning_rate": 0.002828, "loss": 1.5871, "step": 118784 }, { "epoch": 8.919174484052533, "grad_norm": 0.49877214431762695, "learning_rate": 0.002828, "loss": 1.5945, "step": 118848 }, { "epoch": 8.923977485928706, "grad_norm": 0.6048728227615356, "learning_rate": 0.002828, "loss": 1.596, "step": 118912 }, { "epoch": 8.928780487804879, "grad_norm": 0.5304832458496094, "learning_rate": 0.002828, "loss": 1.5984, "step": 118976 }, { "epoch": 8.93358348968105, "grad_norm": 0.454377144575119, "learning_rate": 0.002828, "loss": 1.5853, "step": 119040 }, { "epoch": 8.938386491557223, "grad_norm": 0.509590208530426, "learning_rate": 0.002828, "loss": 1.5908, "step": 119104 }, { "epoch": 8.943189493433396, "grad_norm": 0.5293945074081421, "learning_rate": 0.002828, "loss": 1.592, "step": 119168 }, { "epoch": 8.947992495309569, "grad_norm": 0.4682133197784424, "learning_rate": 0.002828, "loss": 1.6001, "step": 119232 }, { "epoch": 8.952795497185742, "grad_norm": 0.5309058427810669, "learning_rate": 0.002828, "loss": 1.5966, "step": 119296 }, { "epoch": 8.957598499061914, "grad_norm": 0.4920218586921692, "learning_rate": 0.002828, "loss": 1.5979, "step": 119360 }, { "epoch": 8.962401500938086, "grad_norm": 0.4925355911254883, "learning_rate": 0.002828, "loss": 1.5893, "step": 119424 }, { "epoch": 8.967204502814258, "grad_norm": 0.4320846199989319, "learning_rate": 0.002828, "loss": 1.593, "step": 119488 }, { "epoch": 8.972007504690431, "grad_norm": 0.46012082695961, "learning_rate": 0.002828, "loss": 1.5946, "step": 119552 }, { "epoch": 8.976810506566604, "grad_norm": 0.47652629017829895, "learning_rate": 0.002828, "loss": 1.5938, "step": 119616 }, { "epoch": 8.981613508442777, "grad_norm": 0.5796999931335449, "learning_rate": 0.002828, "loss": 1.5926, "step": 119680 }, { "epoch": 8.98641651031895, "grad_norm": 0.41885271668434143, "learning_rate": 0.002828, "loss": 1.5879, "step": 119744 }, { "epoch": 8.991219512195123, "grad_norm": 0.43641573190689087, "learning_rate": 0.002828, "loss": 1.5838, "step": 119808 }, { "epoch": 8.996022514071294, "grad_norm": 0.4800099730491638, "learning_rate": 0.002828, "loss": 1.5821, "step": 119872 }, { "epoch": 9.000825515947467, "grad_norm": 0.49796926975250244, "learning_rate": 0.002828, "loss": 1.5808, "step": 119936 }, { "epoch": 9.00562851782364, "grad_norm": 0.5159605145454407, "learning_rate": 0.002828, "loss": 1.5448, "step": 120000 }, { "epoch": 9.010431519699813, "grad_norm": 0.48490002751350403, "learning_rate": 0.002828, "loss": 1.5543, "step": 120064 }, { "epoch": 9.015234521575985, "grad_norm": 0.45639586448669434, "learning_rate": 0.002828, "loss": 1.5462, "step": 120128 }, { "epoch": 9.020037523452158, "grad_norm": 0.4975225329399109, "learning_rate": 0.002828, "loss": 1.5506, "step": 120192 }, { "epoch": 9.02484052532833, "grad_norm": 0.4190879166126251, "learning_rate": 0.002828, "loss": 1.5452, "step": 120256 }, { "epoch": 9.029643527204502, "grad_norm": 0.42787736654281616, "learning_rate": 0.002828, "loss": 1.5464, "step": 120320 }, { "epoch": 9.034446529080675, "grad_norm": 0.5576650500297546, "learning_rate": 0.002828, "loss": 1.5484, "step": 120384 }, { "epoch": 9.039249530956848, "grad_norm": 0.4466994106769562, "learning_rate": 0.002828, "loss": 1.5424, "step": 120448 }, { "epoch": 9.044052532833021, "grad_norm": 0.5612980127334595, "learning_rate": 0.002828, "loss": 1.5489, "step": 120512 }, { "epoch": 9.048855534709194, "grad_norm": 0.48870643973350525, "learning_rate": 0.002828, "loss": 1.5475, "step": 120576 }, { "epoch": 9.053658536585365, "grad_norm": 0.47606512904167175, "learning_rate": 0.002828, "loss": 1.5477, "step": 120640 }, { "epoch": 9.058461538461538, "grad_norm": 0.4935826063156128, "learning_rate": 0.002828, "loss": 1.5471, "step": 120704 }, { "epoch": 9.06326454033771, "grad_norm": 0.49965599179267883, "learning_rate": 0.002828, "loss": 1.5419, "step": 120768 }, { "epoch": 9.068067542213884, "grad_norm": 0.4511246979236603, "learning_rate": 0.002828, "loss": 1.5498, "step": 120832 }, { "epoch": 9.072870544090057, "grad_norm": 0.4655289649963379, "learning_rate": 0.002828, "loss": 1.5493, "step": 120896 }, { "epoch": 9.07767354596623, "grad_norm": 0.48337608575820923, "learning_rate": 0.002828, "loss": 1.5566, "step": 120960 }, { "epoch": 9.082476547842402, "grad_norm": 0.47634145617485046, "learning_rate": 0.002828, "loss": 1.5547, "step": 121024 }, { "epoch": 9.087279549718573, "grad_norm": 0.5103233456611633, "learning_rate": 0.002828, "loss": 1.5515, "step": 121088 }, { "epoch": 9.092082551594746, "grad_norm": 0.47967302799224854, "learning_rate": 0.002828, "loss": 1.5536, "step": 121152 }, { "epoch": 9.09688555347092, "grad_norm": 0.4684443771839142, "learning_rate": 0.002828, "loss": 1.5507, "step": 121216 }, { "epoch": 9.101688555347092, "grad_norm": 0.513687014579773, "learning_rate": 0.002828, "loss": 1.5533, "step": 121280 }, { "epoch": 9.106491557223265, "grad_norm": 0.4781445264816284, "learning_rate": 0.002828, "loss": 1.5541, "step": 121344 }, { "epoch": 9.111294559099438, "grad_norm": 0.4334908723831177, "learning_rate": 0.002828, "loss": 1.5603, "step": 121408 }, { "epoch": 9.116097560975609, "grad_norm": 0.5581165552139282, "learning_rate": 0.002828, "loss": 1.5509, "step": 121472 }, { "epoch": 9.120900562851782, "grad_norm": 0.5921232104301453, "learning_rate": 0.002828, "loss": 1.5508, "step": 121536 }, { "epoch": 9.125703564727955, "grad_norm": 0.48528948426246643, "learning_rate": 0.002828, "loss": 1.5567, "step": 121600 }, { "epoch": 9.130506566604128, "grad_norm": 0.5249315500259399, "learning_rate": 0.002828, "loss": 1.5521, "step": 121664 }, { "epoch": 9.1353095684803, "grad_norm": 0.5910688638687134, "learning_rate": 0.002828, "loss": 1.5578, "step": 121728 }, { "epoch": 9.140112570356473, "grad_norm": 0.46241605281829834, "learning_rate": 0.002828, "loss": 1.5549, "step": 121792 }, { "epoch": 9.144915572232645, "grad_norm": 0.5226661562919617, "learning_rate": 0.002828, "loss": 1.551, "step": 121856 }, { "epoch": 9.149718574108817, "grad_norm": 0.4741944372653961, "learning_rate": 0.002828, "loss": 1.554, "step": 121920 }, { "epoch": 9.15452157598499, "grad_norm": 0.5220636129379272, "learning_rate": 0.002828, "loss": 1.555, "step": 121984 }, { "epoch": 9.159324577861163, "grad_norm": 0.5372878909111023, "learning_rate": 0.002828, "loss": 1.5505, "step": 122048 }, { "epoch": 9.164127579737336, "grad_norm": 0.5775820016860962, "learning_rate": 0.002828, "loss": 1.551, "step": 122112 }, { "epoch": 9.168930581613509, "grad_norm": 0.5545300841331482, "learning_rate": 0.002828, "loss": 1.5519, "step": 122176 }, { "epoch": 9.173733583489682, "grad_norm": 0.467183917760849, "learning_rate": 0.002828, "loss": 1.5539, "step": 122240 }, { "epoch": 9.178536585365853, "grad_norm": 0.4525934159755707, "learning_rate": 0.002828, "loss": 1.5543, "step": 122304 }, { "epoch": 9.183339587242026, "grad_norm": 0.4548516869544983, "learning_rate": 0.002828, "loss": 1.5475, "step": 122368 }, { "epoch": 9.188142589118199, "grad_norm": 0.5126798152923584, "learning_rate": 0.002828, "loss": 1.5585, "step": 122432 }, { "epoch": 9.192945590994372, "grad_norm": 0.4691389203071594, "learning_rate": 0.002828, "loss": 1.5574, "step": 122496 }, { "epoch": 9.197748592870544, "grad_norm": 0.4580743610858917, "learning_rate": 0.002828, "loss": 1.5553, "step": 122560 }, { "epoch": 9.202551594746717, "grad_norm": 0.5749825239181519, "learning_rate": 0.002828, "loss": 1.547, "step": 122624 }, { "epoch": 9.207354596622888, "grad_norm": 0.4706513285636902, "learning_rate": 0.002828, "loss": 1.5549, "step": 122688 }, { "epoch": 9.212157598499061, "grad_norm": 0.437346875667572, "learning_rate": 0.002828, "loss": 1.5565, "step": 122752 }, { "epoch": 9.216960600375234, "grad_norm": 0.5276687145233154, "learning_rate": 0.002828, "loss": 1.5549, "step": 122816 }, { "epoch": 9.221763602251407, "grad_norm": 0.49012863636016846, "learning_rate": 0.002828, "loss": 1.5511, "step": 122880 }, { "epoch": 9.22656660412758, "grad_norm": 0.5751110911369324, "learning_rate": 0.002828, "loss": 1.5493, "step": 122944 }, { "epoch": 9.231369606003753, "grad_norm": 0.48521631956100464, "learning_rate": 0.002828, "loss": 1.5529, "step": 123008 }, { "epoch": 9.236172607879926, "grad_norm": 0.5013362765312195, "learning_rate": 0.002828, "loss": 1.5571, "step": 123072 }, { "epoch": 9.240975609756097, "grad_norm": 0.4691832661628723, "learning_rate": 0.002828, "loss": 1.5601, "step": 123136 }, { "epoch": 9.24577861163227, "grad_norm": 0.41744399070739746, "learning_rate": 0.002828, "loss": 1.5555, "step": 123200 }, { "epoch": 9.250581613508443, "grad_norm": 0.5104173421859741, "learning_rate": 0.002828, "loss": 1.552, "step": 123264 }, { "epoch": 9.255384615384616, "grad_norm": 0.46932265162467957, "learning_rate": 0.002828, "loss": 1.5593, "step": 123328 }, { "epoch": 9.260187617260788, "grad_norm": 0.5633493065834045, "learning_rate": 0.002828, "loss": 1.5554, "step": 123392 }, { "epoch": 9.264990619136961, "grad_norm": 0.4229661226272583, "learning_rate": 0.002828, "loss": 1.5577, "step": 123456 }, { "epoch": 9.269793621013132, "grad_norm": 0.4221712648868561, "learning_rate": 0.002828, "loss": 1.5584, "step": 123520 }, { "epoch": 9.274596622889305, "grad_norm": 0.5246994495391846, "learning_rate": 0.002828, "loss": 1.56, "step": 123584 }, { "epoch": 9.279399624765478, "grad_norm": 0.49985742568969727, "learning_rate": 0.002828, "loss": 1.5539, "step": 123648 }, { "epoch": 9.284202626641651, "grad_norm": 0.529069721698761, "learning_rate": 0.002828, "loss": 1.559, "step": 123712 }, { "epoch": 9.289005628517824, "grad_norm": 0.5723833441734314, "learning_rate": 0.002828, "loss": 1.5469, "step": 123776 }, { "epoch": 9.293808630393997, "grad_norm": 0.4774170219898224, "learning_rate": 0.002828, "loss": 1.5562, "step": 123840 }, { "epoch": 9.29861163227017, "grad_norm": 0.4825045168399811, "learning_rate": 0.002828, "loss": 1.5616, "step": 123904 }, { "epoch": 9.30341463414634, "grad_norm": 0.49546265602111816, "learning_rate": 0.002828, "loss": 1.5528, "step": 123968 }, { "epoch": 9.308217636022514, "grad_norm": 0.48817411065101624, "learning_rate": 0.002828, "loss": 1.5557, "step": 124032 }, { "epoch": 9.313020637898687, "grad_norm": 0.4852955937385559, "learning_rate": 0.002828, "loss": 1.5556, "step": 124096 }, { "epoch": 9.31782363977486, "grad_norm": 0.605785071849823, "learning_rate": 0.002828, "loss": 1.557, "step": 124160 }, { "epoch": 9.322626641651032, "grad_norm": 0.5245658159255981, "learning_rate": 0.002828, "loss": 1.5571, "step": 124224 }, { "epoch": 9.327429643527205, "grad_norm": 0.4966484606266022, "learning_rate": 0.002828, "loss": 1.5491, "step": 124288 }, { "epoch": 9.332232645403376, "grad_norm": 0.5174903273582458, "learning_rate": 0.002828, "loss": 1.5584, "step": 124352 }, { "epoch": 9.33703564727955, "grad_norm": 0.5896949172019958, "learning_rate": 0.002828, "loss": 1.5501, "step": 124416 }, { "epoch": 9.341838649155722, "grad_norm": 0.5136840343475342, "learning_rate": 0.002828, "loss": 1.5584, "step": 124480 }, { "epoch": 9.346641651031895, "grad_norm": 0.4528697431087494, "learning_rate": 0.002828, "loss": 1.5581, "step": 124544 }, { "epoch": 9.351444652908068, "grad_norm": 0.6355164647102356, "learning_rate": 0.002828, "loss": 1.5514, "step": 124608 }, { "epoch": 9.35624765478424, "grad_norm": 0.5369568467140198, "learning_rate": 0.002828, "loss": 1.5552, "step": 124672 }, { "epoch": 9.361050656660412, "grad_norm": 0.4956488311290741, "learning_rate": 0.002828, "loss": 1.5487, "step": 124736 }, { "epoch": 9.365853658536585, "grad_norm": 0.5135923027992249, "learning_rate": 0.002828, "loss": 1.5601, "step": 124800 }, { "epoch": 9.370656660412758, "grad_norm": 0.4957681894302368, "learning_rate": 0.002828, "loss": 1.5616, "step": 124864 }, { "epoch": 9.37545966228893, "grad_norm": 0.48925191164016724, "learning_rate": 0.002828, "loss": 1.5578, "step": 124928 }, { "epoch": 9.380262664165103, "grad_norm": 0.6204767227172852, "learning_rate": 0.002828, "loss": 1.5657, "step": 124992 }, { "epoch": 9.385065666041276, "grad_norm": 0.49402710795402527, "learning_rate": 0.002828, "loss": 1.5544, "step": 125056 }, { "epoch": 9.38986866791745, "grad_norm": 0.4795926809310913, "learning_rate": 0.002828, "loss": 1.553, "step": 125120 }, { "epoch": 9.39467166979362, "grad_norm": 0.5284917950630188, "learning_rate": 0.002828, "loss": 1.555, "step": 125184 }, { "epoch": 9.399474671669793, "grad_norm": 0.47258827090263367, "learning_rate": 0.002828, "loss": 1.5555, "step": 125248 }, { "epoch": 9.404277673545966, "grad_norm": 0.4388088881969452, "learning_rate": 0.002828, "loss": 1.5554, "step": 125312 }, { "epoch": 9.409080675422139, "grad_norm": 0.5329185724258423, "learning_rate": 0.002828, "loss": 1.5545, "step": 125376 }, { "epoch": 9.413883677298312, "grad_norm": 0.47534602880477905, "learning_rate": 0.002828, "loss": 1.5517, "step": 125440 }, { "epoch": 9.418686679174485, "grad_norm": 0.5153233408927917, "learning_rate": 0.002828, "loss": 1.5531, "step": 125504 }, { "epoch": 9.423489681050656, "grad_norm": 0.49084314703941345, "learning_rate": 0.002828, "loss": 1.5623, "step": 125568 }, { "epoch": 9.428292682926829, "grad_norm": 0.46658453345298767, "learning_rate": 0.002828, "loss": 1.5582, "step": 125632 }, { "epoch": 9.433095684803002, "grad_norm": 0.5435781478881836, "learning_rate": 0.002828, "loss": 1.553, "step": 125696 }, { "epoch": 9.437898686679175, "grad_norm": 0.6206216812133789, "learning_rate": 0.002828, "loss": 1.5506, "step": 125760 }, { "epoch": 9.442701688555347, "grad_norm": 0.4667552411556244, "learning_rate": 0.002828, "loss": 1.5577, "step": 125824 }, { "epoch": 9.44750469043152, "grad_norm": 0.5482316613197327, "learning_rate": 0.002828, "loss": 1.5541, "step": 125888 }, { "epoch": 9.452307692307691, "grad_norm": 0.5336337685585022, "learning_rate": 0.002828, "loss": 1.5604, "step": 125952 }, { "epoch": 9.457110694183864, "grad_norm": 0.46397972106933594, "learning_rate": 0.002828, "loss": 1.5505, "step": 126016 }, { "epoch": 9.461913696060037, "grad_norm": 0.5540631413459778, "learning_rate": 0.002828, "loss": 1.5598, "step": 126080 }, { "epoch": 9.46671669793621, "grad_norm": 0.666083574295044, "learning_rate": 0.002828, "loss": 1.5603, "step": 126144 }, { "epoch": 9.471519699812383, "grad_norm": 0.5453064441680908, "learning_rate": 0.002828, "loss": 1.5511, "step": 126208 }, { "epoch": 9.476322701688556, "grad_norm": 0.5765575170516968, "learning_rate": 0.002828, "loss": 1.5521, "step": 126272 }, { "epoch": 9.481125703564729, "grad_norm": 0.4911395311355591, "learning_rate": 0.002828, "loss": 1.5536, "step": 126336 }, { "epoch": 9.4859287054409, "grad_norm": 0.5155625343322754, "learning_rate": 0.002828, "loss": 1.5509, "step": 126400 }, { "epoch": 9.490731707317073, "grad_norm": 0.4965815842151642, "learning_rate": 0.002828, "loss": 1.5475, "step": 126464 }, { "epoch": 9.495534709193246, "grad_norm": 0.54978346824646, "learning_rate": 0.002828, "loss": 1.5591, "step": 126528 }, { "epoch": 9.500337711069418, "grad_norm": 0.4493124485015869, "learning_rate": 0.002828, "loss": 1.5539, "step": 126592 }, { "epoch": 9.505140712945591, "grad_norm": 0.4517803490161896, "learning_rate": 0.002828, "loss": 1.5568, "step": 126656 }, { "epoch": 9.509943714821764, "grad_norm": 0.527080237865448, "learning_rate": 0.002828, "loss": 1.5565, "step": 126720 }, { "epoch": 9.514746716697935, "grad_norm": 0.41383326053619385, "learning_rate": 0.002828, "loss": 1.5559, "step": 126784 }, { "epoch": 9.519549718574108, "grad_norm": 0.5651464462280273, "learning_rate": 0.002828, "loss": 1.5596, "step": 126848 }, { "epoch": 9.524352720450281, "grad_norm": 0.4518251419067383, "learning_rate": 0.002828, "loss": 1.5542, "step": 126912 }, { "epoch": 9.529155722326454, "grad_norm": 0.49248403310775757, "learning_rate": 0.002828, "loss": 1.5586, "step": 126976 }, { "epoch": 9.533958724202627, "grad_norm": 0.48522064089775085, "learning_rate": 0.002828, "loss": 1.5526, "step": 127040 }, { "epoch": 9.5387617260788, "grad_norm": 0.5640375018119812, "learning_rate": 0.002828, "loss": 1.554, "step": 127104 }, { "epoch": 9.543564727954973, "grad_norm": 0.5300438404083252, "learning_rate": 0.002828, "loss": 1.5627, "step": 127168 }, { "epoch": 9.548367729831144, "grad_norm": 0.459332138299942, "learning_rate": 0.002828, "loss": 1.5527, "step": 127232 }, { "epoch": 9.553170731707317, "grad_norm": 0.47487667202949524, "learning_rate": 0.002828, "loss": 1.5533, "step": 127296 }, { "epoch": 9.55797373358349, "grad_norm": 0.4343540370464325, "learning_rate": 0.002828, "loss": 1.5515, "step": 127360 }, { "epoch": 9.562776735459662, "grad_norm": 0.4315250813961029, "learning_rate": 0.002828, "loss": 1.5608, "step": 127424 }, { "epoch": 9.567579737335835, "grad_norm": 0.4699438512325287, "learning_rate": 0.002828, "loss": 1.562, "step": 127488 }, { "epoch": 9.572382739212008, "grad_norm": 0.6131508350372314, "learning_rate": 0.002828, "loss": 1.5469, "step": 127552 }, { "epoch": 9.57718574108818, "grad_norm": 0.55040043592453, "learning_rate": 0.002828, "loss": 1.5565, "step": 127616 }, { "epoch": 9.581988742964352, "grad_norm": 0.5390975475311279, "learning_rate": 0.002828, "loss": 1.5547, "step": 127680 }, { "epoch": 9.586791744840525, "grad_norm": 0.49108564853668213, "learning_rate": 0.002828, "loss": 1.5581, "step": 127744 }, { "epoch": 9.591594746716698, "grad_norm": 0.47410160303115845, "learning_rate": 0.002828, "loss": 1.5608, "step": 127808 }, { "epoch": 9.59639774859287, "grad_norm": 0.4867013990879059, "learning_rate": 0.002828, "loss": 1.5515, "step": 127872 }, { "epoch": 9.601200750469044, "grad_norm": 0.5015280842781067, "learning_rate": 0.002828, "loss": 1.5522, "step": 127936 }, { "epoch": 9.606003752345217, "grad_norm": 0.4710654616355896, "learning_rate": 0.002828, "loss": 1.5614, "step": 128000 }, { "epoch": 9.610806754221388, "grad_norm": 0.5308662056922913, "learning_rate": 0.002828, "loss": 1.5507, "step": 128064 }, { "epoch": 9.61560975609756, "grad_norm": 0.5719583630561829, "learning_rate": 0.002828, "loss": 1.5561, "step": 128128 }, { "epoch": 9.620412757973734, "grad_norm": 0.5578813552856445, "learning_rate": 0.002828, "loss": 1.5684, "step": 128192 }, { "epoch": 9.625215759849906, "grad_norm": 0.5093487501144409, "learning_rate": 0.002828, "loss": 1.5595, "step": 128256 }, { "epoch": 9.63001876172608, "grad_norm": 0.4938162863254547, "learning_rate": 0.002828, "loss": 1.5508, "step": 128320 }, { "epoch": 9.634821763602252, "grad_norm": 0.5062044858932495, "learning_rate": 0.002828, "loss": 1.5566, "step": 128384 }, { "epoch": 9.639624765478423, "grad_norm": 0.48692384362220764, "learning_rate": 0.002828, "loss": 1.5557, "step": 128448 }, { "epoch": 9.644427767354596, "grad_norm": 0.4967643916606903, "learning_rate": 0.002828, "loss": 1.5494, "step": 128512 }, { "epoch": 9.649230769230769, "grad_norm": 0.442503422498703, "learning_rate": 0.002828, "loss": 1.5553, "step": 128576 }, { "epoch": 9.654033771106942, "grad_norm": 0.5926002860069275, "learning_rate": 0.002828, "loss": 1.5523, "step": 128640 }, { "epoch": 9.658836772983115, "grad_norm": 0.4846031367778778, "learning_rate": 0.002828, "loss": 1.5611, "step": 128704 }, { "epoch": 9.663639774859288, "grad_norm": 0.5495798587799072, "learning_rate": 0.002828, "loss": 1.5555, "step": 128768 }, { "epoch": 9.66844277673546, "grad_norm": 0.5806244611740112, "learning_rate": 0.002828, "loss": 1.5589, "step": 128832 }, { "epoch": 9.673245778611632, "grad_norm": 0.5770285725593567, "learning_rate": 0.002828, "loss": 1.5488, "step": 128896 }, { "epoch": 9.678048780487805, "grad_norm": 0.4988243877887726, "learning_rate": 0.002828, "loss": 1.5523, "step": 128960 }, { "epoch": 9.682851782363977, "grad_norm": 0.48712292313575745, "learning_rate": 0.002828, "loss": 1.5577, "step": 129024 }, { "epoch": 9.68765478424015, "grad_norm": 0.5018770694732666, "learning_rate": 0.002828, "loss": 1.5556, "step": 129088 }, { "epoch": 9.692457786116323, "grad_norm": 0.4957042336463928, "learning_rate": 0.002828, "loss": 1.5579, "step": 129152 }, { "epoch": 9.697260787992496, "grad_norm": 0.48829516768455505, "learning_rate": 0.002828, "loss": 1.5658, "step": 129216 }, { "epoch": 9.702063789868667, "grad_norm": 0.4693721532821655, "learning_rate": 0.002828, "loss": 1.5524, "step": 129280 }, { "epoch": 9.70686679174484, "grad_norm": 0.4855722486972809, "learning_rate": 0.002828, "loss": 1.5557, "step": 129344 }, { "epoch": 9.711669793621013, "grad_norm": 0.5685173869132996, "learning_rate": 0.002828, "loss": 1.5567, "step": 129408 }, { "epoch": 9.716472795497186, "grad_norm": 0.55943363904953, "learning_rate": 0.002828, "loss": 1.5562, "step": 129472 }, { "epoch": 9.721275797373359, "grad_norm": 0.36396172642707825, "learning_rate": 0.002828, "loss": 1.5611, "step": 129536 }, { "epoch": 9.726078799249532, "grad_norm": 0.45727115869522095, "learning_rate": 0.002828, "loss": 1.5502, "step": 129600 }, { "epoch": 9.730881801125703, "grad_norm": 0.5950860381126404, "learning_rate": 0.002828, "loss": 1.5565, "step": 129664 }, { "epoch": 9.735684803001876, "grad_norm": 0.5075332522392273, "learning_rate": 0.002828, "loss": 1.549, "step": 129728 }, { "epoch": 9.740487804878049, "grad_norm": 0.6805112361907959, "learning_rate": 0.002828, "loss": 1.5531, "step": 129792 }, { "epoch": 9.745290806754221, "grad_norm": 0.5490958094596863, "learning_rate": 0.002828, "loss": 1.5578, "step": 129856 }, { "epoch": 9.750093808630394, "grad_norm": 0.5078960657119751, "learning_rate": 0.002828, "loss": 1.5608, "step": 129920 }, { "epoch": 9.754896810506567, "grad_norm": 0.47650301456451416, "learning_rate": 0.002828, "loss": 1.554, "step": 129984 }, { "epoch": 9.759699812382738, "grad_norm": 0.5622501969337463, "learning_rate": 0.002828, "loss": 1.552, "step": 130048 }, { "epoch": 9.764502814258911, "grad_norm": 0.5386357307434082, "learning_rate": 0.002828, "loss": 1.5519, "step": 130112 }, { "epoch": 9.769305816135084, "grad_norm": 0.47315895557403564, "learning_rate": 0.002828, "loss": 1.547, "step": 130176 }, { "epoch": 9.774108818011257, "grad_norm": 0.40879422426223755, "learning_rate": 0.002828, "loss": 1.5522, "step": 130240 }, { "epoch": 9.77891181988743, "grad_norm": 0.44783979654312134, "learning_rate": 0.002828, "loss": 1.5561, "step": 130304 }, { "epoch": 9.783714821763603, "grad_norm": 0.6161860227584839, "learning_rate": 0.002828, "loss": 1.5581, "step": 130368 }, { "epoch": 9.788517823639776, "grad_norm": 0.5071606636047363, "learning_rate": 0.002828, "loss": 1.5562, "step": 130432 }, { "epoch": 9.793320825515947, "grad_norm": 0.5199759602546692, "learning_rate": 0.002828, "loss": 1.5561, "step": 130496 }, { "epoch": 9.79812382739212, "grad_norm": 0.6187331080436707, "learning_rate": 0.002828, "loss": 1.5519, "step": 130560 }, { "epoch": 9.802926829268293, "grad_norm": 0.5684900283813477, "learning_rate": 0.002828, "loss": 1.5551, "step": 130624 }, { "epoch": 9.807729831144465, "grad_norm": 0.5067030191421509, "learning_rate": 0.002828, "loss": 1.5493, "step": 130688 }, { "epoch": 9.812532833020638, "grad_norm": 0.46885350346565247, "learning_rate": 0.002828, "loss": 1.5558, "step": 130752 }, { "epoch": 9.817335834896811, "grad_norm": 0.6028760075569153, "learning_rate": 0.002828, "loss": 1.5552, "step": 130816 }, { "epoch": 9.822138836772982, "grad_norm": 0.5503089427947998, "learning_rate": 0.002828, "loss": 1.5589, "step": 130880 }, { "epoch": 9.826941838649155, "grad_norm": 0.5198109149932861, "learning_rate": 0.002828, "loss": 1.5586, "step": 130944 }, { "epoch": 9.831744840525328, "grad_norm": 0.4858454763889313, "learning_rate": 0.002828, "loss": 1.5538, "step": 131008 }, { "epoch": 9.836547842401501, "grad_norm": 0.4834841191768646, "learning_rate": 0.002828, "loss": 1.5546, "step": 131072 }, { "epoch": 9.841350844277674, "grad_norm": 0.5896739959716797, "learning_rate": 0.002828, "loss": 1.5529, "step": 131136 }, { "epoch": 9.846153846153847, "grad_norm": 0.5708956122398376, "learning_rate": 0.002828, "loss": 1.5568, "step": 131200 }, { "epoch": 9.85095684803002, "grad_norm": 0.5776032209396362, "learning_rate": 0.002828, "loss": 1.5532, "step": 131264 }, { "epoch": 9.85575984990619, "grad_norm": 0.4519674479961395, "learning_rate": 0.002828, "loss": 1.5519, "step": 131328 }, { "epoch": 9.860562851782364, "grad_norm": 0.45993855595588684, "learning_rate": 0.002828, "loss": 1.5575, "step": 131392 }, { "epoch": 9.865365853658536, "grad_norm": 0.42766380310058594, "learning_rate": 0.002828, "loss": 1.5468, "step": 131456 }, { "epoch": 9.87016885553471, "grad_norm": 0.424795925617218, "learning_rate": 0.002828, "loss": 1.5502, "step": 131520 }, { "epoch": 9.874971857410882, "grad_norm": 0.4582250714302063, "learning_rate": 0.002828, "loss": 1.5556, "step": 131584 }, { "epoch": 9.879774859287055, "grad_norm": 0.45927566289901733, "learning_rate": 0.002828, "loss": 1.5576, "step": 131648 }, { "epoch": 9.884577861163226, "grad_norm": 0.44731155037879944, "learning_rate": 0.002828, "loss": 1.5526, "step": 131712 }, { "epoch": 9.8893808630394, "grad_norm": 0.48120561242103577, "learning_rate": 0.002828, "loss": 1.5607, "step": 131776 }, { "epoch": 9.894183864915572, "grad_norm": 0.6588419675827026, "learning_rate": 0.002828, "loss": 1.5606, "step": 131840 }, { "epoch": 9.898986866791745, "grad_norm": 0.4711664617061615, "learning_rate": 0.002828, "loss": 1.5563, "step": 131904 }, { "epoch": 9.903789868667918, "grad_norm": 0.4643876552581787, "learning_rate": 0.002828, "loss": 1.5528, "step": 131968 }, { "epoch": 9.90859287054409, "grad_norm": 0.52628093957901, "learning_rate": 0.002828, "loss": 1.5528, "step": 132032 }, { "epoch": 9.913395872420264, "grad_norm": 0.5681013464927673, "learning_rate": 0.002828, "loss": 1.5451, "step": 132096 }, { "epoch": 9.918198874296435, "grad_norm": 0.6170089840888977, "learning_rate": 0.002828, "loss": 1.5538, "step": 132160 }, { "epoch": 9.923001876172608, "grad_norm": 0.5162079930305481, "learning_rate": 0.002828, "loss": 1.5472, "step": 132224 }, { "epoch": 9.92780487804878, "grad_norm": 0.46833181381225586, "learning_rate": 0.002828, "loss": 1.5568, "step": 132288 }, { "epoch": 9.932607879924953, "grad_norm": 0.44369474053382874, "learning_rate": 0.002828, "loss": 1.5613, "step": 132352 }, { "epoch": 9.937410881801126, "grad_norm": 0.5638606548309326, "learning_rate": 0.002828, "loss": 1.5561, "step": 132416 }, { "epoch": 9.942213883677299, "grad_norm": 0.4265860319137573, "learning_rate": 0.002828, "loss": 1.555, "step": 132480 }, { "epoch": 9.94701688555347, "grad_norm": 0.4962076246738434, "learning_rate": 0.002828, "loss": 1.551, "step": 132544 }, { "epoch": 9.951819887429643, "grad_norm": 0.45441246032714844, "learning_rate": 0.002828, "loss": 1.5524, "step": 132608 }, { "epoch": 9.956622889305816, "grad_norm": 0.5333769917488098, "learning_rate": 0.002828, "loss": 1.5521, "step": 132672 }, { "epoch": 9.961425891181989, "grad_norm": 0.4870472550392151, "learning_rate": 0.002828, "loss": 1.5489, "step": 132736 }, { "epoch": 9.966228893058162, "grad_norm": 0.4919857382774353, "learning_rate": 0.002828, "loss": 1.5503, "step": 132800 }, { "epoch": 9.971031894934335, "grad_norm": 0.43766430020332336, "learning_rate": 0.002828, "loss": 1.5521, "step": 132864 }, { "epoch": 9.975834896810508, "grad_norm": 0.435209721326828, "learning_rate": 0.002828, "loss": 1.5529, "step": 132928 }, { "epoch": 9.980637898686679, "grad_norm": 0.441018283367157, "learning_rate": 0.002828, "loss": 1.5558, "step": 132992 }, { "epoch": 9.985440900562851, "grad_norm": 0.41537073254585266, "learning_rate": 0.002828, "loss": 1.5537, "step": 133056 }, { "epoch": 9.990243902439024, "grad_norm": 0.4654955565929413, "learning_rate": 0.002828, "loss": 1.5511, "step": 133120 }, { "epoch": 9.995046904315197, "grad_norm": 0.5095586776733398, "learning_rate": 0.002828, "loss": 1.5585, "step": 133184 }, { "epoch": 9.99984990619137, "grad_norm": 0.49905091524124146, "learning_rate": 0.002828, "loss": 1.5502, "step": 133248 }, { "epoch": 10.004652908067543, "grad_norm": 0.479478657245636, "learning_rate": 0.002828, "loss": 1.5136, "step": 133312 }, { "epoch": 10.009455909943714, "grad_norm": 0.4345605671405792, "learning_rate": 0.002828, "loss": 1.513, "step": 133376 }, { "epoch": 10.014258911819887, "grad_norm": 0.5158377289772034, "learning_rate": 0.002828, "loss": 1.5104, "step": 133440 }, { "epoch": 10.01906191369606, "grad_norm": 0.49847567081451416, "learning_rate": 0.002828, "loss": 1.5042, "step": 133504 }, { "epoch": 10.023864915572233, "grad_norm": 0.49734270572662354, "learning_rate": 0.002828, "loss": 1.5085, "step": 133568 }, { "epoch": 10.028667917448406, "grad_norm": 0.5903614163398743, "learning_rate": 0.002828, "loss": 1.5079, "step": 133632 }, { "epoch": 10.033470919324579, "grad_norm": 0.4688267409801483, "learning_rate": 0.002828, "loss": 1.5104, "step": 133696 }, { "epoch": 10.03827392120075, "grad_norm": 0.5410575866699219, "learning_rate": 0.002828, "loss": 1.5101, "step": 133760 }, { "epoch": 10.043076923076923, "grad_norm": 0.5670648813247681, "learning_rate": 0.002828, "loss": 1.5066, "step": 133824 }, { "epoch": 10.047879924953095, "grad_norm": 0.544195830821991, "learning_rate": 0.002828, "loss": 1.5108, "step": 133888 }, { "epoch": 10.052682926829268, "grad_norm": 0.4335324168205261, "learning_rate": 0.002828, "loss": 1.5113, "step": 133952 }, { "epoch": 10.057485928705441, "grad_norm": 0.734622061252594, "learning_rate": 0.002828, "loss": 1.5098, "step": 134016 }, { "epoch": 10.062288930581614, "grad_norm": 0.5276984572410583, "learning_rate": 0.002828, "loss": 1.5151, "step": 134080 }, { "epoch": 10.067091932457787, "grad_norm": 0.5516037344932556, "learning_rate": 0.002828, "loss": 1.5087, "step": 134144 }, { "epoch": 10.071894934333958, "grad_norm": 0.4399818778038025, "learning_rate": 0.002828, "loss": 1.523, "step": 134208 }, { "epoch": 10.076697936210131, "grad_norm": 0.5881381630897522, "learning_rate": 0.002828, "loss": 1.5112, "step": 134272 }, { "epoch": 10.081500938086304, "grad_norm": 0.480007529258728, "learning_rate": 0.002828, "loss": 1.5039, "step": 134336 }, { "epoch": 10.086303939962477, "grad_norm": 0.6424974203109741, "learning_rate": 0.002828, "loss": 1.5081, "step": 134400 }, { "epoch": 10.09110694183865, "grad_norm": 0.5155991911888123, "learning_rate": 0.002828, "loss": 1.5121, "step": 134464 }, { "epoch": 10.095909943714823, "grad_norm": 0.48689907789230347, "learning_rate": 0.002828, "loss": 1.5095, "step": 134528 }, { "epoch": 10.100712945590994, "grad_norm": 0.5510069727897644, "learning_rate": 0.002828, "loss": 1.5156, "step": 134592 }, { "epoch": 10.105515947467167, "grad_norm": 0.49252885580062866, "learning_rate": 0.002828, "loss": 1.5132, "step": 134656 }, { "epoch": 10.11031894934334, "grad_norm": 0.6291511058807373, "learning_rate": 0.002828, "loss": 1.5103, "step": 134720 }, { "epoch": 10.115121951219512, "grad_norm": 0.4867250323295593, "learning_rate": 0.002828, "loss": 1.5185, "step": 134784 }, { "epoch": 10.119924953095685, "grad_norm": 0.5530567765235901, "learning_rate": 0.002828, "loss": 1.5067, "step": 134848 }, { "epoch": 10.124727954971858, "grad_norm": 0.43756282329559326, "learning_rate": 0.002828, "loss": 1.5142, "step": 134912 }, { "epoch": 10.12953095684803, "grad_norm": 0.5081878304481506, "learning_rate": 0.002828, "loss": 1.5127, "step": 134976 }, { "epoch": 10.134333958724202, "grad_norm": 0.5711334943771362, "learning_rate": 0.002828, "loss": 1.5125, "step": 135040 }, { "epoch": 10.139136960600375, "grad_norm": 0.5855487585067749, "learning_rate": 0.002828, "loss": 1.511, "step": 135104 }, { "epoch": 10.143939962476548, "grad_norm": 0.5145984888076782, "learning_rate": 0.002828, "loss": 1.5179, "step": 135168 }, { "epoch": 10.14874296435272, "grad_norm": 0.4353695511817932, "learning_rate": 0.002828, "loss": 1.5148, "step": 135232 }, { "epoch": 10.153545966228894, "grad_norm": 0.5010886788368225, "learning_rate": 0.002828, "loss": 1.5164, "step": 135296 }, { "epoch": 10.158348968105066, "grad_norm": 0.5228750109672546, "learning_rate": 0.002828, "loss": 1.523, "step": 135360 }, { "epoch": 10.163151969981238, "grad_norm": 0.6481117606163025, "learning_rate": 0.002828, "loss": 1.5176, "step": 135424 }, { "epoch": 10.16795497185741, "grad_norm": 0.4466358423233032, "learning_rate": 0.002828, "loss": 1.509, "step": 135488 }, { "epoch": 10.172757973733583, "grad_norm": 0.5442012548446655, "learning_rate": 0.002828, "loss": 1.5116, "step": 135552 }, { "epoch": 10.177560975609756, "grad_norm": 0.4774165451526642, "learning_rate": 0.002828, "loss": 1.5152, "step": 135616 }, { "epoch": 10.18236397748593, "grad_norm": 0.45034146308898926, "learning_rate": 0.002828, "loss": 1.5206, "step": 135680 }, { "epoch": 10.187166979362102, "grad_norm": 0.5528009533882141, "learning_rate": 0.002828, "loss": 1.5185, "step": 135744 }, { "epoch": 10.191969981238273, "grad_norm": 0.45600980520248413, "learning_rate": 0.002828, "loss": 1.5149, "step": 135808 }, { "epoch": 10.196772983114446, "grad_norm": 0.5091872811317444, "learning_rate": 0.002828, "loss": 1.5151, "step": 135872 }, { "epoch": 10.201575984990619, "grad_norm": 0.46785837411880493, "learning_rate": 0.002828, "loss": 1.5125, "step": 135936 }, { "epoch": 10.206378986866792, "grad_norm": 0.5324965119361877, "learning_rate": 0.002828, "loss": 1.516, "step": 136000 }, { "epoch": 10.211181988742965, "grad_norm": 0.5576740503311157, "learning_rate": 0.002828, "loss": 1.5118, "step": 136064 }, { "epoch": 10.215984990619138, "grad_norm": 0.5563480854034424, "learning_rate": 0.002828, "loss": 1.5173, "step": 136128 }, { "epoch": 10.22078799249531, "grad_norm": 0.6065492630004883, "learning_rate": 0.002828, "loss": 1.5169, "step": 136192 }, { "epoch": 10.225590994371482, "grad_norm": 0.5944182872772217, "learning_rate": 0.002828, "loss": 1.5136, "step": 136256 }, { "epoch": 10.230393996247654, "grad_norm": 0.48441362380981445, "learning_rate": 0.002828, "loss": 1.5211, "step": 136320 }, { "epoch": 10.235196998123827, "grad_norm": 0.4990592300891876, "learning_rate": 0.002828, "loss": 1.5162, "step": 136384 }, { "epoch": 10.24, "grad_norm": 0.5005013942718506, "learning_rate": 0.002828, "loss": 1.5155, "step": 136448 }, { "epoch": 10.244803001876173, "grad_norm": 0.4367721974849701, "learning_rate": 0.002828, "loss": 1.5178, "step": 136512 }, { "epoch": 10.249606003752346, "grad_norm": 0.6128390431404114, "learning_rate": 0.002828, "loss": 1.5206, "step": 136576 }, { "epoch": 10.254409005628517, "grad_norm": 0.48846036195755005, "learning_rate": 0.002828, "loss": 1.5129, "step": 136640 }, { "epoch": 10.25921200750469, "grad_norm": 0.5013264417648315, "learning_rate": 0.002828, "loss": 1.5154, "step": 136704 }, { "epoch": 10.264015009380863, "grad_norm": 0.5089060664176941, "learning_rate": 0.002828, "loss": 1.5242, "step": 136768 }, { "epoch": 10.268818011257036, "grad_norm": 0.4745241701602936, "learning_rate": 0.002828, "loss": 1.5215, "step": 136832 }, { "epoch": 10.273621013133209, "grad_norm": 0.553170382976532, "learning_rate": 0.002828, "loss": 1.5187, "step": 136896 }, { "epoch": 10.278424015009382, "grad_norm": 0.5633316040039062, "learning_rate": 0.002828, "loss": 1.5124, "step": 136960 }, { "epoch": 10.283227016885553, "grad_norm": 0.6235347390174866, "learning_rate": 0.002828, "loss": 1.5103, "step": 137024 }, { "epoch": 10.288030018761726, "grad_norm": 0.44016027450561523, "learning_rate": 0.002828, "loss": 1.5144, "step": 137088 }, { "epoch": 10.292833020637898, "grad_norm": 0.5390121340751648, "learning_rate": 0.002828, "loss": 1.525, "step": 137152 }, { "epoch": 10.297636022514071, "grad_norm": 0.5932199954986572, "learning_rate": 0.002828, "loss": 1.5268, "step": 137216 }, { "epoch": 10.302439024390244, "grad_norm": 0.46444085240364075, "learning_rate": 0.002828, "loss": 1.5165, "step": 137280 }, { "epoch": 10.307242026266417, "grad_norm": 0.46962660551071167, "learning_rate": 0.002828, "loss": 1.522, "step": 137344 }, { "epoch": 10.31204502814259, "grad_norm": 0.5389118790626526, "learning_rate": 0.002828, "loss": 1.5266, "step": 137408 }, { "epoch": 10.316848030018761, "grad_norm": 0.5998408198356628, "learning_rate": 0.002828, "loss": 1.523, "step": 137472 }, { "epoch": 10.321651031894934, "grad_norm": 0.5823659300804138, "learning_rate": 0.002828, "loss": 1.5149, "step": 137536 }, { "epoch": 10.326454033771107, "grad_norm": 0.5257498621940613, "learning_rate": 0.002828, "loss": 1.5203, "step": 137600 }, { "epoch": 10.33125703564728, "grad_norm": 0.5792316794395447, "learning_rate": 0.002828, "loss": 1.526, "step": 137664 }, { "epoch": 10.336060037523453, "grad_norm": 0.5107080936431885, "learning_rate": 0.002828, "loss": 1.5182, "step": 137728 }, { "epoch": 10.340863039399625, "grad_norm": 0.5128101110458374, "learning_rate": 0.002828, "loss": 1.5126, "step": 137792 }, { "epoch": 10.345666041275797, "grad_norm": 0.4674902558326721, "learning_rate": 0.002828, "loss": 1.5182, "step": 137856 }, { "epoch": 10.35046904315197, "grad_norm": 0.462354838848114, "learning_rate": 0.002828, "loss": 1.5234, "step": 137920 }, { "epoch": 10.355272045028142, "grad_norm": 0.543552815914154, "learning_rate": 0.002828, "loss": 1.5245, "step": 137984 }, { "epoch": 10.360075046904315, "grad_norm": 0.44141873717308044, "learning_rate": 0.002828, "loss": 1.5237, "step": 138048 }, { "epoch": 10.364878048780488, "grad_norm": 0.4777672588825226, "learning_rate": 0.002828, "loss": 1.5183, "step": 138112 }, { "epoch": 10.369681050656661, "grad_norm": 0.5415074229240417, "learning_rate": 0.002828, "loss": 1.5164, "step": 138176 }, { "epoch": 10.374484052532832, "grad_norm": 0.6049688458442688, "learning_rate": 0.002828, "loss": 1.524, "step": 138240 }, { "epoch": 10.379287054409005, "grad_norm": 0.46092769503593445, "learning_rate": 0.002828, "loss": 1.5212, "step": 138304 }, { "epoch": 10.384090056285178, "grad_norm": 0.5517581701278687, "learning_rate": 0.002828, "loss": 1.519, "step": 138368 }, { "epoch": 10.38889305816135, "grad_norm": 0.4884757399559021, "learning_rate": 0.002828, "loss": 1.5104, "step": 138432 }, { "epoch": 10.393696060037524, "grad_norm": 0.5153927803039551, "learning_rate": 0.002828, "loss": 1.5133, "step": 138496 }, { "epoch": 10.398499061913697, "grad_norm": 0.5135812163352966, "learning_rate": 0.002828, "loss": 1.5277, "step": 138560 }, { "epoch": 10.40330206378987, "grad_norm": 0.5352478623390198, "learning_rate": 0.002828, "loss": 1.5249, "step": 138624 }, { "epoch": 10.40810506566604, "grad_norm": 0.4779094457626343, "learning_rate": 0.002828, "loss": 1.5181, "step": 138688 }, { "epoch": 10.412908067542213, "grad_norm": 0.5232270956039429, "learning_rate": 0.002828, "loss": 1.5187, "step": 138752 }, { "epoch": 10.417711069418386, "grad_norm": 0.48955580592155457, "learning_rate": 0.002828, "loss": 1.5121, "step": 138816 }, { "epoch": 10.42251407129456, "grad_norm": 0.5007327795028687, "learning_rate": 0.002828, "loss": 1.5245, "step": 138880 }, { "epoch": 10.427317073170732, "grad_norm": 0.5203362107276917, "learning_rate": 0.002828, "loss": 1.5269, "step": 138944 }, { "epoch": 10.432120075046905, "grad_norm": 0.632523238658905, "learning_rate": 0.002828, "loss": 1.5158, "step": 139008 }, { "epoch": 10.436923076923076, "grad_norm": 0.4640112817287445, "learning_rate": 0.002828, "loss": 1.527, "step": 139072 }, { "epoch": 10.441726078799249, "grad_norm": 0.4999347925186157, "learning_rate": 0.002828, "loss": 1.506, "step": 139136 }, { "epoch": 10.446529080675422, "grad_norm": 0.44936737418174744, "learning_rate": 0.002828, "loss": 1.5176, "step": 139200 }, { "epoch": 10.451332082551595, "grad_norm": 0.4773159325122833, "learning_rate": 0.002828, "loss": 1.5199, "step": 139264 }, { "epoch": 10.456135084427768, "grad_norm": 0.4635850191116333, "learning_rate": 0.002828, "loss": 1.521, "step": 139328 }, { "epoch": 10.46093808630394, "grad_norm": 0.42555472254753113, "learning_rate": 0.002828, "loss": 1.5181, "step": 139392 }, { "epoch": 10.465741088180113, "grad_norm": 0.458814412355423, "learning_rate": 0.002828, "loss": 1.524, "step": 139456 }, { "epoch": 10.470544090056285, "grad_norm": 0.4896027743816376, "learning_rate": 0.002828, "loss": 1.5192, "step": 139520 }, { "epoch": 10.475347091932457, "grad_norm": 0.5083631873130798, "learning_rate": 0.002828, "loss": 1.5218, "step": 139584 }, { "epoch": 10.48015009380863, "grad_norm": 0.4863784909248352, "learning_rate": 0.002828, "loss": 1.5117, "step": 139648 }, { "epoch": 10.484953095684803, "grad_norm": 0.4946701228618622, "learning_rate": 0.002828, "loss": 1.5182, "step": 139712 }, { "epoch": 10.489756097560976, "grad_norm": 0.521332859992981, "learning_rate": 0.002828, "loss": 1.5196, "step": 139776 }, { "epoch": 10.494559099437149, "grad_norm": 0.5090921521186829, "learning_rate": 0.002828, "loss": 1.5168, "step": 139840 }, { "epoch": 10.49936210131332, "grad_norm": 0.5902573466300964, "learning_rate": 0.002828, "loss": 1.5195, "step": 139904 }, { "epoch": 10.504165103189493, "grad_norm": 0.5510809421539307, "learning_rate": 0.002828, "loss": 1.5182, "step": 139968 }, { "epoch": 10.508968105065666, "grad_norm": 0.5307651162147522, "learning_rate": 0.002828, "loss": 1.5237, "step": 140032 }, { "epoch": 10.513771106941839, "grad_norm": 0.6894339919090271, "learning_rate": 0.002828, "loss": 1.5199, "step": 140096 }, { "epoch": 10.518574108818012, "grad_norm": 0.48183873295783997, "learning_rate": 0.002828, "loss": 1.5268, "step": 140160 }, { "epoch": 10.523377110694184, "grad_norm": 0.5286323428153992, "learning_rate": 0.002828, "loss": 1.5252, "step": 140224 }, { "epoch": 10.528180112570357, "grad_norm": 0.44124990701675415, "learning_rate": 0.002828, "loss": 1.5244, "step": 140288 }, { "epoch": 10.532983114446528, "grad_norm": 0.5079673528671265, "learning_rate": 0.002828, "loss": 1.518, "step": 140352 }, { "epoch": 10.537786116322701, "grad_norm": 0.48387521505355835, "learning_rate": 0.002828, "loss": 1.5168, "step": 140416 }, { "epoch": 10.542589118198874, "grad_norm": 0.6227713823318481, "learning_rate": 0.002828, "loss": 1.5268, "step": 140480 }, { "epoch": 10.547392120075047, "grad_norm": 0.549760103225708, "learning_rate": 0.002828, "loss": 1.5183, "step": 140544 }, { "epoch": 10.55219512195122, "grad_norm": 0.5009521842002869, "learning_rate": 0.002828, "loss": 1.5187, "step": 140608 }, { "epoch": 10.556998123827393, "grad_norm": 0.5136734247207642, "learning_rate": 0.002828, "loss": 1.5133, "step": 140672 }, { "epoch": 10.561801125703564, "grad_norm": 0.5014452338218689, "learning_rate": 0.002828, "loss": 1.5174, "step": 140736 }, { "epoch": 10.566604127579737, "grad_norm": 0.4686838686466217, "learning_rate": 0.002828, "loss": 1.5221, "step": 140800 }, { "epoch": 10.57140712945591, "grad_norm": 0.49404841661453247, "learning_rate": 0.002828, "loss": 1.5252, "step": 140864 }, { "epoch": 10.576210131332083, "grad_norm": 0.5186532139778137, "learning_rate": 0.002828, "loss": 1.5168, "step": 140928 }, { "epoch": 10.581013133208256, "grad_norm": 0.578127384185791, "learning_rate": 0.002828, "loss": 1.5242, "step": 140992 }, { "epoch": 10.585816135084428, "grad_norm": 0.5328065156936646, "learning_rate": 0.002828, "loss": 1.5219, "step": 141056 }, { "epoch": 10.590619136960601, "grad_norm": 0.459264874458313, "learning_rate": 0.002828, "loss": 1.5199, "step": 141120 }, { "epoch": 10.595422138836772, "grad_norm": 0.5663778185844421, "learning_rate": 0.002828, "loss": 1.5162, "step": 141184 }, { "epoch": 10.600225140712945, "grad_norm": 0.5025619268417358, "learning_rate": 0.002828, "loss": 1.5182, "step": 141248 }, { "epoch": 10.605028142589118, "grad_norm": 0.4759722948074341, "learning_rate": 0.002828, "loss": 1.514, "step": 141312 }, { "epoch": 10.609831144465291, "grad_norm": 0.4879244267940521, "learning_rate": 0.002828, "loss": 1.5133, "step": 141376 }, { "epoch": 10.614634146341464, "grad_norm": 0.4883573055267334, "learning_rate": 0.002828, "loss": 1.5144, "step": 141440 }, { "epoch": 10.619437148217637, "grad_norm": 0.5533087253570557, "learning_rate": 0.002828, "loss": 1.5111, "step": 141504 }, { "epoch": 10.624240150093808, "grad_norm": 0.5033180117607117, "learning_rate": 0.002828, "loss": 1.5253, "step": 141568 }, { "epoch": 10.62904315196998, "grad_norm": 0.4731650948524475, "learning_rate": 0.002828, "loss": 1.5171, "step": 141632 }, { "epoch": 10.633846153846154, "grad_norm": 0.5617311596870422, "learning_rate": 0.002828, "loss": 1.5187, "step": 141696 }, { "epoch": 10.638649155722327, "grad_norm": 0.588597297668457, "learning_rate": 0.002828, "loss": 1.524, "step": 141760 }, { "epoch": 10.6434521575985, "grad_norm": 0.47357311844825745, "learning_rate": 0.002828, "loss": 1.5179, "step": 141824 }, { "epoch": 10.648255159474672, "grad_norm": 0.5507169365882874, "learning_rate": 0.002828, "loss": 1.5238, "step": 141888 }, { "epoch": 10.653058161350843, "grad_norm": 0.4835931658744812, "learning_rate": 0.002828, "loss": 1.5186, "step": 141952 }, { "epoch": 10.657861163227016, "grad_norm": 0.5200017690658569, "learning_rate": 0.002828, "loss": 1.5179, "step": 142016 }, { "epoch": 10.66266416510319, "grad_norm": 0.6572912931442261, "learning_rate": 0.002828, "loss": 1.5224, "step": 142080 }, { "epoch": 10.667467166979362, "grad_norm": 0.4692614674568176, "learning_rate": 0.002828, "loss": 1.5235, "step": 142144 }, { "epoch": 10.672270168855535, "grad_norm": 0.5642805695533752, "learning_rate": 0.002828, "loss": 1.5161, "step": 142208 }, { "epoch": 10.677073170731708, "grad_norm": 0.5048820972442627, "learning_rate": 0.002828, "loss": 1.5234, "step": 142272 }, { "epoch": 10.681876172607879, "grad_norm": 0.5080498456954956, "learning_rate": 0.002828, "loss": 1.5164, "step": 142336 }, { "epoch": 10.686679174484052, "grad_norm": 0.4397072494029999, "learning_rate": 0.002828, "loss": 1.5156, "step": 142400 }, { "epoch": 10.691482176360225, "grad_norm": 0.49026167392730713, "learning_rate": 0.002828, "loss": 1.5141, "step": 142464 }, { "epoch": 10.696285178236398, "grad_norm": 0.49835464358329773, "learning_rate": 0.002828, "loss": 1.5189, "step": 142528 }, { "epoch": 10.70108818011257, "grad_norm": 0.5250354409217834, "learning_rate": 0.002828, "loss": 1.5206, "step": 142592 }, { "epoch": 10.705891181988743, "grad_norm": 0.560287594795227, "learning_rate": 0.002828, "loss": 1.5118, "step": 142656 }, { "epoch": 10.710694183864916, "grad_norm": 0.47352853417396545, "learning_rate": 0.002828, "loss": 1.5204, "step": 142720 }, { "epoch": 10.715497185741087, "grad_norm": 0.49744275212287903, "learning_rate": 0.002828, "loss": 1.5195, "step": 142784 }, { "epoch": 10.72030018761726, "grad_norm": 0.5193085670471191, "learning_rate": 0.002828, "loss": 1.5181, "step": 142848 }, { "epoch": 10.725103189493433, "grad_norm": 0.5842470526695251, "learning_rate": 0.002828, "loss": 1.516, "step": 142912 }, { "epoch": 10.729906191369606, "grad_norm": 0.5229711532592773, "learning_rate": 0.002828, "loss": 1.5174, "step": 142976 }, { "epoch": 10.734709193245779, "grad_norm": 0.5207584500312805, "learning_rate": 0.002828, "loss": 1.5202, "step": 143040 }, { "epoch": 10.739512195121952, "grad_norm": 0.5027861595153809, "learning_rate": 0.002828, "loss": 1.5239, "step": 143104 }, { "epoch": 10.744315196998123, "grad_norm": 0.6836662292480469, "learning_rate": 0.002828, "loss": 1.5175, "step": 143168 }, { "epoch": 10.749118198874296, "grad_norm": 0.4938174784183502, "learning_rate": 0.002828, "loss": 1.518, "step": 143232 }, { "epoch": 10.753921200750469, "grad_norm": 0.49605420231819153, "learning_rate": 0.002828, "loss": 1.5169, "step": 143296 }, { "epoch": 10.758724202626642, "grad_norm": 0.6375707387924194, "learning_rate": 0.002828, "loss": 1.5079, "step": 143360 }, { "epoch": 10.763527204502815, "grad_norm": 0.40622422099113464, "learning_rate": 0.002828, "loss": 1.519, "step": 143424 }, { "epoch": 10.768330206378987, "grad_norm": 0.6180644631385803, "learning_rate": 0.002828, "loss": 1.5158, "step": 143488 }, { "epoch": 10.77313320825516, "grad_norm": 0.5086051821708679, "learning_rate": 0.002828, "loss": 1.5174, "step": 143552 }, { "epoch": 10.777936210131331, "grad_norm": 0.4496934711933136, "learning_rate": 0.002828, "loss": 1.5236, "step": 143616 }, { "epoch": 10.782739212007504, "grad_norm": 0.43942809104919434, "learning_rate": 0.002828, "loss": 1.5296, "step": 143680 }, { "epoch": 10.787542213883677, "grad_norm": 0.5430384874343872, "learning_rate": 0.002828, "loss": 1.5153, "step": 143744 }, { "epoch": 10.79234521575985, "grad_norm": 0.509769082069397, "learning_rate": 0.002828, "loss": 1.5215, "step": 143808 }, { "epoch": 10.797148217636023, "grad_norm": 0.4945880174636841, "learning_rate": 0.002828, "loss": 1.5199, "step": 143872 }, { "epoch": 10.801951219512196, "grad_norm": 0.5118721127510071, "learning_rate": 0.002828, "loss": 1.5202, "step": 143936 }, { "epoch": 10.806754221388367, "grad_norm": 0.4627864360809326, "learning_rate": 0.002828, "loss": 1.5179, "step": 144000 }, { "epoch": 10.81155722326454, "grad_norm": 0.6523723602294922, "learning_rate": 0.002828, "loss": 1.5245, "step": 144064 }, { "epoch": 10.816360225140713, "grad_norm": 0.5512601137161255, "learning_rate": 0.002828, "loss": 1.5178, "step": 144128 }, { "epoch": 10.821163227016886, "grad_norm": 0.5221168994903564, "learning_rate": 0.002828, "loss": 1.5207, "step": 144192 }, { "epoch": 10.825966228893058, "grad_norm": 0.4710821509361267, "learning_rate": 0.002828, "loss": 1.5303, "step": 144256 }, { "epoch": 10.830769230769231, "grad_norm": 0.5352916121482849, "learning_rate": 0.002828, "loss": 1.5189, "step": 144320 }, { "epoch": 10.835572232645404, "grad_norm": 0.44222092628479004, "learning_rate": 0.002828, "loss": 1.5245, "step": 144384 }, { "epoch": 10.840375234521575, "grad_norm": 0.5109274387359619, "learning_rate": 0.002828, "loss": 1.5162, "step": 144448 }, { "epoch": 10.845178236397748, "grad_norm": 0.6416229009628296, "learning_rate": 0.002828, "loss": 1.5259, "step": 144512 }, { "epoch": 10.849981238273921, "grad_norm": 0.46093955636024475, "learning_rate": 0.002828, "loss": 1.5198, "step": 144576 }, { "epoch": 10.854784240150094, "grad_norm": 0.6144540309906006, "learning_rate": 0.002828, "loss": 1.5149, "step": 144640 }, { "epoch": 10.859587242026267, "grad_norm": 0.47149455547332764, "learning_rate": 0.002828, "loss": 1.5196, "step": 144704 }, { "epoch": 10.86439024390244, "grad_norm": 0.5379576683044434, "learning_rate": 0.002828, "loss": 1.5149, "step": 144768 }, { "epoch": 10.869193245778611, "grad_norm": 0.4889363944530487, "learning_rate": 0.002828, "loss": 1.52, "step": 144832 }, { "epoch": 10.873996247654784, "grad_norm": 0.4816627502441406, "learning_rate": 0.002828, "loss": 1.5146, "step": 144896 }, { "epoch": 10.878799249530957, "grad_norm": 0.4828503131866455, "learning_rate": 0.002828, "loss": 1.5222, "step": 144960 }, { "epoch": 10.88360225140713, "grad_norm": 0.5127456188201904, "learning_rate": 0.002828, "loss": 1.5218, "step": 145024 }, { "epoch": 10.888405253283302, "grad_norm": 0.4628119170665741, "learning_rate": 0.002828, "loss": 1.5142, "step": 145088 }, { "epoch": 10.893208255159475, "grad_norm": 0.5557365417480469, "learning_rate": 0.002828, "loss": 1.5176, "step": 145152 }, { "epoch": 10.898011257035648, "grad_norm": 0.4339671730995178, "learning_rate": 0.002828, "loss": 1.5198, "step": 145216 }, { "epoch": 10.90281425891182, "grad_norm": 0.5440133213996887, "learning_rate": 0.002828, "loss": 1.5187, "step": 145280 }, { "epoch": 10.907617260787992, "grad_norm": 0.4955686628818512, "learning_rate": 0.002828, "loss": 1.5209, "step": 145344 }, { "epoch": 10.912420262664165, "grad_norm": 0.5579130053520203, "learning_rate": 0.002828, "loss": 1.5119, "step": 145408 }, { "epoch": 10.917223264540338, "grad_norm": 0.5804506540298462, "learning_rate": 0.002828, "loss": 1.5136, "step": 145472 }, { "epoch": 10.92202626641651, "grad_norm": 0.4968551993370056, "learning_rate": 0.002828, "loss": 1.5161, "step": 145536 }, { "epoch": 10.926829268292684, "grad_norm": 0.447307825088501, "learning_rate": 0.002828, "loss": 1.5191, "step": 145600 }, { "epoch": 10.931632270168855, "grad_norm": 0.5149531960487366, "learning_rate": 0.002828, "loss": 1.5232, "step": 145664 }, { "epoch": 10.936435272045028, "grad_norm": 0.44786104559898376, "learning_rate": 0.002828, "loss": 1.5176, "step": 145728 }, { "epoch": 10.9412382739212, "grad_norm": 0.4521365165710449, "learning_rate": 0.002828, "loss": 1.5164, "step": 145792 }, { "epoch": 10.946041275797374, "grad_norm": 0.5914323329925537, "learning_rate": 0.002828, "loss": 1.5193, "step": 145856 }, { "epoch": 10.950844277673546, "grad_norm": 0.6470093131065369, "learning_rate": 0.002828, "loss": 1.5125, "step": 145920 }, { "epoch": 10.95564727954972, "grad_norm": 0.3939136564731598, "learning_rate": 0.002828, "loss": 1.5165, "step": 145984 }, { "epoch": 10.96045028142589, "grad_norm": 0.6154167056083679, "learning_rate": 0.002828, "loss": 1.5115, "step": 146048 }, { "epoch": 10.965253283302063, "grad_norm": 0.5322780609130859, "learning_rate": 0.002828, "loss": 1.5189, "step": 146112 }, { "epoch": 10.970056285178236, "grad_norm": 0.5299228429794312, "learning_rate": 0.002828, "loss": 1.5166, "step": 146176 }, { "epoch": 10.974859287054409, "grad_norm": 0.4304593801498413, "learning_rate": 0.002828, "loss": 1.5258, "step": 146240 }, { "epoch": 10.979662288930582, "grad_norm": 0.5457515716552734, "learning_rate": 0.002828, "loss": 1.514, "step": 146304 }, { "epoch": 10.984465290806755, "grad_norm": 0.59228515625, "learning_rate": 0.002828, "loss": 1.5257, "step": 146368 }, { "epoch": 10.989268292682926, "grad_norm": 0.5043462514877319, "learning_rate": 0.002828, "loss": 1.5152, "step": 146432 }, { "epoch": 10.994071294559099, "grad_norm": 0.4221332371234894, "learning_rate": 0.002828, "loss": 1.5159, "step": 146496 }, { "epoch": 10.998874296435272, "grad_norm": 0.5075702667236328, "learning_rate": 0.002828, "loss": 1.5202, "step": 146560 }, { "epoch": 11.003677298311445, "grad_norm": 0.5219289064407349, "learning_rate": 0.002828, "loss": 1.4803, "step": 146624 }, { "epoch": 11.008480300187617, "grad_norm": 0.46624791622161865, "learning_rate": 0.002828, "loss": 1.4725, "step": 146688 }, { "epoch": 11.01328330206379, "grad_norm": 0.6117081642150879, "learning_rate": 0.002828, "loss": 1.4732, "step": 146752 }, { "epoch": 11.018086303939963, "grad_norm": 0.48853933811187744, "learning_rate": 0.002828, "loss": 1.4787, "step": 146816 }, { "epoch": 11.022889305816134, "grad_norm": 0.5432941913604736, "learning_rate": 0.002828, "loss": 1.4667, "step": 146880 }, { "epoch": 11.027692307692307, "grad_norm": 0.5087817311286926, "learning_rate": 0.002828, "loss": 1.4737, "step": 146944 }, { "epoch": 11.03249530956848, "grad_norm": 0.5708329677581787, "learning_rate": 0.002828, "loss": 1.4784, "step": 147008 }, { "epoch": 11.037298311444653, "grad_norm": 0.6395474672317505, "learning_rate": 0.002828, "loss": 1.4751, "step": 147072 }, { "epoch": 11.042101313320826, "grad_norm": 0.5034723877906799, "learning_rate": 0.002828, "loss": 1.4786, "step": 147136 }, { "epoch": 11.046904315196999, "grad_norm": 0.5139698386192322, "learning_rate": 0.002828, "loss": 1.4723, "step": 147200 }, { "epoch": 11.05170731707317, "grad_norm": 0.5037325024604797, "learning_rate": 0.002828, "loss": 1.4724, "step": 147264 }, { "epoch": 11.056510318949343, "grad_norm": 0.5322916507720947, "learning_rate": 0.002828, "loss": 1.4768, "step": 147328 }, { "epoch": 11.061313320825516, "grad_norm": 0.5106876492500305, "learning_rate": 0.002828, "loss": 1.4719, "step": 147392 }, { "epoch": 11.066116322701689, "grad_norm": 0.47849833965301514, "learning_rate": 0.002828, "loss": 1.4784, "step": 147456 }, { "epoch": 11.070919324577861, "grad_norm": 0.4921078383922577, "learning_rate": 0.002828, "loss": 1.4736, "step": 147520 }, { "epoch": 11.075722326454034, "grad_norm": 0.43421855568885803, "learning_rate": 0.002828, "loss": 1.4735, "step": 147584 }, { "epoch": 11.080525328330207, "grad_norm": 0.503207266330719, "learning_rate": 0.002828, "loss": 1.4761, "step": 147648 }, { "epoch": 11.085328330206378, "grad_norm": 0.5848730206489563, "learning_rate": 0.002828, "loss": 1.4719, "step": 147712 }, { "epoch": 11.090131332082551, "grad_norm": 0.5984218120574951, "learning_rate": 0.002828, "loss": 1.4796, "step": 147776 }, { "epoch": 11.094934333958724, "grad_norm": 0.4876102805137634, "learning_rate": 0.002828, "loss": 1.4795, "step": 147840 }, { "epoch": 11.099737335834897, "grad_norm": 0.4254842698574066, "learning_rate": 0.002828, "loss": 1.4785, "step": 147904 }, { "epoch": 11.10454033771107, "grad_norm": 0.5368649363517761, "learning_rate": 0.002828, "loss": 1.4803, "step": 147968 }, { "epoch": 11.109343339587243, "grad_norm": 0.4503213167190552, "learning_rate": 0.002828, "loss": 1.482, "step": 148032 }, { "epoch": 11.114146341463414, "grad_norm": 0.5051832795143127, "learning_rate": 0.002828, "loss": 1.4788, "step": 148096 }, { "epoch": 11.118949343339587, "grad_norm": 0.5128603577613831, "learning_rate": 0.002828, "loss": 1.4816, "step": 148160 }, { "epoch": 11.12375234521576, "grad_norm": 0.5454185605049133, "learning_rate": 0.002828, "loss": 1.4847, "step": 148224 }, { "epoch": 11.128555347091933, "grad_norm": 0.575675904750824, "learning_rate": 0.002828, "loss": 1.4874, "step": 148288 }, { "epoch": 11.133358348968105, "grad_norm": 0.49659401178359985, "learning_rate": 0.002828, "loss": 1.4844, "step": 148352 }, { "epoch": 11.138161350844278, "grad_norm": 0.4814682900905609, "learning_rate": 0.002828, "loss": 1.476, "step": 148416 }, { "epoch": 11.142964352720451, "grad_norm": 0.46637871861457825, "learning_rate": 0.002828, "loss": 1.4804, "step": 148480 }, { "epoch": 11.147767354596622, "grad_norm": 0.8030520677566528, "learning_rate": 0.002828, "loss": 1.4828, "step": 148544 }, { "epoch": 11.152570356472795, "grad_norm": 0.4283498525619507, "learning_rate": 0.002828, "loss": 1.4833, "step": 148608 }, { "epoch": 11.157373358348968, "grad_norm": 0.7140107154846191, "learning_rate": 0.002828, "loss": 1.4843, "step": 148672 }, { "epoch": 11.162176360225141, "grad_norm": 0.45211201906204224, "learning_rate": 0.002828, "loss": 1.4764, "step": 148736 }, { "epoch": 11.166979362101314, "grad_norm": 0.5594805479049683, "learning_rate": 0.002828, "loss": 1.4868, "step": 148800 }, { "epoch": 11.171782363977487, "grad_norm": 0.5397804975509644, "learning_rate": 0.002828, "loss": 1.4797, "step": 148864 }, { "epoch": 11.176585365853658, "grad_norm": 0.4983387887477875, "learning_rate": 0.002828, "loss": 1.4846, "step": 148928 }, { "epoch": 11.18138836772983, "grad_norm": 0.48610880970954895, "learning_rate": 0.002828, "loss": 1.4807, "step": 148992 }, { "epoch": 11.186191369606004, "grad_norm": 0.4960896968841553, "learning_rate": 0.002828, "loss": 1.4833, "step": 149056 }, { "epoch": 11.190994371482176, "grad_norm": 0.4940851628780365, "learning_rate": 0.002828, "loss": 1.4898, "step": 149120 }, { "epoch": 11.19579737335835, "grad_norm": 0.5122717618942261, "learning_rate": 0.002828, "loss": 1.483, "step": 149184 }, { "epoch": 11.200600375234522, "grad_norm": 0.4835565984249115, "learning_rate": 0.002828, "loss": 1.4833, "step": 149248 }, { "epoch": 11.205403377110693, "grad_norm": 0.5495861768722534, "learning_rate": 0.002828, "loss": 1.4825, "step": 149312 }, { "epoch": 11.210206378986866, "grad_norm": 0.4611824154853821, "learning_rate": 0.002828, "loss": 1.4819, "step": 149376 }, { "epoch": 11.21500938086304, "grad_norm": 0.47973930835723877, "learning_rate": 0.002828, "loss": 1.4839, "step": 149440 }, { "epoch": 11.219812382739212, "grad_norm": 0.5280267596244812, "learning_rate": 0.002828, "loss": 1.4813, "step": 149504 }, { "epoch": 11.224615384615385, "grad_norm": 0.5254800915718079, "learning_rate": 0.002828, "loss": 1.4843, "step": 149568 }, { "epoch": 11.229418386491558, "grad_norm": 0.5418986678123474, "learning_rate": 0.002828, "loss": 1.4887, "step": 149632 }, { "epoch": 11.23422138836773, "grad_norm": 0.522598147392273, "learning_rate": 0.002828, "loss": 1.4828, "step": 149696 }, { "epoch": 11.239024390243902, "grad_norm": 0.47169050574302673, "learning_rate": 0.002828, "loss": 1.4799, "step": 149760 }, { "epoch": 11.243827392120075, "grad_norm": 0.5565853714942932, "learning_rate": 0.002828, "loss": 1.4808, "step": 149824 }, { "epoch": 11.248630393996248, "grad_norm": 0.571573793888092, "learning_rate": 0.002828, "loss": 1.4843, "step": 149888 }, { "epoch": 11.25343339587242, "grad_norm": 0.5259160995483398, "learning_rate": 0.002828, "loss": 1.4854, "step": 149952 }, { "epoch": 11.258236397748593, "grad_norm": 0.4677402079105377, "learning_rate": 0.002828, "loss": 1.4822, "step": 150016 }, { "epoch": 11.263039399624766, "grad_norm": 0.5765787959098816, "learning_rate": 0.002828, "loss": 1.4877, "step": 150080 }, { "epoch": 11.267842401500937, "grad_norm": 0.6456829905509949, "learning_rate": 0.002828, "loss": 1.4806, "step": 150144 }, { "epoch": 11.27264540337711, "grad_norm": 0.4820854961872101, "learning_rate": 0.002828, "loss": 1.489, "step": 150208 }, { "epoch": 11.277448405253283, "grad_norm": 0.4561883807182312, "learning_rate": 0.002828, "loss": 1.4794, "step": 150272 }, { "epoch": 11.282251407129456, "grad_norm": 0.47787246108055115, "learning_rate": 0.002828, "loss": 1.4902, "step": 150336 }, { "epoch": 11.287054409005629, "grad_norm": 0.5143373012542725, "learning_rate": 0.002828, "loss": 1.4867, "step": 150400 }, { "epoch": 11.291857410881802, "grad_norm": 0.5364919304847717, "learning_rate": 0.002828, "loss": 1.4846, "step": 150464 }, { "epoch": 11.296660412757973, "grad_norm": 0.5826845169067383, "learning_rate": 0.002828, "loss": 1.4826, "step": 150528 }, { "epoch": 11.301463414634146, "grad_norm": 0.542300820350647, "learning_rate": 0.002828, "loss": 1.4848, "step": 150592 }, { "epoch": 11.306266416510319, "grad_norm": 0.4969736933708191, "learning_rate": 0.002828, "loss": 1.4897, "step": 150656 }, { "epoch": 11.311069418386491, "grad_norm": 0.4292810559272766, "learning_rate": 0.002828, "loss": 1.4799, "step": 150720 }, { "epoch": 11.315872420262664, "grad_norm": 0.5050080418586731, "learning_rate": 0.002828, "loss": 1.4905, "step": 150784 }, { "epoch": 11.320675422138837, "grad_norm": 0.45646074414253235, "learning_rate": 0.002828, "loss": 1.4905, "step": 150848 }, { "epoch": 11.32547842401501, "grad_norm": 0.4377295970916748, "learning_rate": 0.002828, "loss": 1.4776, "step": 150912 }, { "epoch": 11.330281425891181, "grad_norm": 0.6370419263839722, "learning_rate": 0.002828, "loss": 1.4773, "step": 150976 }, { "epoch": 11.335084427767354, "grad_norm": 0.48091229796409607, "learning_rate": 0.002828, "loss": 1.4845, "step": 151040 }, { "epoch": 11.339887429643527, "grad_norm": 0.5427354574203491, "learning_rate": 0.002828, "loss": 1.4856, "step": 151104 }, { "epoch": 11.3446904315197, "grad_norm": 0.5507506728172302, "learning_rate": 0.002828, "loss": 1.4907, "step": 151168 }, { "epoch": 11.349493433395873, "grad_norm": 0.5161782503128052, "learning_rate": 0.002828, "loss": 1.483, "step": 151232 }, { "epoch": 11.354296435272046, "grad_norm": 0.4885135293006897, "learning_rate": 0.002828, "loss": 1.4919, "step": 151296 }, { "epoch": 11.359099437148217, "grad_norm": 0.5884434580802917, "learning_rate": 0.002828, "loss": 1.4847, "step": 151360 }, { "epoch": 11.36390243902439, "grad_norm": 0.46827441453933716, "learning_rate": 0.002828, "loss": 1.4812, "step": 151424 }, { "epoch": 11.368705440900563, "grad_norm": 0.4739481508731842, "learning_rate": 0.002828, "loss": 1.4819, "step": 151488 }, { "epoch": 11.373508442776735, "grad_norm": 0.42353928089141846, "learning_rate": 0.002828, "loss": 1.4871, "step": 151552 }, { "epoch": 11.378311444652908, "grad_norm": 0.5274419784545898, "learning_rate": 0.002828, "loss": 1.4909, "step": 151616 }, { "epoch": 11.383114446529081, "grad_norm": 0.5639366507530212, "learning_rate": 0.002828, "loss": 1.4842, "step": 151680 }, { "epoch": 11.387917448405254, "grad_norm": 0.556846559047699, "learning_rate": 0.002828, "loss": 1.489, "step": 151744 }, { "epoch": 11.392720450281425, "grad_norm": 0.5005077123641968, "learning_rate": 0.002828, "loss": 1.4919, "step": 151808 }, { "epoch": 11.397523452157598, "grad_norm": 0.4254924952983856, "learning_rate": 0.002828, "loss": 1.493, "step": 151872 }, { "epoch": 11.402326454033771, "grad_norm": 0.5749356150627136, "learning_rate": 0.002828, "loss": 1.4806, "step": 151936 }, { "epoch": 11.407129455909944, "grad_norm": 0.5827454924583435, "learning_rate": 0.002828, "loss": 1.4886, "step": 152000 }, { "epoch": 11.411932457786117, "grad_norm": 0.5152413249015808, "learning_rate": 0.002828, "loss": 1.4867, "step": 152064 }, { "epoch": 11.41673545966229, "grad_norm": 0.5386618375778198, "learning_rate": 0.002828, "loss": 1.4914, "step": 152128 }, { "epoch": 11.42153846153846, "grad_norm": 0.4550732970237732, "learning_rate": 0.002828, "loss": 1.4839, "step": 152192 }, { "epoch": 11.426341463414634, "grad_norm": 0.45811569690704346, "learning_rate": 0.002828, "loss": 1.4843, "step": 152256 }, { "epoch": 11.431144465290807, "grad_norm": 0.6061787009239197, "learning_rate": 0.002828, "loss": 1.4828, "step": 152320 }, { "epoch": 11.43594746716698, "grad_norm": 0.54230797290802, "learning_rate": 0.002828, "loss": 1.4771, "step": 152384 }, { "epoch": 11.440750469043152, "grad_norm": 0.5468747019767761, "learning_rate": 0.002828, "loss": 1.4896, "step": 152448 }, { "epoch": 11.445553470919325, "grad_norm": 0.46672895550727844, "learning_rate": 0.002828, "loss": 1.4888, "step": 152512 }, { "epoch": 11.450356472795498, "grad_norm": 0.4434189796447754, "learning_rate": 0.002828, "loss": 1.4787, "step": 152576 }, { "epoch": 11.45515947467167, "grad_norm": 0.5849834084510803, "learning_rate": 0.002828, "loss": 1.4842, "step": 152640 }, { "epoch": 11.459962476547842, "grad_norm": 0.5004452466964722, "learning_rate": 0.002828, "loss": 1.4863, "step": 152704 }, { "epoch": 11.464765478424015, "grad_norm": 0.520357072353363, "learning_rate": 0.002828, "loss": 1.487, "step": 152768 }, { "epoch": 11.469568480300188, "grad_norm": 0.48489540815353394, "learning_rate": 0.002828, "loss": 1.4887, "step": 152832 }, { "epoch": 11.47437148217636, "grad_norm": 0.4426417052745819, "learning_rate": 0.002828, "loss": 1.4851, "step": 152896 }, { "epoch": 11.479174484052534, "grad_norm": 0.6546865701675415, "learning_rate": 0.002828, "loss": 1.4899, "step": 152960 }, { "epoch": 11.483977485928705, "grad_norm": 0.5141983032226562, "learning_rate": 0.002828, "loss": 1.49, "step": 153024 }, { "epoch": 11.488780487804878, "grad_norm": 0.6321448683738708, "learning_rate": 0.002828, "loss": 1.4817, "step": 153088 }, { "epoch": 11.49358348968105, "grad_norm": 0.5375384092330933, "learning_rate": 0.002828, "loss": 1.4815, "step": 153152 }, { "epoch": 11.498386491557223, "grad_norm": 0.5649176239967346, "learning_rate": 0.002828, "loss": 1.4851, "step": 153216 }, { "epoch": 11.503189493433396, "grad_norm": 0.5667992234230042, "learning_rate": 0.002828, "loss": 1.4843, "step": 153280 }, { "epoch": 11.50799249530957, "grad_norm": 0.4731646776199341, "learning_rate": 0.002828, "loss": 1.4861, "step": 153344 }, { "epoch": 11.512795497185742, "grad_norm": 0.4973836839199066, "learning_rate": 0.002828, "loss": 1.4858, "step": 153408 }, { "epoch": 11.517598499061913, "grad_norm": 0.4976014792919159, "learning_rate": 0.002828, "loss": 1.4814, "step": 153472 }, { "epoch": 11.522401500938086, "grad_norm": 0.48606857657432556, "learning_rate": 0.002828, "loss": 1.4873, "step": 153536 }, { "epoch": 11.527204502814259, "grad_norm": 0.5743147730827332, "learning_rate": 0.002828, "loss": 1.4874, "step": 153600 }, { "epoch": 11.532007504690432, "grad_norm": 0.5242615342140198, "learning_rate": 0.002828, "loss": 1.4842, "step": 153664 }, { "epoch": 11.536810506566605, "grad_norm": 0.5626909732818604, "learning_rate": 0.002828, "loss": 1.4839, "step": 153728 }, { "epoch": 11.541613508442778, "grad_norm": 0.4896009564399719, "learning_rate": 0.002828, "loss": 1.4885, "step": 153792 }, { "epoch": 11.546416510318949, "grad_norm": 0.5722072720527649, "learning_rate": 0.002828, "loss": 1.4835, "step": 153856 }, { "epoch": 11.551219512195122, "grad_norm": 0.4468698501586914, "learning_rate": 0.002828, "loss": 1.4905, "step": 153920 }, { "epoch": 11.556022514071294, "grad_norm": 0.46148356795310974, "learning_rate": 0.002828, "loss": 1.4859, "step": 153984 }, { "epoch": 11.560825515947467, "grad_norm": 0.5205918550491333, "learning_rate": 0.002828, "loss": 1.4945, "step": 154048 }, { "epoch": 11.56562851782364, "grad_norm": 0.5110222697257996, "learning_rate": 0.002828, "loss": 1.4885, "step": 154112 }, { "epoch": 11.570431519699813, "grad_norm": 0.5615659356117249, "learning_rate": 0.002828, "loss": 1.4814, "step": 154176 }, { "epoch": 11.575234521575984, "grad_norm": 0.5684924125671387, "learning_rate": 0.002828, "loss": 1.4904, "step": 154240 }, { "epoch": 11.580037523452157, "grad_norm": 0.5970752239227295, "learning_rate": 0.002828, "loss": 1.4937, "step": 154304 }, { "epoch": 11.58484052532833, "grad_norm": 0.5680916905403137, "learning_rate": 0.002828, "loss": 1.4936, "step": 154368 }, { "epoch": 11.589643527204503, "grad_norm": 0.6049686670303345, "learning_rate": 0.002828, "loss": 1.4866, "step": 154432 }, { "epoch": 11.594446529080676, "grad_norm": 0.5732573866844177, "learning_rate": 0.002828, "loss": 1.4913, "step": 154496 }, { "epoch": 11.599249530956849, "grad_norm": 0.5191873908042908, "learning_rate": 0.002828, "loss": 1.4863, "step": 154560 }, { "epoch": 11.60405253283302, "grad_norm": 0.4945391118526459, "learning_rate": 0.002828, "loss": 1.4907, "step": 154624 }, { "epoch": 11.608855534709193, "grad_norm": 0.5700604915618896, "learning_rate": 0.002828, "loss": 1.4872, "step": 154688 }, { "epoch": 11.613658536585366, "grad_norm": 0.5110488533973694, "learning_rate": 0.002828, "loss": 1.4877, "step": 154752 }, { "epoch": 11.618461538461538, "grad_norm": 0.5065351724624634, "learning_rate": 0.002828, "loss": 1.4922, "step": 154816 }, { "epoch": 11.623264540337711, "grad_norm": 0.4655206799507141, "learning_rate": 0.002828, "loss": 1.4875, "step": 154880 }, { "epoch": 11.628067542213884, "grad_norm": 0.5089915990829468, "learning_rate": 0.002828, "loss": 1.4876, "step": 154944 }, { "epoch": 11.632870544090057, "grad_norm": 0.48405689001083374, "learning_rate": 0.002828, "loss": 1.4834, "step": 155008 }, { "epoch": 11.637673545966228, "grad_norm": 0.49810877442359924, "learning_rate": 0.002828, "loss": 1.4911, "step": 155072 }, { "epoch": 11.642476547842401, "grad_norm": 0.5092597603797913, "learning_rate": 0.002828, "loss": 1.4919, "step": 155136 }, { "epoch": 11.647279549718574, "grad_norm": 0.5448451042175293, "learning_rate": 0.002828, "loss": 1.4886, "step": 155200 }, { "epoch": 11.652082551594747, "grad_norm": 0.5183640718460083, "learning_rate": 0.002828, "loss": 1.4883, "step": 155264 }, { "epoch": 11.65688555347092, "grad_norm": 0.47896742820739746, "learning_rate": 0.002828, "loss": 1.4827, "step": 155328 }, { "epoch": 11.661688555347093, "grad_norm": 0.4914989769458771, "learning_rate": 0.002828, "loss": 1.488, "step": 155392 }, { "epoch": 11.666491557223264, "grad_norm": 0.44482043385505676, "learning_rate": 0.002828, "loss": 1.4831, "step": 155456 }, { "epoch": 11.671294559099437, "grad_norm": 0.514543354511261, "learning_rate": 0.002828, "loss": 1.4967, "step": 155520 }, { "epoch": 11.67609756097561, "grad_norm": 0.4731675088405609, "learning_rate": 0.002828, "loss": 1.4877, "step": 155584 }, { "epoch": 11.680900562851782, "grad_norm": 0.602486252784729, "learning_rate": 0.002828, "loss": 1.4857, "step": 155648 }, { "epoch": 11.685703564727955, "grad_norm": 0.5217562913894653, "learning_rate": 0.002828, "loss": 1.4924, "step": 155712 }, { "epoch": 11.690506566604128, "grad_norm": 0.5473820567131042, "learning_rate": 0.002828, "loss": 1.4945, "step": 155776 }, { "epoch": 11.695309568480301, "grad_norm": 0.5123807787895203, "learning_rate": 0.002828, "loss": 1.4881, "step": 155840 }, { "epoch": 11.700112570356472, "grad_norm": 0.5650917291641235, "learning_rate": 0.002828, "loss": 1.4841, "step": 155904 }, { "epoch": 11.704915572232645, "grad_norm": 0.49404966831207275, "learning_rate": 0.002828, "loss": 1.4832, "step": 155968 }, { "epoch": 11.709718574108818, "grad_norm": 0.5131070613861084, "learning_rate": 0.002828, "loss": 1.4874, "step": 156032 }, { "epoch": 11.71452157598499, "grad_norm": 0.5612493753433228, "learning_rate": 0.002828, "loss": 1.4853, "step": 156096 }, { "epoch": 11.719324577861164, "grad_norm": 0.5529559850692749, "learning_rate": 0.002828, "loss": 1.4903, "step": 156160 }, { "epoch": 11.724127579737337, "grad_norm": 0.44944292306900024, "learning_rate": 0.002828, "loss": 1.4862, "step": 156224 }, { "epoch": 11.728930581613508, "grad_norm": 0.5775054693222046, "learning_rate": 0.002828, "loss": 1.4899, "step": 156288 }, { "epoch": 11.73373358348968, "grad_norm": 0.4601612985134125, "learning_rate": 0.002828, "loss": 1.4835, "step": 156352 }, { "epoch": 11.738536585365853, "grad_norm": 0.4491596817970276, "learning_rate": 0.002828, "loss": 1.4844, "step": 156416 }, { "epoch": 11.743339587242026, "grad_norm": 0.46096885204315186, "learning_rate": 0.002828, "loss": 1.486, "step": 156480 }, { "epoch": 11.7481425891182, "grad_norm": 0.5358552932739258, "learning_rate": 0.002828, "loss": 1.4839, "step": 156544 }, { "epoch": 11.752945590994372, "grad_norm": 0.6074651479721069, "learning_rate": 0.002828, "loss": 1.4822, "step": 156608 }, { "epoch": 11.757748592870545, "grad_norm": 0.5014597773551941, "learning_rate": 0.002828, "loss": 1.4812, "step": 156672 }, { "epoch": 11.762551594746716, "grad_norm": 0.41438910365104675, "learning_rate": 0.002828, "loss": 1.4874, "step": 156736 }, { "epoch": 11.767354596622889, "grad_norm": 0.5457605123519897, "learning_rate": 0.002828, "loss": 1.481, "step": 156800 }, { "epoch": 11.772157598499062, "grad_norm": 0.4685835540294647, "learning_rate": 0.002828, "loss": 1.4837, "step": 156864 }, { "epoch": 11.776960600375235, "grad_norm": 0.4858347177505493, "learning_rate": 0.002828, "loss": 1.4844, "step": 156928 }, { "epoch": 11.781763602251408, "grad_norm": 0.5601338148117065, "learning_rate": 0.002828, "loss": 1.489, "step": 156992 }, { "epoch": 11.78656660412758, "grad_norm": 0.4982069730758667, "learning_rate": 0.002828, "loss": 1.4902, "step": 157056 }, { "epoch": 11.791369606003752, "grad_norm": 0.5538622140884399, "learning_rate": 0.002828, "loss": 1.4853, "step": 157120 }, { "epoch": 11.796172607879924, "grad_norm": 0.5723830461502075, "learning_rate": 0.002828, "loss": 1.4867, "step": 157184 }, { "epoch": 11.800975609756097, "grad_norm": 0.4833488464355469, "learning_rate": 0.002828, "loss": 1.492, "step": 157248 }, { "epoch": 11.80577861163227, "grad_norm": 0.5311214923858643, "learning_rate": 0.002828, "loss": 1.4929, "step": 157312 }, { "epoch": 11.810581613508443, "grad_norm": 0.5109043121337891, "learning_rate": 0.002828, "loss": 1.4834, "step": 157376 }, { "epoch": 11.815384615384616, "grad_norm": 0.5096741318702698, "learning_rate": 0.002828, "loss": 1.4911, "step": 157440 }, { "epoch": 11.820187617260789, "grad_norm": 0.4858244061470032, "learning_rate": 0.002828, "loss": 1.485, "step": 157504 }, { "epoch": 11.82499061913696, "grad_norm": 0.4737776219844818, "learning_rate": 0.002828, "loss": 1.4922, "step": 157568 }, { "epoch": 11.829793621013133, "grad_norm": 0.5020055174827576, "learning_rate": 0.002828, "loss": 1.482, "step": 157632 }, { "epoch": 11.834596622889306, "grad_norm": 0.49587440490722656, "learning_rate": 0.002828, "loss": 1.4894, "step": 157696 }, { "epoch": 11.839399624765479, "grad_norm": 0.4922964870929718, "learning_rate": 0.002828, "loss": 1.4859, "step": 157760 }, { "epoch": 11.844202626641652, "grad_norm": 0.46961134672164917, "learning_rate": 0.002828, "loss": 1.4868, "step": 157824 }, { "epoch": 11.849005628517824, "grad_norm": 0.5366621613502502, "learning_rate": 0.002828, "loss": 1.4908, "step": 157888 }, { "epoch": 11.853808630393996, "grad_norm": 0.4588063955307007, "learning_rate": 0.002828, "loss": 1.4869, "step": 157952 }, { "epoch": 11.858611632270168, "grad_norm": 0.49376556277275085, "learning_rate": 0.002828, "loss": 1.4841, "step": 158016 }, { "epoch": 11.863414634146341, "grad_norm": 0.48326173424720764, "learning_rate": 0.002828, "loss": 1.4924, "step": 158080 }, { "epoch": 11.868217636022514, "grad_norm": 0.49264848232269287, "learning_rate": 0.002828, "loss": 1.4868, "step": 158144 }, { "epoch": 11.873020637898687, "grad_norm": 0.5427521467208862, "learning_rate": 0.002828, "loss": 1.4907, "step": 158208 }, { "epoch": 11.87782363977486, "grad_norm": 0.5619401335716248, "learning_rate": 0.002828, "loss": 1.4885, "step": 158272 }, { "epoch": 11.882626641651031, "grad_norm": 0.5136578679084778, "learning_rate": 0.002828, "loss": 1.4857, "step": 158336 }, { "epoch": 11.887429643527204, "grad_norm": 0.5775615572929382, "learning_rate": 0.002828, "loss": 1.4838, "step": 158400 }, { "epoch": 11.892232645403377, "grad_norm": 0.5155749917030334, "learning_rate": 0.002828, "loss": 1.4913, "step": 158464 }, { "epoch": 11.89703564727955, "grad_norm": 0.4589896500110626, "learning_rate": 0.002828, "loss": 1.4813, "step": 158528 }, { "epoch": 11.901838649155723, "grad_norm": 0.5712898969650269, "learning_rate": 0.002828, "loss": 1.4876, "step": 158592 }, { "epoch": 11.906641651031896, "grad_norm": 0.5223777890205383, "learning_rate": 0.002828, "loss": 1.4878, "step": 158656 }, { "epoch": 11.911444652908067, "grad_norm": 0.5290514230728149, "learning_rate": 0.002828, "loss": 1.4828, "step": 158720 }, { "epoch": 11.91624765478424, "grad_norm": 0.464539110660553, "learning_rate": 0.002828, "loss": 1.4851, "step": 158784 }, { "epoch": 11.921050656660412, "grad_norm": 0.5479804873466492, "learning_rate": 0.002828, "loss": 1.4893, "step": 158848 }, { "epoch": 11.925853658536585, "grad_norm": 0.5218388438224792, "learning_rate": 0.002828, "loss": 1.4898, "step": 158912 }, { "epoch": 11.930656660412758, "grad_norm": 0.5520194172859192, "learning_rate": 0.002828, "loss": 1.4862, "step": 158976 }, { "epoch": 11.935459662288931, "grad_norm": 0.6023911833763123, "learning_rate": 0.002828, "loss": 1.4897, "step": 159040 }, { "epoch": 11.940262664165104, "grad_norm": 0.522139847278595, "learning_rate": 0.002828, "loss": 1.4873, "step": 159104 }, { "epoch": 11.945065666041275, "grad_norm": 0.43739432096481323, "learning_rate": 0.002828, "loss": 1.4874, "step": 159168 }, { "epoch": 11.949868667917448, "grad_norm": 0.4542504847049713, "learning_rate": 0.002828, "loss": 1.4844, "step": 159232 }, { "epoch": 11.95467166979362, "grad_norm": 0.5269852876663208, "learning_rate": 0.002828, "loss": 1.4904, "step": 159296 }, { "epoch": 11.959474671669794, "grad_norm": 0.480561763048172, "learning_rate": 0.002828, "loss": 1.4884, "step": 159360 }, { "epoch": 11.964277673545967, "grad_norm": 0.47597286105155945, "learning_rate": 0.002828, "loss": 1.4833, "step": 159424 }, { "epoch": 11.96908067542214, "grad_norm": 0.49963515996932983, "learning_rate": 0.002828, "loss": 1.4844, "step": 159488 }, { "epoch": 11.97388367729831, "grad_norm": 0.4844549894332886, "learning_rate": 0.002828, "loss": 1.4933, "step": 159552 }, { "epoch": 11.978686679174483, "grad_norm": 0.4828104078769684, "learning_rate": 0.002828, "loss": 1.4902, "step": 159616 }, { "epoch": 11.983489681050656, "grad_norm": 0.5636357665061951, "learning_rate": 0.002828, "loss": 1.4877, "step": 159680 }, { "epoch": 11.98829268292683, "grad_norm": 0.5685691833496094, "learning_rate": 0.002828, "loss": 1.486, "step": 159744 }, { "epoch": 11.993095684803002, "grad_norm": 0.4913961887359619, "learning_rate": 0.002828, "loss": 1.4894, "step": 159808 }, { "epoch": 11.997898686679175, "grad_norm": 0.587879478931427, "learning_rate": 0.002828, "loss": 1.4858, "step": 159872 }, { "epoch": 12.002701688555348, "grad_norm": 0.7443265318870544, "learning_rate": 0.002828, "loss": 1.4605, "step": 159936 }, { "epoch": 12.007504690431519, "grad_norm": 0.5058794617652893, "learning_rate": 0.002828, "loss": 1.4435, "step": 160000 }, { "epoch": 12.012307692307692, "grad_norm": 0.4813808798789978, "learning_rate": 0.002828, "loss": 1.4437, "step": 160064 }, { "epoch": 12.017110694183865, "grad_norm": 0.5733544826507568, "learning_rate": 0.002828, "loss": 1.4447, "step": 160128 }, { "epoch": 12.021913696060038, "grad_norm": 0.48118847608566284, "learning_rate": 0.002828, "loss": 1.443, "step": 160192 }, { "epoch": 12.02671669793621, "grad_norm": 0.4636737108230591, "learning_rate": 0.002828, "loss": 1.4395, "step": 160256 }, { "epoch": 12.031519699812383, "grad_norm": 0.47300007939338684, "learning_rate": 0.002828, "loss": 1.4397, "step": 160320 }, { "epoch": 12.036322701688555, "grad_norm": 0.5260705351829529, "learning_rate": 0.002828, "loss": 1.4362, "step": 160384 }, { "epoch": 12.041125703564727, "grad_norm": 0.5173159241676331, "learning_rate": 0.002828, "loss": 1.447, "step": 160448 }, { "epoch": 12.0459287054409, "grad_norm": 0.49044883251190186, "learning_rate": 0.002828, "loss": 1.4443, "step": 160512 }, { "epoch": 12.050731707317073, "grad_norm": 0.5307776927947998, "learning_rate": 0.002828, "loss": 1.449, "step": 160576 }, { "epoch": 12.055534709193246, "grad_norm": 0.5528637170791626, "learning_rate": 0.002828, "loss": 1.4418, "step": 160640 }, { "epoch": 12.060337711069419, "grad_norm": 0.5058019161224365, "learning_rate": 0.002828, "loss": 1.4444, "step": 160704 }, { "epoch": 12.065140712945592, "grad_norm": 0.46592089533805847, "learning_rate": 0.002828, "loss": 1.4402, "step": 160768 }, { "epoch": 12.069943714821763, "grad_norm": 0.5039073824882507, "learning_rate": 0.002828, "loss": 1.4452, "step": 160832 }, { "epoch": 12.074746716697936, "grad_norm": 0.5121236443519592, "learning_rate": 0.002828, "loss": 1.4534, "step": 160896 }, { "epoch": 12.079549718574109, "grad_norm": 0.5076376795768738, "learning_rate": 0.002828, "loss": 1.4457, "step": 160960 }, { "epoch": 12.084352720450282, "grad_norm": 0.6144150495529175, "learning_rate": 0.002828, "loss": 1.4466, "step": 161024 }, { "epoch": 12.089155722326455, "grad_norm": 0.628743052482605, "learning_rate": 0.002828, "loss": 1.4506, "step": 161088 }, { "epoch": 12.093958724202627, "grad_norm": 0.4718782901763916, "learning_rate": 0.002828, "loss": 1.446, "step": 161152 }, { "epoch": 12.098761726078799, "grad_norm": 0.4742676615715027, "learning_rate": 0.002828, "loss": 1.4543, "step": 161216 }, { "epoch": 12.103564727954971, "grad_norm": 0.5861676931381226, "learning_rate": 0.002828, "loss": 1.4454, "step": 161280 }, { "epoch": 12.108367729831144, "grad_norm": 0.6501273512840271, "learning_rate": 0.002828, "loss": 1.4453, "step": 161344 }, { "epoch": 12.113170731707317, "grad_norm": 0.43199998140335083, "learning_rate": 0.002828, "loss": 1.4449, "step": 161408 }, { "epoch": 12.11797373358349, "grad_norm": 0.5195205807685852, "learning_rate": 0.002828, "loss": 1.4524, "step": 161472 }, { "epoch": 12.122776735459663, "grad_norm": 0.5002077221870422, "learning_rate": 0.002828, "loss": 1.4433, "step": 161536 }, { "epoch": 12.127579737335834, "grad_norm": 0.6138117909431458, "learning_rate": 0.002828, "loss": 1.4477, "step": 161600 }, { "epoch": 12.132382739212007, "grad_norm": 0.5133177638053894, "learning_rate": 0.002828, "loss": 1.4568, "step": 161664 }, { "epoch": 12.13718574108818, "grad_norm": 0.5995278358459473, "learning_rate": 0.002828, "loss": 1.4513, "step": 161728 }, { "epoch": 12.141988742964353, "grad_norm": 0.5219971537590027, "learning_rate": 0.002828, "loss": 1.4549, "step": 161792 }, { "epoch": 12.146791744840526, "grad_norm": 0.5502182245254517, "learning_rate": 0.002828, "loss": 1.4469, "step": 161856 }, { "epoch": 12.151594746716698, "grad_norm": 0.5348833799362183, "learning_rate": 0.002828, "loss": 1.4517, "step": 161920 }, { "epoch": 12.156397748592871, "grad_norm": 0.5264079570770264, "learning_rate": 0.002828, "loss": 1.4439, "step": 161984 }, { "epoch": 12.161200750469042, "grad_norm": 0.5887817740440369, "learning_rate": 0.002828, "loss": 1.449, "step": 162048 }, { "epoch": 12.166003752345215, "grad_norm": 0.5023970007896423, "learning_rate": 0.002828, "loss": 1.4507, "step": 162112 }, { "epoch": 12.170806754221388, "grad_norm": 0.5127968192100525, "learning_rate": 0.002828, "loss": 1.4531, "step": 162176 }, { "epoch": 12.175609756097561, "grad_norm": 0.5700290203094482, "learning_rate": 0.002828, "loss": 1.448, "step": 162240 }, { "epoch": 12.180412757973734, "grad_norm": 0.5987260937690735, "learning_rate": 0.002828, "loss": 1.457, "step": 162304 }, { "epoch": 12.185215759849907, "grad_norm": 0.528264582157135, "learning_rate": 0.002828, "loss": 1.4567, "step": 162368 }, { "epoch": 12.190018761726078, "grad_norm": 0.5861890316009521, "learning_rate": 0.002828, "loss": 1.4477, "step": 162432 }, { "epoch": 12.194821763602251, "grad_norm": 0.5477088689804077, "learning_rate": 0.002828, "loss": 1.4546, "step": 162496 }, { "epoch": 12.199624765478424, "grad_norm": 0.5453941822052002, "learning_rate": 0.002828, "loss": 1.4521, "step": 162560 }, { "epoch": 12.204427767354597, "grad_norm": 0.5315229892730713, "learning_rate": 0.002828, "loss": 1.4498, "step": 162624 }, { "epoch": 12.20923076923077, "grad_norm": 0.5340282917022705, "learning_rate": 0.002828, "loss": 1.4546, "step": 162688 }, { "epoch": 12.214033771106942, "grad_norm": 0.5422166585922241, "learning_rate": 0.002828, "loss": 1.4459, "step": 162752 }, { "epoch": 12.218836772983114, "grad_norm": 0.4889850914478302, "learning_rate": 0.002828, "loss": 1.4544, "step": 162816 }, { "epoch": 12.223639774859286, "grad_norm": 0.5909858345985413, "learning_rate": 0.002828, "loss": 1.4565, "step": 162880 }, { "epoch": 12.22844277673546, "grad_norm": 0.5490917563438416, "learning_rate": 0.002828, "loss": 1.4517, "step": 162944 }, { "epoch": 12.233245778611632, "grad_norm": 0.5376546382904053, "learning_rate": 0.002828, "loss": 1.4559, "step": 163008 }, { "epoch": 12.238048780487805, "grad_norm": 0.48789891600608826, "learning_rate": 0.002828, "loss": 1.4436, "step": 163072 }, { "epoch": 12.242851782363978, "grad_norm": 0.5413309335708618, "learning_rate": 0.002828, "loss": 1.4566, "step": 163136 }, { "epoch": 12.24765478424015, "grad_norm": 0.6094352006912231, "learning_rate": 0.002828, "loss": 1.4552, "step": 163200 }, { "epoch": 12.252457786116322, "grad_norm": 0.5055562853813171, "learning_rate": 0.002828, "loss": 1.4533, "step": 163264 }, { "epoch": 12.257260787992495, "grad_norm": 0.47938257455825806, "learning_rate": 0.002828, "loss": 1.4466, "step": 163328 }, { "epoch": 12.262063789868668, "grad_norm": 0.5385513305664062, "learning_rate": 0.002828, "loss": 1.4508, "step": 163392 }, { "epoch": 12.26686679174484, "grad_norm": 0.5873063802719116, "learning_rate": 0.002828, "loss": 1.4475, "step": 163456 }, { "epoch": 12.271669793621014, "grad_norm": 0.4774555265903473, "learning_rate": 0.002828, "loss": 1.458, "step": 163520 }, { "epoch": 12.276472795497186, "grad_norm": 0.5163844227790833, "learning_rate": 0.002828, "loss": 1.4489, "step": 163584 }, { "epoch": 12.281275797373358, "grad_norm": 0.5182616114616394, "learning_rate": 0.002828, "loss": 1.4518, "step": 163648 }, { "epoch": 12.28607879924953, "grad_norm": 0.49142399430274963, "learning_rate": 0.002828, "loss": 1.4539, "step": 163712 }, { "epoch": 12.290881801125703, "grad_norm": 0.5692216753959656, "learning_rate": 0.002828, "loss": 1.4644, "step": 163776 }, { "epoch": 12.295684803001876, "grad_norm": 0.46027058362960815, "learning_rate": 0.002828, "loss": 1.4474, "step": 163840 }, { "epoch": 12.300487804878049, "grad_norm": 0.46703729033470154, "learning_rate": 0.002828, "loss": 1.4508, "step": 163904 }, { "epoch": 12.305290806754222, "grad_norm": 0.500545859336853, "learning_rate": 0.002828, "loss": 1.455, "step": 163968 }, { "epoch": 12.310093808630395, "grad_norm": 0.5360391736030579, "learning_rate": 0.002828, "loss": 1.4587, "step": 164032 }, { "epoch": 12.314896810506566, "grad_norm": 0.4937238097190857, "learning_rate": 0.002828, "loss": 1.4573, "step": 164096 }, { "epoch": 12.319699812382739, "grad_norm": 0.5751468539237976, "learning_rate": 0.002828, "loss": 1.4583, "step": 164160 }, { "epoch": 12.324502814258912, "grad_norm": 0.5222389698028564, "learning_rate": 0.002828, "loss": 1.4541, "step": 164224 }, { "epoch": 12.329305816135085, "grad_norm": 0.4793798327445984, "learning_rate": 0.002828, "loss": 1.4617, "step": 164288 }, { "epoch": 12.334108818011257, "grad_norm": 0.5237858295440674, "learning_rate": 0.002828, "loss": 1.4543, "step": 164352 }, { "epoch": 12.33891181988743, "grad_norm": 0.5332927107810974, "learning_rate": 0.002828, "loss": 1.4529, "step": 164416 }, { "epoch": 12.343714821763601, "grad_norm": 0.5909544825553894, "learning_rate": 0.002828, "loss": 1.453, "step": 164480 }, { "epoch": 12.348517823639774, "grad_norm": 0.49265050888061523, "learning_rate": 0.002828, "loss": 1.456, "step": 164544 }, { "epoch": 12.353320825515947, "grad_norm": 0.5843788385391235, "learning_rate": 0.002828, "loss": 1.4516, "step": 164608 }, { "epoch": 12.35812382739212, "grad_norm": 0.5406148433685303, "learning_rate": 0.002828, "loss": 1.4583, "step": 164672 }, { "epoch": 12.362926829268293, "grad_norm": 0.5167796015739441, "learning_rate": 0.002828, "loss": 1.4493, "step": 164736 }, { "epoch": 12.367729831144466, "grad_norm": 0.48411092162132263, "learning_rate": 0.002828, "loss": 1.4511, "step": 164800 }, { "epoch": 12.372532833020639, "grad_norm": 0.44566309452056885, "learning_rate": 0.002828, "loss": 1.4623, "step": 164864 }, { "epoch": 12.37733583489681, "grad_norm": 0.5984368920326233, "learning_rate": 0.002828, "loss": 1.4604, "step": 164928 }, { "epoch": 12.382138836772983, "grad_norm": 0.5471071600914001, "learning_rate": 0.002828, "loss": 1.4584, "step": 164992 }, { "epoch": 12.386941838649156, "grad_norm": 0.5406402349472046, "learning_rate": 0.002828, "loss": 1.4611, "step": 165056 }, { "epoch": 12.391744840525329, "grad_norm": 0.4552817642688751, "learning_rate": 0.002828, "loss": 1.4553, "step": 165120 }, { "epoch": 12.396547842401501, "grad_norm": 0.569806694984436, "learning_rate": 0.002828, "loss": 1.4587, "step": 165184 }, { "epoch": 12.401350844277674, "grad_norm": 0.6869778037071228, "learning_rate": 0.002828, "loss": 1.4581, "step": 165248 }, { "epoch": 12.406153846153845, "grad_norm": 0.5332195162773132, "learning_rate": 0.002828, "loss": 1.4506, "step": 165312 }, { "epoch": 12.410956848030018, "grad_norm": 0.5285490155220032, "learning_rate": 0.002828, "loss": 1.4544, "step": 165376 }, { "epoch": 12.415759849906191, "grad_norm": 0.5308892726898193, "learning_rate": 0.002828, "loss": 1.4565, "step": 165440 }, { "epoch": 12.420562851782364, "grad_norm": 0.47045472264289856, "learning_rate": 0.002828, "loss": 1.458, "step": 165504 }, { "epoch": 12.425365853658537, "grad_norm": 0.41003862023353577, "learning_rate": 0.002828, "loss": 1.4614, "step": 165568 }, { "epoch": 12.43016885553471, "grad_norm": 0.48407503962516785, "learning_rate": 0.002828, "loss": 1.4541, "step": 165632 }, { "epoch": 12.434971857410881, "grad_norm": 0.5479405522346497, "learning_rate": 0.002828, "loss": 1.4584, "step": 165696 }, { "epoch": 12.439774859287054, "grad_norm": 0.4842681586742401, "learning_rate": 0.002828, "loss": 1.4513, "step": 165760 }, { "epoch": 12.444577861163227, "grad_norm": 0.5198821425437927, "learning_rate": 0.002828, "loss": 1.4579, "step": 165824 }, { "epoch": 12.4493808630394, "grad_norm": 0.5267767310142517, "learning_rate": 0.002828, "loss": 1.4562, "step": 165888 }, { "epoch": 12.454183864915572, "grad_norm": 0.5741159319877625, "learning_rate": 0.002828, "loss": 1.4495, "step": 165952 }, { "epoch": 12.458986866791745, "grad_norm": 0.5260004997253418, "learning_rate": 0.002828, "loss": 1.4529, "step": 166016 }, { "epoch": 12.463789868667918, "grad_norm": 0.4495656490325928, "learning_rate": 0.002828, "loss": 1.4629, "step": 166080 }, { "epoch": 12.46859287054409, "grad_norm": 0.5246948599815369, "learning_rate": 0.002828, "loss": 1.4578, "step": 166144 }, { "epoch": 12.473395872420262, "grad_norm": 0.6091532707214355, "learning_rate": 0.002828, "loss": 1.4579, "step": 166208 }, { "epoch": 12.478198874296435, "grad_norm": 0.4875415563583374, "learning_rate": 0.002828, "loss": 1.4587, "step": 166272 }, { "epoch": 12.483001876172608, "grad_norm": 0.507404625415802, "learning_rate": 0.002828, "loss": 1.4567, "step": 166336 }, { "epoch": 12.487804878048781, "grad_norm": 0.5386779308319092, "learning_rate": 0.002828, "loss": 1.4536, "step": 166400 }, { "epoch": 12.492607879924954, "grad_norm": 0.475137323141098, "learning_rate": 0.002828, "loss": 1.4544, "step": 166464 }, { "epoch": 12.497410881801125, "grad_norm": 0.49984148144721985, "learning_rate": 0.002828, "loss": 1.4582, "step": 166528 }, { "epoch": 12.502213883677298, "grad_norm": 0.4507026672363281, "learning_rate": 0.002828, "loss": 1.4583, "step": 166592 }, { "epoch": 12.50701688555347, "grad_norm": 0.49608319997787476, "learning_rate": 0.002828, "loss": 1.4612, "step": 166656 }, { "epoch": 12.511819887429644, "grad_norm": 0.5014333128929138, "learning_rate": 0.002828, "loss": 1.4565, "step": 166720 }, { "epoch": 12.516622889305816, "grad_norm": 0.45654651522636414, "learning_rate": 0.002828, "loss": 1.4641, "step": 166784 }, { "epoch": 12.52142589118199, "grad_norm": 0.5392569303512573, "learning_rate": 0.002828, "loss": 1.4652, "step": 166848 }, { "epoch": 12.52622889305816, "grad_norm": 0.5011345148086548, "learning_rate": 0.002828, "loss": 1.4621, "step": 166912 }, { "epoch": 12.531031894934333, "grad_norm": 0.5161344408988953, "learning_rate": 0.002828, "loss": 1.4655, "step": 166976 }, { "epoch": 12.535834896810506, "grad_norm": 0.7373325824737549, "learning_rate": 0.002828, "loss": 1.4647, "step": 167040 }, { "epoch": 12.540637898686679, "grad_norm": 0.5518906116485596, "learning_rate": 0.002828, "loss": 1.4565, "step": 167104 }, { "epoch": 12.545440900562852, "grad_norm": 0.4990812838077545, "learning_rate": 0.002828, "loss": 1.4629, "step": 167168 }, { "epoch": 12.550243902439025, "grad_norm": 0.4953933656215668, "learning_rate": 0.002828, "loss": 1.4595, "step": 167232 }, { "epoch": 12.555046904315198, "grad_norm": 0.4726664125919342, "learning_rate": 0.002828, "loss": 1.4622, "step": 167296 }, { "epoch": 12.559849906191369, "grad_norm": 0.4469330310821533, "learning_rate": 0.002828, "loss": 1.4604, "step": 167360 }, { "epoch": 12.564652908067542, "grad_norm": 0.48912984132766724, "learning_rate": 0.002828, "loss": 1.4563, "step": 167424 }, { "epoch": 12.569455909943715, "grad_norm": 0.6089437007904053, "learning_rate": 0.002828, "loss": 1.4596, "step": 167488 }, { "epoch": 12.574258911819888, "grad_norm": 0.4961417019367218, "learning_rate": 0.002828, "loss": 1.4605, "step": 167552 }, { "epoch": 12.57906191369606, "grad_norm": 0.4331898093223572, "learning_rate": 0.002828, "loss": 1.4646, "step": 167616 }, { "epoch": 12.583864915572233, "grad_norm": 0.515690803527832, "learning_rate": 0.002828, "loss": 1.4604, "step": 167680 }, { "epoch": 12.588667917448404, "grad_norm": 0.4997495412826538, "learning_rate": 0.002828, "loss": 1.4633, "step": 167744 }, { "epoch": 12.593470919324577, "grad_norm": 0.4562072157859802, "learning_rate": 0.002828, "loss": 1.4603, "step": 167808 }, { "epoch": 12.59827392120075, "grad_norm": 0.46387189626693726, "learning_rate": 0.002828, "loss": 1.4592, "step": 167872 }, { "epoch": 12.603076923076923, "grad_norm": 0.46868836879730225, "learning_rate": 0.002828, "loss": 1.4568, "step": 167936 }, { "epoch": 12.607879924953096, "grad_norm": 0.5087418556213379, "learning_rate": 0.002828, "loss": 1.4514, "step": 168000 }, { "epoch": 12.612682926829269, "grad_norm": 0.5555745959281921, "learning_rate": 0.002828, "loss": 1.4537, "step": 168064 }, { "epoch": 12.617485928705442, "grad_norm": 0.49841299653053284, "learning_rate": 0.002828, "loss": 1.4565, "step": 168128 }, { "epoch": 12.622288930581613, "grad_norm": 0.5797818899154663, "learning_rate": 0.002828, "loss": 1.4518, "step": 168192 }, { "epoch": 12.627091932457786, "grad_norm": 0.5726368427276611, "learning_rate": 0.002828, "loss": 1.4611, "step": 168256 }, { "epoch": 12.631894934333959, "grad_norm": 0.5542452335357666, "learning_rate": 0.002828, "loss": 1.4522, "step": 168320 }, { "epoch": 12.636697936210131, "grad_norm": 0.42682501673698425, "learning_rate": 0.002828, "loss": 1.456, "step": 168384 }, { "epoch": 12.641500938086304, "grad_norm": 0.4608093202114105, "learning_rate": 0.002828, "loss": 1.4614, "step": 168448 }, { "epoch": 12.646303939962477, "grad_norm": 0.6001752614974976, "learning_rate": 0.002828, "loss": 1.461, "step": 168512 }, { "epoch": 12.651106941838648, "grad_norm": 0.5241263508796692, "learning_rate": 0.002828, "loss": 1.4613, "step": 168576 }, { "epoch": 12.655909943714821, "grad_norm": 0.5729988813400269, "learning_rate": 0.002828, "loss": 1.4539, "step": 168640 }, { "epoch": 12.660712945590994, "grad_norm": 0.5749446153640747, "learning_rate": 0.002828, "loss": 1.4538, "step": 168704 }, { "epoch": 12.665515947467167, "grad_norm": 0.45133233070373535, "learning_rate": 0.002828, "loss": 1.4627, "step": 168768 }, { "epoch": 12.67031894934334, "grad_norm": 0.583003044128418, "learning_rate": 0.002828, "loss": 1.4638, "step": 168832 }, { "epoch": 12.675121951219513, "grad_norm": 0.5264136791229248, "learning_rate": 0.002828, "loss": 1.4579, "step": 168896 }, { "epoch": 12.679924953095686, "grad_norm": 0.5577759146690369, "learning_rate": 0.002828, "loss": 1.4613, "step": 168960 }, { "epoch": 12.684727954971857, "grad_norm": 0.5690147876739502, "learning_rate": 0.002828, "loss": 1.4583, "step": 169024 }, { "epoch": 12.68953095684803, "grad_norm": 0.5202603340148926, "learning_rate": 0.002828, "loss": 1.4605, "step": 169088 }, { "epoch": 12.694333958724203, "grad_norm": 0.6959776282310486, "learning_rate": 0.002828, "loss": 1.4665, "step": 169152 }, { "epoch": 12.699136960600375, "grad_norm": 0.5649484395980835, "learning_rate": 0.002828, "loss": 1.4547, "step": 169216 }, { "epoch": 12.703939962476548, "grad_norm": 0.5574206709861755, "learning_rate": 0.002828, "loss": 1.4602, "step": 169280 }, { "epoch": 12.708742964352721, "grad_norm": 0.4697055220603943, "learning_rate": 0.002828, "loss": 1.4622, "step": 169344 }, { "epoch": 12.713545966228892, "grad_norm": 0.5403327345848083, "learning_rate": 0.002828, "loss": 1.4602, "step": 169408 }, { "epoch": 12.718348968105065, "grad_norm": 0.5151571035385132, "learning_rate": 0.002828, "loss": 1.4611, "step": 169472 }, { "epoch": 12.723151969981238, "grad_norm": 0.48533812165260315, "learning_rate": 0.002828, "loss": 1.4651, "step": 169536 }, { "epoch": 12.727954971857411, "grad_norm": 0.5039843916893005, "learning_rate": 0.002828, "loss": 1.4582, "step": 169600 }, { "epoch": 12.732757973733584, "grad_norm": 0.5662795901298523, "learning_rate": 0.002828, "loss": 1.4576, "step": 169664 }, { "epoch": 12.737560975609757, "grad_norm": 0.568792998790741, "learning_rate": 0.002828, "loss": 1.4585, "step": 169728 }, { "epoch": 12.74236397748593, "grad_norm": 0.5497303009033203, "learning_rate": 0.002828, "loss": 1.4583, "step": 169792 }, { "epoch": 12.7471669793621, "grad_norm": 0.49786439538002014, "learning_rate": 0.002828, "loss": 1.4639, "step": 169856 }, { "epoch": 12.751969981238274, "grad_norm": 0.6249164342880249, "learning_rate": 0.002828, "loss": 1.4561, "step": 169920 }, { "epoch": 12.756772983114447, "grad_norm": 0.49063023924827576, "learning_rate": 0.002828, "loss": 1.4588, "step": 169984 }, { "epoch": 12.76157598499062, "grad_norm": 0.4647848904132843, "learning_rate": 0.002828, "loss": 1.4608, "step": 170048 }, { "epoch": 12.766378986866792, "grad_norm": 0.5198040008544922, "learning_rate": 0.002828, "loss": 1.4576, "step": 170112 }, { "epoch": 12.771181988742965, "grad_norm": 0.5746068358421326, "learning_rate": 0.002828, "loss": 1.4588, "step": 170176 }, { "epoch": 12.775984990619136, "grad_norm": 0.48485812544822693, "learning_rate": 0.002828, "loss": 1.4625, "step": 170240 }, { "epoch": 12.78078799249531, "grad_norm": 0.43870824575424194, "learning_rate": 0.002828, "loss": 1.4587, "step": 170304 }, { "epoch": 12.785590994371482, "grad_norm": 0.5836806893348694, "learning_rate": 0.002828, "loss": 1.4591, "step": 170368 }, { "epoch": 12.790393996247655, "grad_norm": 0.4936177432537079, "learning_rate": 0.002828, "loss": 1.4596, "step": 170432 }, { "epoch": 12.795196998123828, "grad_norm": 0.5804800987243652, "learning_rate": 0.002828, "loss": 1.4684, "step": 170496 }, { "epoch": 12.8, "grad_norm": 0.5017887353897095, "learning_rate": 0.002828, "loss": 1.459, "step": 170560 }, { "epoch": 12.804803001876172, "grad_norm": 0.5359672904014587, "learning_rate": 0.002828, "loss": 1.4555, "step": 170624 }, { "epoch": 12.809606003752345, "grad_norm": 0.592132031917572, "learning_rate": 0.002828, "loss": 1.4495, "step": 170688 }, { "epoch": 12.814409005628518, "grad_norm": 0.5940999388694763, "learning_rate": 0.002828, "loss": 1.451, "step": 170752 }, { "epoch": 12.81921200750469, "grad_norm": 0.4877603352069855, "learning_rate": 0.002828, "loss": 1.4562, "step": 170816 }, { "epoch": 12.824015009380863, "grad_norm": 0.5055379271507263, "learning_rate": 0.002828, "loss": 1.4593, "step": 170880 }, { "epoch": 12.828818011257036, "grad_norm": 0.5798386335372925, "learning_rate": 0.002828, "loss": 1.4549, "step": 170944 }, { "epoch": 12.833621013133207, "grad_norm": 0.5174988508224487, "learning_rate": 0.002828, "loss": 1.4614, "step": 171008 }, { "epoch": 12.83842401500938, "grad_norm": 0.5694937109947205, "learning_rate": 0.002828, "loss": 1.4595, "step": 171072 }, { "epoch": 12.843227016885553, "grad_norm": 0.5167633891105652, "learning_rate": 0.002828, "loss": 1.4569, "step": 171136 }, { "epoch": 12.848030018761726, "grad_norm": 0.5683608055114746, "learning_rate": 0.002828, "loss": 1.4556, "step": 171200 }, { "epoch": 12.852833020637899, "grad_norm": 0.5424241423606873, "learning_rate": 0.002828, "loss": 1.456, "step": 171264 }, { "epoch": 12.857636022514072, "grad_norm": 0.49074310064315796, "learning_rate": 0.002828, "loss": 1.4585, "step": 171328 }, { "epoch": 12.862439024390245, "grad_norm": 0.5989189743995667, "learning_rate": 0.002828, "loss": 1.4654, "step": 171392 }, { "epoch": 12.867242026266416, "grad_norm": 0.5615851879119873, "learning_rate": 0.002828, "loss": 1.4615, "step": 171456 }, { "epoch": 12.872045028142589, "grad_norm": 0.5346879959106445, "learning_rate": 0.002828, "loss": 1.4544, "step": 171520 }, { "epoch": 12.876848030018762, "grad_norm": 0.5329033136367798, "learning_rate": 0.002828, "loss": 1.4625, "step": 171584 }, { "epoch": 12.881651031894934, "grad_norm": 0.4970327317714691, "learning_rate": 0.002828, "loss": 1.459, "step": 171648 }, { "epoch": 12.886454033771107, "grad_norm": 0.5486003160476685, "learning_rate": 0.002828, "loss": 1.4543, "step": 171712 }, { "epoch": 12.89125703564728, "grad_norm": 0.4972151815891266, "learning_rate": 0.002828, "loss": 1.4608, "step": 171776 }, { "epoch": 12.896060037523451, "grad_norm": 0.4709850549697876, "learning_rate": 0.002828, "loss": 1.462, "step": 171840 }, { "epoch": 12.900863039399624, "grad_norm": 0.548951268196106, "learning_rate": 0.002828, "loss": 1.4526, "step": 171904 }, { "epoch": 12.905666041275797, "grad_norm": 0.6316720843315125, "learning_rate": 0.002828, "loss": 1.4548, "step": 171968 }, { "epoch": 12.91046904315197, "grad_norm": 0.711441159248352, "learning_rate": 0.002828, "loss": 1.4544, "step": 172032 }, { "epoch": 12.915272045028143, "grad_norm": 0.5292863845825195, "learning_rate": 0.002828, "loss": 1.4618, "step": 172096 }, { "epoch": 12.920075046904316, "grad_norm": 0.5618671178817749, "learning_rate": 0.002828, "loss": 1.4585, "step": 172160 }, { "epoch": 12.924878048780489, "grad_norm": 0.5491626262664795, "learning_rate": 0.002828, "loss": 1.4605, "step": 172224 }, { "epoch": 12.92968105065666, "grad_norm": 0.5500079393386841, "learning_rate": 0.002828, "loss": 1.4642, "step": 172288 }, { "epoch": 12.934484052532833, "grad_norm": 0.761467456817627, "learning_rate": 0.002828, "loss": 1.4611, "step": 172352 }, { "epoch": 12.939287054409006, "grad_norm": 0.4791053235530853, "learning_rate": 0.002828, "loss": 1.4521, "step": 172416 }, { "epoch": 12.944090056285178, "grad_norm": 0.5197027921676636, "learning_rate": 0.002828, "loss": 1.4569, "step": 172480 }, { "epoch": 12.948893058161351, "grad_norm": 0.4713915288448334, "learning_rate": 0.002828, "loss": 1.4586, "step": 172544 }, { "epoch": 12.953696060037524, "grad_norm": 0.5665622353553772, "learning_rate": 0.002828, "loss": 1.448, "step": 172608 }, { "epoch": 12.958499061913695, "grad_norm": 0.4697657823562622, "learning_rate": 0.002828, "loss": 1.4615, "step": 172672 }, { "epoch": 12.963302063789868, "grad_norm": 0.5036899447441101, "learning_rate": 0.002828, "loss": 1.4704, "step": 172736 }, { "epoch": 12.968105065666041, "grad_norm": 0.5290060639381409, "learning_rate": 0.002828, "loss": 1.4646, "step": 172800 }, { "epoch": 12.972908067542214, "grad_norm": 0.4270978271961212, "learning_rate": 0.002828, "loss": 1.4542, "step": 172864 }, { "epoch": 12.977711069418387, "grad_norm": 0.5034455060958862, "learning_rate": 0.002828, "loss": 1.4541, "step": 172928 }, { "epoch": 12.98251407129456, "grad_norm": 0.5570845603942871, "learning_rate": 0.002828, "loss": 1.4559, "step": 172992 }, { "epoch": 12.987317073170733, "grad_norm": 0.5905221700668335, "learning_rate": 0.002828, "loss": 1.4593, "step": 173056 }, { "epoch": 12.992120075046904, "grad_norm": 0.48616600036621094, "learning_rate": 0.002828, "loss": 1.462, "step": 173120 }, { "epoch": 12.996923076923077, "grad_norm": 0.5468747615814209, "learning_rate": 0.002828, "loss": 1.4559, "step": 173184 }, { "epoch": 13.00172607879925, "grad_norm": 0.5406197905540466, "learning_rate": 0.002828, "loss": 1.4456, "step": 173248 }, { "epoch": 13.006529080675422, "grad_norm": 0.6132946014404297, "learning_rate": 0.002828, "loss": 1.4222, "step": 173312 }, { "epoch": 13.011332082551595, "grad_norm": 0.6026862859725952, "learning_rate": 0.002828, "loss": 1.4131, "step": 173376 }, { "epoch": 13.016135084427768, "grad_norm": 0.49466168880462646, "learning_rate": 0.002828, "loss": 1.4189, "step": 173440 }, { "epoch": 13.02093808630394, "grad_norm": 0.4750056862831116, "learning_rate": 0.002828, "loss": 1.4215, "step": 173504 }, { "epoch": 13.025741088180112, "grad_norm": 0.6869794726371765, "learning_rate": 0.002828, "loss": 1.4197, "step": 173568 }, { "epoch": 13.030544090056285, "grad_norm": 0.5326526761054993, "learning_rate": 0.002828, "loss": 1.4168, "step": 173632 }, { "epoch": 13.035347091932458, "grad_norm": 0.5036327242851257, "learning_rate": 0.002828, "loss": 1.4156, "step": 173696 }, { "epoch": 13.04015009380863, "grad_norm": 0.5569616556167603, "learning_rate": 0.002828, "loss": 1.42, "step": 173760 }, { "epoch": 13.044953095684804, "grad_norm": 0.5766422152519226, "learning_rate": 0.002828, "loss": 1.4208, "step": 173824 }, { "epoch": 13.049756097560975, "grad_norm": 0.570945143699646, "learning_rate": 0.002828, "loss": 1.4199, "step": 173888 }, { "epoch": 13.054559099437148, "grad_norm": 0.4492458403110504, "learning_rate": 0.002828, "loss": 1.4163, "step": 173952 }, { "epoch": 13.05936210131332, "grad_norm": 0.5762324929237366, "learning_rate": 0.002828, "loss": 1.4224, "step": 174016 }, { "epoch": 13.064165103189493, "grad_norm": 0.5214763283729553, "learning_rate": 0.002828, "loss": 1.4176, "step": 174080 }, { "epoch": 13.068968105065666, "grad_norm": 0.49345290660858154, "learning_rate": 0.002828, "loss": 1.4207, "step": 174144 }, { "epoch": 13.07377110694184, "grad_norm": 0.5769919753074646, "learning_rate": 0.002828, "loss": 1.4256, "step": 174208 }, { "epoch": 13.078574108818012, "grad_norm": 0.5672862529754639, "learning_rate": 0.002828, "loss": 1.4269, "step": 174272 }, { "epoch": 13.083377110694183, "grad_norm": 0.6378344893455505, "learning_rate": 0.002828, "loss": 1.4198, "step": 174336 }, { "epoch": 13.088180112570356, "grad_norm": 0.5776243805885315, "learning_rate": 0.002828, "loss": 1.4198, "step": 174400 }, { "epoch": 13.092983114446529, "grad_norm": 0.5405304431915283, "learning_rate": 0.002828, "loss": 1.4225, "step": 174464 }, { "epoch": 13.097786116322702, "grad_norm": 0.47602930665016174, "learning_rate": 0.002828, "loss": 1.4244, "step": 174528 }, { "epoch": 13.102589118198875, "grad_norm": 0.554095447063446, "learning_rate": 0.002828, "loss": 1.4115, "step": 174592 }, { "epoch": 13.107392120075048, "grad_norm": 0.49326562881469727, "learning_rate": 0.002828, "loss": 1.4206, "step": 174656 }, { "epoch": 13.112195121951219, "grad_norm": 0.5742772221565247, "learning_rate": 0.002828, "loss": 1.4267, "step": 174720 }, { "epoch": 13.116998123827392, "grad_norm": 0.5664711594581604, "learning_rate": 0.002828, "loss": 1.4201, "step": 174784 }, { "epoch": 13.121801125703564, "grad_norm": 0.5422520041465759, "learning_rate": 0.002828, "loss": 1.4205, "step": 174848 }, { "epoch": 13.126604127579737, "grad_norm": 0.5431873798370361, "learning_rate": 0.002828, "loss": 1.4203, "step": 174912 }, { "epoch": 13.13140712945591, "grad_norm": 0.5788468718528748, "learning_rate": 0.002828, "loss": 1.4241, "step": 174976 }, { "epoch": 13.136210131332083, "grad_norm": 0.6146829724311829, "learning_rate": 0.002828, "loss": 1.4238, "step": 175040 }, { "epoch": 13.141013133208254, "grad_norm": 0.44153454899787903, "learning_rate": 0.002828, "loss": 1.4233, "step": 175104 }, { "epoch": 13.145816135084427, "grad_norm": 0.4862412214279175, "learning_rate": 0.002828, "loss": 1.4227, "step": 175168 }, { "epoch": 13.1506191369606, "grad_norm": 0.5295515060424805, "learning_rate": 0.002828, "loss": 1.4242, "step": 175232 }, { "epoch": 13.155422138836773, "grad_norm": 0.4826089143753052, "learning_rate": 0.002828, "loss": 1.4219, "step": 175296 }, { "epoch": 13.160225140712946, "grad_norm": 0.5136275291442871, "learning_rate": 0.002828, "loss": 1.421, "step": 175360 }, { "epoch": 13.165028142589119, "grad_norm": 0.5050927996635437, "learning_rate": 0.002828, "loss": 1.4283, "step": 175424 }, { "epoch": 13.169831144465292, "grad_norm": 0.4709009528160095, "learning_rate": 0.002828, "loss": 1.4244, "step": 175488 }, { "epoch": 13.174634146341463, "grad_norm": 0.5698823928833008, "learning_rate": 0.002828, "loss": 1.4242, "step": 175552 }, { "epoch": 13.179437148217636, "grad_norm": 0.5443013310432434, "learning_rate": 0.002828, "loss": 1.4245, "step": 175616 }, { "epoch": 13.184240150093808, "grad_norm": 0.5437474846839905, "learning_rate": 0.002828, "loss": 1.4211, "step": 175680 }, { "epoch": 13.189043151969981, "grad_norm": 0.5265381336212158, "learning_rate": 0.002828, "loss": 1.4228, "step": 175744 }, { "epoch": 13.193846153846154, "grad_norm": 0.5583203434944153, "learning_rate": 0.002828, "loss": 1.4257, "step": 175808 }, { "epoch": 13.198649155722327, "grad_norm": 0.5392881035804749, "learning_rate": 0.002828, "loss": 1.4245, "step": 175872 }, { "epoch": 13.203452157598498, "grad_norm": 0.5564644932746887, "learning_rate": 0.002828, "loss": 1.4226, "step": 175936 }, { "epoch": 13.208255159474671, "grad_norm": 0.5897397994995117, "learning_rate": 0.002828, "loss": 1.4283, "step": 176000 }, { "epoch": 13.213058161350844, "grad_norm": 0.48577988147735596, "learning_rate": 0.002828, "loss": 1.4227, "step": 176064 }, { "epoch": 13.217861163227017, "grad_norm": 0.5362162590026855, "learning_rate": 0.002828, "loss": 1.4212, "step": 176128 }, { "epoch": 13.22266416510319, "grad_norm": 0.4959189295768738, "learning_rate": 0.002828, "loss": 1.4266, "step": 176192 }, { "epoch": 13.227467166979363, "grad_norm": 0.4970269203186035, "learning_rate": 0.002828, "loss": 1.4181, "step": 176256 }, { "epoch": 13.232270168855536, "grad_norm": 0.6063593029975891, "learning_rate": 0.002828, "loss": 1.4228, "step": 176320 }, { "epoch": 13.237073170731707, "grad_norm": 0.5324926972389221, "learning_rate": 0.002828, "loss": 1.4233, "step": 176384 }, { "epoch": 13.24187617260788, "grad_norm": 0.6734857559204102, "learning_rate": 0.002828, "loss": 1.4236, "step": 176448 }, { "epoch": 13.246679174484052, "grad_norm": 0.5381393432617188, "learning_rate": 0.002828, "loss": 1.4191, "step": 176512 }, { "epoch": 13.251482176360225, "grad_norm": 0.6253378391265869, "learning_rate": 0.002828, "loss": 1.424, "step": 176576 }, { "epoch": 13.256285178236398, "grad_norm": 0.4672098755836487, "learning_rate": 0.002828, "loss": 1.4212, "step": 176640 }, { "epoch": 13.261088180112571, "grad_norm": 0.6410490870475769, "learning_rate": 0.002828, "loss": 1.4266, "step": 176704 }, { "epoch": 13.265891181988742, "grad_norm": 0.6024436354637146, "learning_rate": 0.002828, "loss": 1.4216, "step": 176768 }, { "epoch": 13.270694183864915, "grad_norm": 0.6217461228370667, "learning_rate": 0.002828, "loss": 1.4252, "step": 176832 }, { "epoch": 13.275497185741088, "grad_norm": 0.7444032430648804, "learning_rate": 0.002828, "loss": 1.43, "step": 176896 }, { "epoch": 13.28030018761726, "grad_norm": 0.5802358984947205, "learning_rate": 0.002828, "loss": 1.4287, "step": 176960 }, { "epoch": 13.285103189493434, "grad_norm": 0.4666067361831665, "learning_rate": 0.002828, "loss": 1.4229, "step": 177024 }, { "epoch": 13.289906191369607, "grad_norm": 0.4871880114078522, "learning_rate": 0.002828, "loss": 1.4292, "step": 177088 }, { "epoch": 13.29470919324578, "grad_norm": 0.5733477473258972, "learning_rate": 0.002828, "loss": 1.4227, "step": 177152 }, { "epoch": 13.29951219512195, "grad_norm": 0.6557471752166748, "learning_rate": 0.002828, "loss": 1.4365, "step": 177216 }, { "epoch": 13.304315196998123, "grad_norm": 0.478577584028244, "learning_rate": 0.002828, "loss": 1.4361, "step": 177280 }, { "epoch": 13.309118198874296, "grad_norm": 0.5637518167495728, "learning_rate": 0.002828, "loss": 1.4278, "step": 177344 }, { "epoch": 13.31392120075047, "grad_norm": 0.561852216720581, "learning_rate": 0.002828, "loss": 1.4278, "step": 177408 }, { "epoch": 13.318724202626642, "grad_norm": 0.5598611235618591, "learning_rate": 0.002828, "loss": 1.4292, "step": 177472 }, { "epoch": 13.323527204502815, "grad_norm": 0.5581865310668945, "learning_rate": 0.002828, "loss": 1.4317, "step": 177536 }, { "epoch": 13.328330206378986, "grad_norm": 0.42338356375694275, "learning_rate": 0.002828, "loss": 1.4261, "step": 177600 }, { "epoch": 13.333133208255159, "grad_norm": 0.49160394072532654, "learning_rate": 0.002828, "loss": 1.4244, "step": 177664 }, { "epoch": 13.337936210131332, "grad_norm": 0.4856988787651062, "learning_rate": 0.002828, "loss": 1.4298, "step": 177728 }, { "epoch": 13.342739212007505, "grad_norm": 0.547420859336853, "learning_rate": 0.002828, "loss": 1.4238, "step": 177792 }, { "epoch": 13.347542213883678, "grad_norm": 0.5926133990287781, "learning_rate": 0.002828, "loss": 1.4258, "step": 177856 }, { "epoch": 13.35234521575985, "grad_norm": 0.5286599397659302, "learning_rate": 0.002828, "loss": 1.425, "step": 177920 }, { "epoch": 13.357148217636022, "grad_norm": 0.4704970121383667, "learning_rate": 0.002828, "loss": 1.4322, "step": 177984 }, { "epoch": 13.361951219512195, "grad_norm": 0.6747083067893982, "learning_rate": 0.002828, "loss": 1.4266, "step": 178048 }, { "epoch": 13.366754221388367, "grad_norm": 0.4974871873855591, "learning_rate": 0.002828, "loss": 1.4228, "step": 178112 }, { "epoch": 13.37155722326454, "grad_norm": 0.51888108253479, "learning_rate": 0.002828, "loss": 1.4265, "step": 178176 }, { "epoch": 13.376360225140713, "grad_norm": 0.6136510968208313, "learning_rate": 0.002828, "loss": 1.4304, "step": 178240 }, { "epoch": 13.381163227016886, "grad_norm": 0.5147790908813477, "learning_rate": 0.002828, "loss": 1.4339, "step": 178304 }, { "epoch": 13.385966228893059, "grad_norm": 0.572532594203949, "learning_rate": 0.002828, "loss": 1.4254, "step": 178368 }, { "epoch": 13.39076923076923, "grad_norm": 0.4552408754825592, "learning_rate": 0.002828, "loss": 1.4265, "step": 178432 }, { "epoch": 13.395572232645403, "grad_norm": 0.5829170346260071, "learning_rate": 0.002828, "loss": 1.4266, "step": 178496 }, { "epoch": 13.400375234521576, "grad_norm": 0.43553870916366577, "learning_rate": 0.002828, "loss": 1.4339, "step": 178560 }, { "epoch": 13.405178236397749, "grad_norm": 0.5179685354232788, "learning_rate": 0.002828, "loss": 1.4263, "step": 178624 }, { "epoch": 13.409981238273922, "grad_norm": 0.5742409825325012, "learning_rate": 0.002828, "loss": 1.4303, "step": 178688 }, { "epoch": 13.414784240150095, "grad_norm": 0.6354867815971375, "learning_rate": 0.002828, "loss": 1.4311, "step": 178752 }, { "epoch": 13.419587242026266, "grad_norm": 0.5913352370262146, "learning_rate": 0.002828, "loss": 1.4259, "step": 178816 }, { "epoch": 13.424390243902439, "grad_norm": 0.47367367148399353, "learning_rate": 0.002828, "loss": 1.4281, "step": 178880 }, { "epoch": 13.429193245778611, "grad_norm": 0.5402623414993286, "learning_rate": 0.002828, "loss": 1.4281, "step": 178944 }, { "epoch": 13.433996247654784, "grad_norm": 0.5336705446243286, "learning_rate": 0.002828, "loss": 1.4314, "step": 179008 }, { "epoch": 13.438799249530957, "grad_norm": 0.5876074433326721, "learning_rate": 0.002828, "loss": 1.4333, "step": 179072 }, { "epoch": 13.44360225140713, "grad_norm": 0.5205850005149841, "learning_rate": 0.002828, "loss": 1.4237, "step": 179136 }, { "epoch": 13.448405253283301, "grad_norm": 0.5328284502029419, "learning_rate": 0.002828, "loss": 1.4304, "step": 179200 }, { "epoch": 13.453208255159474, "grad_norm": 0.5152733325958252, "learning_rate": 0.002828, "loss": 1.429, "step": 179264 }, { "epoch": 13.458011257035647, "grad_norm": 0.45550423860549927, "learning_rate": 0.002828, "loss": 1.4347, "step": 179328 }, { "epoch": 13.46281425891182, "grad_norm": 0.485805869102478, "learning_rate": 0.002828, "loss": 1.4312, "step": 179392 }, { "epoch": 13.467617260787993, "grad_norm": 0.5946416258811951, "learning_rate": 0.002828, "loss": 1.422, "step": 179456 }, { "epoch": 13.472420262664166, "grad_norm": 0.5384697318077087, "learning_rate": 0.002828, "loss": 1.4328, "step": 179520 }, { "epoch": 13.477223264540338, "grad_norm": 0.48482996225357056, "learning_rate": 0.002828, "loss": 1.4267, "step": 179584 }, { "epoch": 13.48202626641651, "grad_norm": 0.4668543040752411, "learning_rate": 0.002828, "loss": 1.43, "step": 179648 }, { "epoch": 13.486829268292682, "grad_norm": 0.508421003818512, "learning_rate": 0.002828, "loss": 1.4295, "step": 179712 }, { "epoch": 13.491632270168855, "grad_norm": 0.5609977841377258, "learning_rate": 0.002828, "loss": 1.4242, "step": 179776 }, { "epoch": 13.496435272045028, "grad_norm": 0.43659254908561707, "learning_rate": 0.002828, "loss": 1.4306, "step": 179840 }, { "epoch": 13.501238273921201, "grad_norm": 0.5313818454742432, "learning_rate": 0.002828, "loss": 1.4317, "step": 179904 }, { "epoch": 13.506041275797374, "grad_norm": 0.5664373636245728, "learning_rate": 0.002828, "loss": 1.4326, "step": 179968 }, { "epoch": 13.510844277673545, "grad_norm": 0.5259975790977478, "learning_rate": 0.002828, "loss": 1.4283, "step": 180032 }, { "epoch": 13.515647279549718, "grad_norm": 0.5671187043190002, "learning_rate": 0.002828, "loss": 1.4305, "step": 180096 }, { "epoch": 13.520450281425891, "grad_norm": 0.593250036239624, "learning_rate": 0.002828, "loss": 1.433, "step": 180160 }, { "epoch": 13.525253283302064, "grad_norm": 0.4607378840446472, "learning_rate": 0.002828, "loss": 1.4254, "step": 180224 }, { "epoch": 13.530056285178237, "grad_norm": 0.4907410144805908, "learning_rate": 0.002828, "loss": 1.4331, "step": 180288 }, { "epoch": 13.53485928705441, "grad_norm": 0.6493052840232849, "learning_rate": 0.002828, "loss": 1.4391, "step": 180352 }, { "epoch": 13.539662288930582, "grad_norm": 0.5539810657501221, "learning_rate": 0.002828, "loss": 1.4323, "step": 180416 }, { "epoch": 13.544465290806754, "grad_norm": 0.5795511603355408, "learning_rate": 0.002828, "loss": 1.4378, "step": 180480 }, { "epoch": 13.549268292682926, "grad_norm": 0.5561606287956238, "learning_rate": 0.002828, "loss": 1.4298, "step": 180544 }, { "epoch": 13.5540712945591, "grad_norm": 0.49703845381736755, "learning_rate": 0.002828, "loss": 1.433, "step": 180608 }, { "epoch": 13.558874296435272, "grad_norm": 0.5289865136146545, "learning_rate": 0.002828, "loss": 1.4323, "step": 180672 }, { "epoch": 13.563677298311445, "grad_norm": 0.588064432144165, "learning_rate": 0.002828, "loss": 1.4278, "step": 180736 }, { "epoch": 13.568480300187618, "grad_norm": 0.5216802954673767, "learning_rate": 0.002828, "loss": 1.4262, "step": 180800 }, { "epoch": 13.573283302063789, "grad_norm": 0.6461803317070007, "learning_rate": 0.002828, "loss": 1.4399, "step": 180864 }, { "epoch": 13.578086303939962, "grad_norm": 0.4628434479236603, "learning_rate": 0.002828, "loss": 1.4294, "step": 180928 }, { "epoch": 13.582889305816135, "grad_norm": 0.630901038646698, "learning_rate": 0.002828, "loss": 1.4251, "step": 180992 }, { "epoch": 13.587692307692308, "grad_norm": 0.6294755339622498, "learning_rate": 0.002828, "loss": 1.4268, "step": 181056 }, { "epoch": 13.59249530956848, "grad_norm": 0.5399813055992126, "learning_rate": 0.002828, "loss": 1.4293, "step": 181120 }, { "epoch": 13.597298311444654, "grad_norm": 0.589293360710144, "learning_rate": 0.002828, "loss": 1.4352, "step": 181184 }, { "epoch": 13.602101313320826, "grad_norm": 0.5084601640701294, "learning_rate": 0.002828, "loss": 1.4342, "step": 181248 }, { "epoch": 13.606904315196998, "grad_norm": 0.5964998006820679, "learning_rate": 0.002828, "loss": 1.4355, "step": 181312 }, { "epoch": 13.61170731707317, "grad_norm": 0.48000743985176086, "learning_rate": 0.002828, "loss": 1.4298, "step": 181376 }, { "epoch": 13.616510318949343, "grad_norm": 0.5122075080871582, "learning_rate": 0.002828, "loss": 1.435, "step": 181440 }, { "epoch": 13.621313320825516, "grad_norm": 0.511771559715271, "learning_rate": 0.002828, "loss": 1.4284, "step": 181504 }, { "epoch": 13.626116322701689, "grad_norm": 0.5523917078971863, "learning_rate": 0.002828, "loss": 1.4267, "step": 181568 }, { "epoch": 13.630919324577862, "grad_norm": 0.48201820254325867, "learning_rate": 0.002828, "loss": 1.4286, "step": 181632 }, { "epoch": 13.635722326454033, "grad_norm": 0.5382052659988403, "learning_rate": 0.002828, "loss": 1.4371, "step": 181696 }, { "epoch": 13.640525328330206, "grad_norm": 0.5262125134468079, "learning_rate": 0.002828, "loss": 1.4329, "step": 181760 }, { "epoch": 13.645328330206379, "grad_norm": 0.5679051280021667, "learning_rate": 0.002828, "loss": 1.4376, "step": 181824 }, { "epoch": 13.650131332082552, "grad_norm": 0.5577001571655273, "learning_rate": 0.002828, "loss": 1.4318, "step": 181888 }, { "epoch": 13.654934333958725, "grad_norm": 0.6490796804428101, "learning_rate": 0.002828, "loss": 1.4339, "step": 181952 }, { "epoch": 13.659737335834897, "grad_norm": 0.5554207563400269, "learning_rate": 0.002828, "loss": 1.4373, "step": 182016 }, { "epoch": 13.66454033771107, "grad_norm": 0.4835376739501953, "learning_rate": 0.002828, "loss": 1.4353, "step": 182080 }, { "epoch": 13.669343339587241, "grad_norm": 0.4857877194881439, "learning_rate": 0.002828, "loss": 1.4319, "step": 182144 }, { "epoch": 13.674146341463414, "grad_norm": 0.5001787543296814, "learning_rate": 0.002828, "loss": 1.4292, "step": 182208 }, { "epoch": 13.678949343339587, "grad_norm": 0.6918062567710876, "learning_rate": 0.002828, "loss": 1.4337, "step": 182272 }, { "epoch": 13.68375234521576, "grad_norm": 0.6048857569694519, "learning_rate": 0.002828, "loss": 1.4298, "step": 182336 }, { "epoch": 13.688555347091933, "grad_norm": 0.5380560755729675, "learning_rate": 0.002828, "loss": 1.4282, "step": 182400 }, { "epoch": 13.693358348968106, "grad_norm": 0.534903883934021, "learning_rate": 0.002828, "loss": 1.4303, "step": 182464 }, { "epoch": 13.698161350844277, "grad_norm": 0.5099212527275085, "learning_rate": 0.002828, "loss": 1.4273, "step": 182528 }, { "epoch": 13.70296435272045, "grad_norm": 0.5878075957298279, "learning_rate": 0.002828, "loss": 1.4342, "step": 182592 }, { "epoch": 13.707767354596623, "grad_norm": 0.5337736010551453, "learning_rate": 0.002828, "loss": 1.4289, "step": 182656 }, { "epoch": 13.712570356472796, "grad_norm": 0.5323655605316162, "learning_rate": 0.002828, "loss": 1.4341, "step": 182720 }, { "epoch": 13.717373358348969, "grad_norm": 0.4101346433162689, "learning_rate": 0.002828, "loss": 1.4336, "step": 182784 }, { "epoch": 13.722176360225141, "grad_norm": 0.6183751225471497, "learning_rate": 0.002828, "loss": 1.4309, "step": 182848 }, { "epoch": 13.726979362101313, "grad_norm": 0.6076233983039856, "learning_rate": 0.002828, "loss": 1.4309, "step": 182912 }, { "epoch": 13.731782363977485, "grad_norm": 0.5721030831336975, "learning_rate": 0.002828, "loss": 1.4325, "step": 182976 }, { "epoch": 13.736585365853658, "grad_norm": 0.55230712890625, "learning_rate": 0.002828, "loss": 1.4294, "step": 183040 }, { "epoch": 13.741388367729831, "grad_norm": 0.5440775752067566, "learning_rate": 0.002828, "loss": 1.4313, "step": 183104 }, { "epoch": 13.746191369606004, "grad_norm": 0.5208132266998291, "learning_rate": 0.002828, "loss": 1.431, "step": 183168 }, { "epoch": 13.750994371482177, "grad_norm": 0.5345131158828735, "learning_rate": 0.002828, "loss": 1.4301, "step": 183232 }, { "epoch": 13.755797373358348, "grad_norm": 0.5960522890090942, "learning_rate": 0.002828, "loss": 1.4357, "step": 183296 }, { "epoch": 13.760600375234521, "grad_norm": 0.5363312363624573, "learning_rate": 0.002828, "loss": 1.4419, "step": 183360 }, { "epoch": 13.765403377110694, "grad_norm": 0.5879179835319519, "learning_rate": 0.002828, "loss": 1.4368, "step": 183424 }, { "epoch": 13.770206378986867, "grad_norm": 0.45576685667037964, "learning_rate": 0.002828, "loss": 1.4352, "step": 183488 }, { "epoch": 13.77500938086304, "grad_norm": 0.5763787031173706, "learning_rate": 0.002828, "loss": 1.4365, "step": 183552 }, { "epoch": 13.779812382739212, "grad_norm": 0.5253689885139465, "learning_rate": 0.002828, "loss": 1.4321, "step": 183616 }, { "epoch": 13.784615384615385, "grad_norm": 0.5026938915252686, "learning_rate": 0.002828, "loss": 1.4333, "step": 183680 }, { "epoch": 13.789418386491556, "grad_norm": 0.505780816078186, "learning_rate": 0.002828, "loss": 1.4363, "step": 183744 }, { "epoch": 13.79422138836773, "grad_norm": 0.530052900314331, "learning_rate": 0.002828, "loss": 1.4337, "step": 183808 }, { "epoch": 13.799024390243902, "grad_norm": 0.526617169380188, "learning_rate": 0.002828, "loss": 1.428, "step": 183872 }, { "epoch": 13.803827392120075, "grad_norm": 0.4842779040336609, "learning_rate": 0.002828, "loss": 1.432, "step": 183936 }, { "epoch": 13.808630393996248, "grad_norm": 0.4398646056652069, "learning_rate": 0.002828, "loss": 1.4313, "step": 184000 }, { "epoch": 13.813433395872421, "grad_norm": 0.6402506828308105, "learning_rate": 0.002828, "loss": 1.4344, "step": 184064 }, { "epoch": 13.818236397748592, "grad_norm": 0.4699048101902008, "learning_rate": 0.002828, "loss": 1.4377, "step": 184128 }, { "epoch": 13.823039399624765, "grad_norm": 0.5081595182418823, "learning_rate": 0.002828, "loss": 1.4373, "step": 184192 }, { "epoch": 13.827842401500938, "grad_norm": 0.46454253792762756, "learning_rate": 0.002828, "loss": 1.4318, "step": 184256 }, { "epoch": 13.83264540337711, "grad_norm": 0.6264491677284241, "learning_rate": 0.002828, "loss": 1.426, "step": 184320 }, { "epoch": 13.837448405253284, "grad_norm": 0.6079069972038269, "learning_rate": 0.002828, "loss": 1.4284, "step": 184384 }, { "epoch": 13.842251407129456, "grad_norm": 0.516166627407074, "learning_rate": 0.002828, "loss": 1.4334, "step": 184448 }, { "epoch": 13.84705440900563, "grad_norm": 0.4872256815433502, "learning_rate": 0.002828, "loss": 1.431, "step": 184512 }, { "epoch": 13.8518574108818, "grad_norm": 0.5415956974029541, "learning_rate": 0.002828, "loss": 1.4312, "step": 184576 }, { "epoch": 13.856660412757973, "grad_norm": 0.5147574543952942, "learning_rate": 0.002828, "loss": 1.43, "step": 184640 }, { "epoch": 13.861463414634146, "grad_norm": 0.5008430480957031, "learning_rate": 0.002828, "loss": 1.4332, "step": 184704 }, { "epoch": 13.866266416510319, "grad_norm": 0.5969945192337036, "learning_rate": 0.002828, "loss": 1.434, "step": 184768 }, { "epoch": 13.871069418386492, "grad_norm": 0.44767871499061584, "learning_rate": 0.002828, "loss": 1.4314, "step": 184832 }, { "epoch": 13.875872420262665, "grad_norm": 0.6381478905677795, "learning_rate": 0.002828, "loss": 1.4285, "step": 184896 }, { "epoch": 13.880675422138836, "grad_norm": 0.47069770097732544, "learning_rate": 0.002828, "loss": 1.4372, "step": 184960 }, { "epoch": 13.885478424015009, "grad_norm": 0.5484511256217957, "learning_rate": 0.002828, "loss": 1.4317, "step": 185024 }, { "epoch": 13.890281425891182, "grad_norm": 0.576411783695221, "learning_rate": 0.002828, "loss": 1.4271, "step": 185088 }, { "epoch": 13.895084427767355, "grad_norm": 0.5928360223770142, "learning_rate": 0.002828, "loss": 1.4376, "step": 185152 }, { "epoch": 13.899887429643528, "grad_norm": 0.7210304141044617, "learning_rate": 0.002828, "loss": 1.4365, "step": 185216 }, { "epoch": 13.9046904315197, "grad_norm": 0.5716149806976318, "learning_rate": 0.002828, "loss": 1.4312, "step": 185280 }, { "epoch": 13.909493433395873, "grad_norm": 0.5631930232048035, "learning_rate": 0.002828, "loss": 1.4337, "step": 185344 }, { "epoch": 13.914296435272044, "grad_norm": 0.4883274435997009, "learning_rate": 0.002828, "loss": 1.4279, "step": 185408 }, { "epoch": 13.919099437148217, "grad_norm": 0.5907841920852661, "learning_rate": 0.002828, "loss": 1.4298, "step": 185472 }, { "epoch": 13.92390243902439, "grad_norm": 0.7262232899665833, "learning_rate": 0.002828, "loss": 1.4352, "step": 185536 }, { "epoch": 13.928705440900563, "grad_norm": 0.5713350176811218, "learning_rate": 0.002828, "loss": 1.4302, "step": 185600 }, { "epoch": 13.933508442776736, "grad_norm": 0.48538756370544434, "learning_rate": 0.002828, "loss": 1.434, "step": 185664 }, { "epoch": 13.938311444652909, "grad_norm": 0.610945463180542, "learning_rate": 0.002828, "loss": 1.4289, "step": 185728 }, { "epoch": 13.94311444652908, "grad_norm": 0.5916759967803955, "learning_rate": 0.002828, "loss": 1.4358, "step": 185792 }, { "epoch": 13.947917448405253, "grad_norm": 0.49742409586906433, "learning_rate": 0.002828, "loss": 1.4264, "step": 185856 }, { "epoch": 13.952720450281426, "grad_norm": 0.5827100872993469, "learning_rate": 0.002828, "loss": 1.4301, "step": 185920 }, { "epoch": 13.957523452157599, "grad_norm": 0.517087459564209, "learning_rate": 0.002828, "loss": 1.4312, "step": 185984 }, { "epoch": 13.962326454033771, "grad_norm": 0.5403559803962708, "learning_rate": 0.002828, "loss": 1.4332, "step": 186048 }, { "epoch": 13.967129455909944, "grad_norm": 0.4724648594856262, "learning_rate": 0.002828, "loss": 1.4357, "step": 186112 }, { "epoch": 13.971932457786117, "grad_norm": 0.5587869882583618, "learning_rate": 0.002828, "loss": 1.4331, "step": 186176 }, { "epoch": 13.976735459662288, "grad_norm": 0.45337358117103577, "learning_rate": 0.002828, "loss": 1.4296, "step": 186240 }, { "epoch": 13.981538461538461, "grad_norm": 0.515040934085846, "learning_rate": 0.002828, "loss": 1.4357, "step": 186304 }, { "epoch": 13.986341463414634, "grad_norm": 0.5681731700897217, "learning_rate": 0.002828, "loss": 1.4379, "step": 186368 }, { "epoch": 13.991144465290807, "grad_norm": 0.6006734371185303, "learning_rate": 0.002828, "loss": 1.4316, "step": 186432 }, { "epoch": 13.99594746716698, "grad_norm": 0.6272892951965332, "learning_rate": 0.002828, "loss": 1.4394, "step": 186496 }, { "epoch": 14.000750469043153, "grad_norm": 0.4696020185947418, "learning_rate": 0.002828, "loss": 1.4281, "step": 186560 }, { "epoch": 14.005553470919324, "grad_norm": 0.5587984919548035, "learning_rate": 0.002828, "loss": 1.3876, "step": 186624 }, { "epoch": 14.010356472795497, "grad_norm": 0.514490008354187, "learning_rate": 0.002828, "loss": 1.391, "step": 186688 }, { "epoch": 14.01515947467167, "grad_norm": 0.4929649233818054, "learning_rate": 0.002828, "loss": 1.3858, "step": 186752 }, { "epoch": 14.019962476547843, "grad_norm": 0.552597165107727, "learning_rate": 0.002828, "loss": 1.3846, "step": 186816 }, { "epoch": 14.024765478424015, "grad_norm": 0.6769406199455261, "learning_rate": 0.002828, "loss": 1.3929, "step": 186880 }, { "epoch": 14.029568480300188, "grad_norm": 0.5288282632827759, "learning_rate": 0.002828, "loss": 1.3921, "step": 186944 }, { "epoch": 14.03437148217636, "grad_norm": 0.4571775496006012, "learning_rate": 0.002828, "loss": 1.3965, "step": 187008 }, { "epoch": 14.039174484052532, "grad_norm": 0.6377585530281067, "learning_rate": 0.002828, "loss": 1.3923, "step": 187072 }, { "epoch": 14.043977485928705, "grad_norm": 0.5901743769645691, "learning_rate": 0.002828, "loss": 1.3912, "step": 187136 }, { "epoch": 14.048780487804878, "grad_norm": 0.45205187797546387, "learning_rate": 0.002828, "loss": 1.3922, "step": 187200 }, { "epoch": 14.053583489681051, "grad_norm": 0.6338102221488953, "learning_rate": 0.002828, "loss": 1.3913, "step": 187264 }, { "epoch": 14.058386491557224, "grad_norm": 0.515400230884552, "learning_rate": 0.002828, "loss": 1.3877, "step": 187328 }, { "epoch": 14.063189493433397, "grad_norm": 0.5111676454544067, "learning_rate": 0.002828, "loss": 1.39, "step": 187392 }, { "epoch": 14.067992495309568, "grad_norm": 0.5326048731803894, "learning_rate": 0.002828, "loss": 1.396, "step": 187456 }, { "epoch": 14.07279549718574, "grad_norm": 0.5814757347106934, "learning_rate": 0.002828, "loss": 1.3915, "step": 187520 }, { "epoch": 14.077598499061914, "grad_norm": 0.5279295444488525, "learning_rate": 0.002828, "loss": 1.3944, "step": 187584 }, { "epoch": 14.082401500938087, "grad_norm": 0.5335688591003418, "learning_rate": 0.002828, "loss": 1.4015, "step": 187648 }, { "epoch": 14.08720450281426, "grad_norm": 0.5352903008460999, "learning_rate": 0.002828, "loss": 1.3978, "step": 187712 }, { "epoch": 14.092007504690432, "grad_norm": 0.5011479258537292, "learning_rate": 0.002828, "loss": 1.3978, "step": 187776 }, { "epoch": 14.096810506566603, "grad_norm": 0.504731297492981, "learning_rate": 0.002828, "loss": 1.3983, "step": 187840 }, { "epoch": 14.101613508442776, "grad_norm": 0.5467804670333862, "learning_rate": 0.002828, "loss": 1.3979, "step": 187904 }, { "epoch": 14.10641651031895, "grad_norm": 0.5368660688400269, "learning_rate": 0.002828, "loss": 1.4006, "step": 187968 }, { "epoch": 14.111219512195122, "grad_norm": 0.5375622510910034, "learning_rate": 0.002828, "loss": 1.3932, "step": 188032 }, { "epoch": 14.116022514071295, "grad_norm": 0.5747446417808533, "learning_rate": 0.002828, "loss": 1.3975, "step": 188096 }, { "epoch": 14.120825515947468, "grad_norm": 0.4997574985027313, "learning_rate": 0.002828, "loss": 1.3893, "step": 188160 }, { "epoch": 14.125628517823639, "grad_norm": 0.5358232259750366, "learning_rate": 0.002828, "loss": 1.3987, "step": 188224 }, { "epoch": 14.130431519699812, "grad_norm": 0.5235414505004883, "learning_rate": 0.002828, "loss": 1.3933, "step": 188288 }, { "epoch": 14.135234521575985, "grad_norm": 0.5142853260040283, "learning_rate": 0.002828, "loss": 1.3996, "step": 188352 }, { "epoch": 14.140037523452158, "grad_norm": 0.5567483901977539, "learning_rate": 0.002828, "loss": 1.3967, "step": 188416 }, { "epoch": 14.14484052532833, "grad_norm": 0.590877890586853, "learning_rate": 0.002828, "loss": 1.4016, "step": 188480 }, { "epoch": 14.149643527204503, "grad_norm": 0.5319374203681946, "learning_rate": 0.002828, "loss": 1.3925, "step": 188544 }, { "epoch": 14.154446529080676, "grad_norm": 0.5865071415901184, "learning_rate": 0.002828, "loss": 1.3952, "step": 188608 }, { "epoch": 14.159249530956847, "grad_norm": 0.5776681303977966, "learning_rate": 0.002828, "loss": 1.3993, "step": 188672 }, { "epoch": 14.16405253283302, "grad_norm": 0.49375268816947937, "learning_rate": 0.002828, "loss": 1.3935, "step": 188736 }, { "epoch": 14.168855534709193, "grad_norm": 0.5858403444290161, "learning_rate": 0.002828, "loss": 1.4046, "step": 188800 }, { "epoch": 14.173658536585366, "grad_norm": 0.4694221615791321, "learning_rate": 0.002828, "loss": 1.3969, "step": 188864 }, { "epoch": 14.178461538461539, "grad_norm": 0.6087458729743958, "learning_rate": 0.002828, "loss": 1.3959, "step": 188928 }, { "epoch": 14.183264540337712, "grad_norm": 0.46161791682243347, "learning_rate": 0.002828, "loss": 1.4003, "step": 188992 }, { "epoch": 14.188067542213883, "grad_norm": 0.5340674519538879, "learning_rate": 0.002828, "loss": 1.4042, "step": 189056 }, { "epoch": 14.192870544090056, "grad_norm": 0.504485011100769, "learning_rate": 0.002828, "loss": 1.4034, "step": 189120 }, { "epoch": 14.197673545966229, "grad_norm": 0.584781289100647, "learning_rate": 0.002828, "loss": 1.3935, "step": 189184 }, { "epoch": 14.202476547842402, "grad_norm": 0.5133593082427979, "learning_rate": 0.002828, "loss": 1.3983, "step": 189248 }, { "epoch": 14.207279549718574, "grad_norm": 0.5022109746932983, "learning_rate": 0.002828, "loss": 1.4013, "step": 189312 }, { "epoch": 14.212082551594747, "grad_norm": 0.44956791400909424, "learning_rate": 0.002828, "loss": 1.4033, "step": 189376 }, { "epoch": 14.21688555347092, "grad_norm": 0.5756507515907288, "learning_rate": 0.002828, "loss": 1.4017, "step": 189440 }, { "epoch": 14.221688555347091, "grad_norm": 0.6916122436523438, "learning_rate": 0.002828, "loss": 1.4018, "step": 189504 }, { "epoch": 14.226491557223264, "grad_norm": 0.532734751701355, "learning_rate": 0.002828, "loss": 1.4015, "step": 189568 }, { "epoch": 14.231294559099437, "grad_norm": 0.726618230342865, "learning_rate": 0.002828, "loss": 1.3956, "step": 189632 }, { "epoch": 14.23609756097561, "grad_norm": 0.530405580997467, "learning_rate": 0.002828, "loss": 1.407, "step": 189696 }, { "epoch": 14.240900562851783, "grad_norm": 0.4780811369419098, "learning_rate": 0.002828, "loss": 1.4016, "step": 189760 }, { "epoch": 14.245703564727956, "grad_norm": 0.5396596193313599, "learning_rate": 0.002828, "loss": 1.403, "step": 189824 }, { "epoch": 14.250506566604127, "grad_norm": 0.4692460298538208, "learning_rate": 0.002828, "loss": 1.3945, "step": 189888 }, { "epoch": 14.2553095684803, "grad_norm": 0.4458451271057129, "learning_rate": 0.002828, "loss": 1.3985, "step": 189952 }, { "epoch": 14.260112570356473, "grad_norm": 0.5502991080284119, "learning_rate": 0.002828, "loss": 1.4037, "step": 190016 }, { "epoch": 14.264915572232646, "grad_norm": 0.5964510440826416, "learning_rate": 0.002828, "loss": 1.4016, "step": 190080 }, { "epoch": 14.269718574108818, "grad_norm": 0.49159181118011475, "learning_rate": 0.002828, "loss": 1.4036, "step": 190144 }, { "epoch": 14.274521575984991, "grad_norm": 0.4755879044532776, "learning_rate": 0.002828, "loss": 1.405, "step": 190208 }, { "epoch": 14.279324577861162, "grad_norm": 0.6329382061958313, "learning_rate": 0.002828, "loss": 1.3975, "step": 190272 }, { "epoch": 14.284127579737335, "grad_norm": 0.5941036343574524, "learning_rate": 0.002828, "loss": 1.3992, "step": 190336 }, { "epoch": 14.288930581613508, "grad_norm": 0.4472516179084778, "learning_rate": 0.002828, "loss": 1.402, "step": 190400 }, { "epoch": 14.293733583489681, "grad_norm": 0.614414393901825, "learning_rate": 0.002828, "loss": 1.4067, "step": 190464 }, { "epoch": 14.298536585365854, "grad_norm": 0.469532310962677, "learning_rate": 0.002828, "loss": 1.4017, "step": 190528 }, { "epoch": 14.303339587242027, "grad_norm": 0.49450796842575073, "learning_rate": 0.002828, "loss": 1.4022, "step": 190592 }, { "epoch": 14.3081425891182, "grad_norm": 0.6684780716896057, "learning_rate": 0.002828, "loss": 1.4056, "step": 190656 }, { "epoch": 14.31294559099437, "grad_norm": 0.5274812579154968, "learning_rate": 0.002828, "loss": 1.409, "step": 190720 }, { "epoch": 14.317748592870544, "grad_norm": 0.5079588890075684, "learning_rate": 0.002828, "loss": 1.3962, "step": 190784 }, { "epoch": 14.322551594746717, "grad_norm": 0.5359573364257812, "learning_rate": 0.002828, "loss": 1.4018, "step": 190848 }, { "epoch": 14.32735459662289, "grad_norm": 0.5036746859550476, "learning_rate": 0.002828, "loss": 1.3986, "step": 190912 }, { "epoch": 14.332157598499062, "grad_norm": 0.510995626449585, "learning_rate": 0.002828, "loss": 1.4031, "step": 190976 }, { "epoch": 14.336960600375235, "grad_norm": 0.48344898223876953, "learning_rate": 0.002828, "loss": 1.4071, "step": 191040 }, { "epoch": 14.341763602251406, "grad_norm": 0.5662881731987, "learning_rate": 0.002828, "loss": 1.4005, "step": 191104 }, { "epoch": 14.34656660412758, "grad_norm": 0.5350774526596069, "learning_rate": 0.002828, "loss": 1.402, "step": 191168 }, { "epoch": 14.351369606003752, "grad_norm": 0.5573024749755859, "learning_rate": 0.002828, "loss": 1.3971, "step": 191232 }, { "epoch": 14.356172607879925, "grad_norm": 0.5997970700263977, "learning_rate": 0.002828, "loss": 1.4008, "step": 191296 }, { "epoch": 14.360975609756098, "grad_norm": 0.5876054763793945, "learning_rate": 0.002828, "loss": 1.404, "step": 191360 }, { "epoch": 14.36577861163227, "grad_norm": 0.5237852931022644, "learning_rate": 0.002828, "loss": 1.4115, "step": 191424 }, { "epoch": 14.370581613508442, "grad_norm": 0.5114300847053528, "learning_rate": 0.002828, "loss": 1.405, "step": 191488 }, { "epoch": 14.375384615384615, "grad_norm": 0.5123426914215088, "learning_rate": 0.002828, "loss": 1.3975, "step": 191552 }, { "epoch": 14.380187617260788, "grad_norm": 0.5161942839622498, "learning_rate": 0.002828, "loss": 1.4076, "step": 191616 }, { "epoch": 14.38499061913696, "grad_norm": 0.5509490370750427, "learning_rate": 0.002828, "loss": 1.4038, "step": 191680 }, { "epoch": 14.389793621013133, "grad_norm": 0.49601686000823975, "learning_rate": 0.002828, "loss": 1.4021, "step": 191744 }, { "epoch": 14.394596622889306, "grad_norm": 0.46365320682525635, "learning_rate": 0.002828, "loss": 1.4075, "step": 191808 }, { "epoch": 14.39939962476548, "grad_norm": 0.5520146489143372, "learning_rate": 0.002828, "loss": 1.4054, "step": 191872 }, { "epoch": 14.40420262664165, "grad_norm": 0.49274471402168274, "learning_rate": 0.002828, "loss": 1.4005, "step": 191936 }, { "epoch": 14.409005628517823, "grad_norm": 0.6408708095550537, "learning_rate": 0.002828, "loss": 1.3995, "step": 192000 }, { "epoch": 14.413808630393996, "grad_norm": 0.5490612983703613, "learning_rate": 0.002828, "loss": 1.4086, "step": 192064 }, { "epoch": 14.418611632270169, "grad_norm": 0.5333877205848694, "learning_rate": 0.002828, "loss": 1.4091, "step": 192128 }, { "epoch": 14.423414634146342, "grad_norm": 0.5989339351654053, "learning_rate": 0.002828, "loss": 1.4026, "step": 192192 }, { "epoch": 14.428217636022515, "grad_norm": 0.5512287020683289, "learning_rate": 0.002828, "loss": 1.4088, "step": 192256 }, { "epoch": 14.433020637898686, "grad_norm": 0.5418707132339478, "learning_rate": 0.002828, "loss": 1.4048, "step": 192320 }, { "epoch": 14.437823639774859, "grad_norm": 0.5657520890235901, "learning_rate": 0.002828, "loss": 1.4074, "step": 192384 }, { "epoch": 14.442626641651032, "grad_norm": 0.5935457348823547, "learning_rate": 0.002828, "loss": 1.4041, "step": 192448 }, { "epoch": 14.447429643527204, "grad_norm": 0.5439548492431641, "learning_rate": 0.002828, "loss": 1.4035, "step": 192512 }, { "epoch": 14.452232645403377, "grad_norm": 0.5025527477264404, "learning_rate": 0.002828, "loss": 1.4072, "step": 192576 }, { "epoch": 14.45703564727955, "grad_norm": 0.46855494379997253, "learning_rate": 0.002828, "loss": 1.4017, "step": 192640 }, { "epoch": 14.461838649155723, "grad_norm": 0.5161790251731873, "learning_rate": 0.002828, "loss": 1.4117, "step": 192704 }, { "epoch": 14.466641651031894, "grad_norm": 0.52967369556427, "learning_rate": 0.002828, "loss": 1.4033, "step": 192768 }, { "epoch": 14.471444652908067, "grad_norm": 0.5561485290527344, "learning_rate": 0.002828, "loss": 1.4057, "step": 192832 }, { "epoch": 14.47624765478424, "grad_norm": 0.5013352632522583, "learning_rate": 0.002828, "loss": 1.4089, "step": 192896 }, { "epoch": 14.481050656660413, "grad_norm": 0.4954836070537567, "learning_rate": 0.002828, "loss": 1.4081, "step": 192960 }, { "epoch": 14.485853658536586, "grad_norm": 0.4955345690250397, "learning_rate": 0.002828, "loss": 1.4092, "step": 193024 }, { "epoch": 14.490656660412759, "grad_norm": 0.5120537877082825, "learning_rate": 0.002828, "loss": 1.4062, "step": 193088 }, { "epoch": 14.49545966228893, "grad_norm": 0.5533945560455322, "learning_rate": 0.002828, "loss": 1.4134, "step": 193152 }, { "epoch": 14.500262664165103, "grad_norm": 0.5256770849227905, "learning_rate": 0.002828, "loss": 1.4035, "step": 193216 }, { "epoch": 14.505065666041276, "grad_norm": 0.5609396696090698, "learning_rate": 0.002828, "loss": 1.4016, "step": 193280 }, { "epoch": 14.509868667917448, "grad_norm": 0.6170324683189392, "learning_rate": 0.002828, "loss": 1.4034, "step": 193344 }, { "epoch": 14.514671669793621, "grad_norm": 0.5539875626564026, "learning_rate": 0.002828, "loss": 1.4052, "step": 193408 }, { "epoch": 14.519474671669794, "grad_norm": 0.5213680267333984, "learning_rate": 0.002828, "loss": 1.4072, "step": 193472 }, { "epoch": 14.524277673545967, "grad_norm": 0.5134106278419495, "learning_rate": 0.002828, "loss": 1.4126, "step": 193536 }, { "epoch": 14.529080675422138, "grad_norm": 0.5207715630531311, "learning_rate": 0.002828, "loss": 1.4099, "step": 193600 }, { "epoch": 14.533883677298311, "grad_norm": 0.6132842302322388, "learning_rate": 0.002828, "loss": 1.4017, "step": 193664 }, { "epoch": 14.538686679174484, "grad_norm": 0.5139353275299072, "learning_rate": 0.002828, "loss": 1.4039, "step": 193728 }, { "epoch": 14.543489681050657, "grad_norm": 0.5206514000892639, "learning_rate": 0.002828, "loss": 1.4017, "step": 193792 }, { "epoch": 14.54829268292683, "grad_norm": 0.5245344638824463, "learning_rate": 0.002828, "loss": 1.4036, "step": 193856 }, { "epoch": 14.553095684803003, "grad_norm": 0.48017024993896484, "learning_rate": 0.002828, "loss": 1.4082, "step": 193920 }, { "epoch": 14.557898686679174, "grad_norm": 0.5209543704986572, "learning_rate": 0.002828, "loss": 1.4007, "step": 193984 }, { "epoch": 14.562701688555347, "grad_norm": 0.6338755488395691, "learning_rate": 0.002828, "loss": 1.4064, "step": 194048 }, { "epoch": 14.56750469043152, "grad_norm": 0.5111967325210571, "learning_rate": 0.002828, "loss": 1.3982, "step": 194112 }, { "epoch": 14.572307692307692, "grad_norm": 0.5508695244789124, "learning_rate": 0.002828, "loss": 1.409, "step": 194176 }, { "epoch": 14.577110694183865, "grad_norm": 0.549004077911377, "learning_rate": 0.002828, "loss": 1.4103, "step": 194240 }, { "epoch": 14.581913696060038, "grad_norm": 0.4958650767803192, "learning_rate": 0.002828, "loss": 1.4077, "step": 194304 }, { "epoch": 14.586716697936211, "grad_norm": 0.5928217172622681, "learning_rate": 0.002828, "loss": 1.4053, "step": 194368 }, { "epoch": 14.591519699812382, "grad_norm": 0.5710644125938416, "learning_rate": 0.002828, "loss": 1.4077, "step": 194432 }, { "epoch": 14.596322701688555, "grad_norm": 0.5788741111755371, "learning_rate": 0.002828, "loss": 1.4032, "step": 194496 }, { "epoch": 14.601125703564728, "grad_norm": 0.4544178247451782, "learning_rate": 0.002828, "loss": 1.4017, "step": 194560 }, { "epoch": 14.6059287054409, "grad_norm": 0.5514257550239563, "learning_rate": 0.002828, "loss": 1.4081, "step": 194624 }, { "epoch": 14.610731707317074, "grad_norm": 0.48939162492752075, "learning_rate": 0.002828, "loss": 1.4081, "step": 194688 }, { "epoch": 14.615534709193247, "grad_norm": 0.7316038608551025, "learning_rate": 0.002828, "loss": 1.4049, "step": 194752 }, { "epoch": 14.620337711069418, "grad_norm": 0.5802710652351379, "learning_rate": 0.002828, "loss": 1.4035, "step": 194816 }, { "epoch": 14.62514071294559, "grad_norm": 0.5377091765403748, "learning_rate": 0.002828, "loss": 1.4026, "step": 194880 }, { "epoch": 14.629943714821763, "grad_norm": 0.5343953371047974, "learning_rate": 0.002828, "loss": 1.4099, "step": 194944 }, { "epoch": 14.634746716697936, "grad_norm": 0.5183101296424866, "learning_rate": 0.002828, "loss": 1.4083, "step": 195008 }, { "epoch": 14.63954971857411, "grad_norm": 0.48518890142440796, "learning_rate": 0.002828, "loss": 1.4111, "step": 195072 }, { "epoch": 14.644352720450282, "grad_norm": 0.5483662486076355, "learning_rate": 0.002828, "loss": 1.4027, "step": 195136 }, { "epoch": 14.649155722326453, "grad_norm": 0.5991758108139038, "learning_rate": 0.002828, "loss": 1.4041, "step": 195200 }, { "epoch": 14.653958724202626, "grad_norm": 0.554240345954895, "learning_rate": 0.002828, "loss": 1.4085, "step": 195264 }, { "epoch": 14.658761726078799, "grad_norm": 0.5651766061782837, "learning_rate": 0.002828, "loss": 1.4124, "step": 195328 }, { "epoch": 14.663564727954972, "grad_norm": 0.5138752460479736, "learning_rate": 0.002828, "loss": 1.4122, "step": 195392 }, { "epoch": 14.668367729831145, "grad_norm": 0.5441687703132629, "learning_rate": 0.002828, "loss": 1.4135, "step": 195456 }, { "epoch": 14.673170731707318, "grad_norm": 0.5138674974441528, "learning_rate": 0.002828, "loss": 1.4086, "step": 195520 }, { "epoch": 14.677973733583489, "grad_norm": 0.5577060580253601, "learning_rate": 0.002828, "loss": 1.4129, "step": 195584 }, { "epoch": 14.682776735459662, "grad_norm": 0.6189695000648499, "learning_rate": 0.002828, "loss": 1.4085, "step": 195648 }, { "epoch": 14.687579737335835, "grad_norm": 0.5921385884284973, "learning_rate": 0.002828, "loss": 1.4108, "step": 195712 }, { "epoch": 14.692382739212007, "grad_norm": 0.577090859413147, "learning_rate": 0.002828, "loss": 1.4071, "step": 195776 }, { "epoch": 14.69718574108818, "grad_norm": 0.553539514541626, "learning_rate": 0.002828, "loss": 1.3994, "step": 195840 }, { "epoch": 14.701988742964353, "grad_norm": 0.4840497374534607, "learning_rate": 0.002828, "loss": 1.4099, "step": 195904 }, { "epoch": 14.706791744840526, "grad_norm": 0.5382407307624817, "learning_rate": 0.002828, "loss": 1.4073, "step": 195968 }, { "epoch": 14.711594746716697, "grad_norm": 0.5615788102149963, "learning_rate": 0.002828, "loss": 1.4057, "step": 196032 }, { "epoch": 14.71639774859287, "grad_norm": 0.46225443482398987, "learning_rate": 0.002828, "loss": 1.4049, "step": 196096 }, { "epoch": 14.721200750469043, "grad_norm": 0.49434852600097656, "learning_rate": 0.002828, "loss": 1.4065, "step": 196160 }, { "epoch": 14.726003752345216, "grad_norm": 0.7275422811508179, "learning_rate": 0.002828, "loss": 1.4059, "step": 196224 }, { "epoch": 14.730806754221389, "grad_norm": 0.542582094669342, "learning_rate": 0.002828, "loss": 1.4071, "step": 196288 }, { "epoch": 14.735609756097562, "grad_norm": 0.4883684515953064, "learning_rate": 0.002828, "loss": 1.4058, "step": 196352 }, { "epoch": 14.740412757973733, "grad_norm": 0.5530311465263367, "learning_rate": 0.002828, "loss": 1.406, "step": 196416 }, { "epoch": 14.745215759849906, "grad_norm": 0.5233826041221619, "learning_rate": 0.002828, "loss": 1.4044, "step": 196480 }, { "epoch": 14.750018761726079, "grad_norm": 0.5350394248962402, "learning_rate": 0.002828, "loss": 1.4089, "step": 196544 }, { "epoch": 14.754821763602251, "grad_norm": 0.550305962562561, "learning_rate": 0.002828, "loss": 1.3991, "step": 196608 }, { "epoch": 14.759624765478424, "grad_norm": 0.5499565005302429, "learning_rate": 0.002828, "loss": 1.4074, "step": 196672 }, { "epoch": 14.764427767354597, "grad_norm": 0.6032944917678833, "learning_rate": 0.002828, "loss": 1.407, "step": 196736 }, { "epoch": 14.76923076923077, "grad_norm": 0.45191773772239685, "learning_rate": 0.002828, "loss": 1.4066, "step": 196800 }, { "epoch": 14.774033771106941, "grad_norm": 0.7020035982131958, "learning_rate": 0.002828, "loss": 1.4012, "step": 196864 }, { "epoch": 14.778836772983114, "grad_norm": 0.5450897216796875, "learning_rate": 0.002828, "loss": 1.4012, "step": 196928 }, { "epoch": 14.783639774859287, "grad_norm": 0.5871745347976685, "learning_rate": 0.002828, "loss": 1.4064, "step": 196992 }, { "epoch": 14.78844277673546, "grad_norm": 0.5858376622200012, "learning_rate": 0.002828, "loss": 1.4134, "step": 197056 }, { "epoch": 14.793245778611633, "grad_norm": 0.4942261576652527, "learning_rate": 0.002828, "loss": 1.405, "step": 197120 }, { "epoch": 14.798048780487806, "grad_norm": 0.4891422390937805, "learning_rate": 0.002828, "loss": 1.4166, "step": 197184 }, { "epoch": 14.802851782363977, "grad_norm": 0.5212449431419373, "learning_rate": 0.002828, "loss": 1.4097, "step": 197248 }, { "epoch": 14.80765478424015, "grad_norm": 0.48919591307640076, "learning_rate": 0.002828, "loss": 1.4023, "step": 197312 }, { "epoch": 14.812457786116322, "grad_norm": 0.5116396546363831, "learning_rate": 0.002828, "loss": 1.4029, "step": 197376 }, { "epoch": 14.817260787992495, "grad_norm": 0.5862394571304321, "learning_rate": 0.002828, "loss": 1.4091, "step": 197440 }, { "epoch": 14.822063789868668, "grad_norm": 0.6540351510047913, "learning_rate": 0.002828, "loss": 1.4074, "step": 197504 }, { "epoch": 14.826866791744841, "grad_norm": 0.5462371110916138, "learning_rate": 0.002828, "loss": 1.4067, "step": 197568 }, { "epoch": 14.831669793621014, "grad_norm": 0.5242631435394287, "learning_rate": 0.002828, "loss": 1.4058, "step": 197632 }, { "epoch": 14.836472795497185, "grad_norm": 0.6098849177360535, "learning_rate": 0.002828, "loss": 1.4088, "step": 197696 }, { "epoch": 14.841275797373358, "grad_norm": 0.601551353931427, "learning_rate": 0.002828, "loss": 1.4041, "step": 197760 }, { "epoch": 14.846078799249531, "grad_norm": 0.6099250912666321, "learning_rate": 0.002828, "loss": 1.4057, "step": 197824 }, { "epoch": 14.850881801125704, "grad_norm": 0.4936770796775818, "learning_rate": 0.002828, "loss": 1.4117, "step": 197888 }, { "epoch": 14.855684803001877, "grad_norm": 0.5036704540252686, "learning_rate": 0.002828, "loss": 1.4132, "step": 197952 }, { "epoch": 14.86048780487805, "grad_norm": 0.5548554062843323, "learning_rate": 0.002828, "loss": 1.4105, "step": 198016 }, { "epoch": 14.86529080675422, "grad_norm": 0.5694041848182678, "learning_rate": 0.002828, "loss": 1.4101, "step": 198080 }, { "epoch": 14.870093808630394, "grad_norm": 0.5238790512084961, "learning_rate": 0.002828, "loss": 1.416, "step": 198144 }, { "epoch": 14.874896810506566, "grad_norm": 0.5360506176948547, "learning_rate": 0.002828, "loss": 1.4096, "step": 198208 }, { "epoch": 14.87969981238274, "grad_norm": 0.6266505122184753, "learning_rate": 0.002828, "loss": 1.4102, "step": 198272 }, { "epoch": 14.884502814258912, "grad_norm": 0.5030892491340637, "learning_rate": 0.002828, "loss": 1.4052, "step": 198336 }, { "epoch": 14.889305816135085, "grad_norm": 0.48328977823257446, "learning_rate": 0.002828, "loss": 1.4052, "step": 198400 }, { "epoch": 14.894108818011258, "grad_norm": 0.567842960357666, "learning_rate": 0.002828, "loss": 1.4113, "step": 198464 }, { "epoch": 14.898911819887429, "grad_norm": 0.507936418056488, "learning_rate": 0.002828, "loss": 1.4117, "step": 198528 }, { "epoch": 14.903714821763602, "grad_norm": 0.46350201964378357, "learning_rate": 0.002828, "loss": 1.4081, "step": 198592 }, { "epoch": 14.908517823639775, "grad_norm": 0.550236165523529, "learning_rate": 0.002828, "loss": 1.4115, "step": 198656 }, { "epoch": 14.913320825515948, "grad_norm": 0.5407876372337341, "learning_rate": 0.002828, "loss": 1.415, "step": 198720 }, { "epoch": 14.91812382739212, "grad_norm": 0.4663850665092468, "learning_rate": 0.002828, "loss": 1.4109, "step": 198784 }, { "epoch": 14.922926829268294, "grad_norm": 0.5446844100952148, "learning_rate": 0.002828, "loss": 1.4141, "step": 198848 }, { "epoch": 14.927729831144465, "grad_norm": 0.517081081867218, "learning_rate": 0.002828, "loss": 1.4122, "step": 198912 }, { "epoch": 14.932532833020637, "grad_norm": 0.5174572467803955, "learning_rate": 0.002828, "loss": 1.411, "step": 198976 }, { "epoch": 14.93733583489681, "grad_norm": 0.5665841698646545, "learning_rate": 0.002828, "loss": 1.4124, "step": 199040 }, { "epoch": 14.942138836772983, "grad_norm": 0.6959680318832397, "learning_rate": 0.002828, "loss": 1.4055, "step": 199104 }, { "epoch": 14.946941838649156, "grad_norm": 0.7196769714355469, "learning_rate": 0.002828, "loss": 1.41, "step": 199168 }, { "epoch": 14.951744840525329, "grad_norm": 0.5885917544364929, "learning_rate": 0.002828, "loss": 1.41, "step": 199232 }, { "epoch": 14.9565478424015, "grad_norm": 0.5237954258918762, "learning_rate": 0.002828, "loss": 1.4133, "step": 199296 }, { "epoch": 14.961350844277673, "grad_norm": 0.5811425447463989, "learning_rate": 0.002828, "loss": 1.4104, "step": 199360 }, { "epoch": 14.966153846153846, "grad_norm": 0.4897754192352295, "learning_rate": 0.002828, "loss": 1.4087, "step": 199424 }, { "epoch": 14.970956848030019, "grad_norm": 0.6255586743354797, "learning_rate": 0.002828, "loss": 1.4179, "step": 199488 }, { "epoch": 14.975759849906192, "grad_norm": 0.617106556892395, "learning_rate": 0.002828, "loss": 1.4058, "step": 199552 }, { "epoch": 14.980562851782365, "grad_norm": 0.5452525019645691, "learning_rate": 0.002828, "loss": 1.4179, "step": 199616 }, { "epoch": 14.985365853658536, "grad_norm": 0.6155555248260498, "learning_rate": 0.002828, "loss": 1.4104, "step": 199680 }, { "epoch": 14.990168855534709, "grad_norm": 0.4624778628349304, "learning_rate": 0.002828, "loss": 1.4117, "step": 199744 }, { "epoch": 14.994971857410881, "grad_norm": 0.5621700286865234, "learning_rate": 0.002828, "loss": 1.4115, "step": 199808 }, { "epoch": 14.999774859287054, "grad_norm": 0.5766751766204834, "learning_rate": 0.002828, "loss": 1.4116, "step": 199872 }, { "epoch": 15.004577861163227, "grad_norm": 0.5539935231208801, "learning_rate": 0.002828, "loss": 1.3687, "step": 199936 }, { "epoch": 15.0093808630394, "grad_norm": 0.518322765827179, "learning_rate": 0.002828, "loss": 1.3684, "step": 200000 }, { "epoch": 15.014183864915573, "grad_norm": 0.48596128821372986, "learning_rate": 0.002828, "loss": 1.3641, "step": 200064 }, { "epoch": 15.018986866791744, "grad_norm": 0.46796655654907227, "learning_rate": 0.002828, "loss": 1.3666, "step": 200128 }, { "epoch": 15.023789868667917, "grad_norm": 0.5862635374069214, "learning_rate": 0.002828, "loss": 1.3703, "step": 200192 }, { "epoch": 15.02859287054409, "grad_norm": 0.48129528760910034, "learning_rate": 0.002828, "loss": 1.3698, "step": 200256 }, { "epoch": 15.033395872420263, "grad_norm": 0.5526949167251587, "learning_rate": 0.002828, "loss": 1.3714, "step": 200320 }, { "epoch": 15.038198874296436, "grad_norm": 0.6339066028594971, "learning_rate": 0.002828, "loss": 1.3652, "step": 200384 }, { "epoch": 15.043001876172609, "grad_norm": 0.7033270597457886, "learning_rate": 0.002828, "loss": 1.3748, "step": 200448 }, { "epoch": 15.04780487804878, "grad_norm": 0.5913888216018677, "learning_rate": 0.002828, "loss": 1.3612, "step": 200512 }, { "epoch": 15.052607879924953, "grad_norm": 0.6152012944221497, "learning_rate": 0.002828, "loss": 1.369, "step": 200576 }, { "epoch": 15.057410881801125, "grad_norm": 0.5023623704910278, "learning_rate": 0.002828, "loss": 1.3675, "step": 200640 }, { "epoch": 15.062213883677298, "grad_norm": 0.5588781833648682, "learning_rate": 0.002828, "loss": 1.3697, "step": 200704 }, { "epoch": 15.067016885553471, "grad_norm": 0.5159551501274109, "learning_rate": 0.002828, "loss": 1.3702, "step": 200768 }, { "epoch": 15.071819887429644, "grad_norm": 0.6186797022819519, "learning_rate": 0.002828, "loss": 1.373, "step": 200832 }, { "epoch": 15.076622889305817, "grad_norm": 0.5377756357192993, "learning_rate": 0.002828, "loss": 1.3685, "step": 200896 }, { "epoch": 15.081425891181988, "grad_norm": 0.679719865322113, "learning_rate": 0.002828, "loss": 1.3656, "step": 200960 }, { "epoch": 15.086228893058161, "grad_norm": 0.47345390915870667, "learning_rate": 0.002828, "loss": 1.3731, "step": 201024 }, { "epoch": 15.091031894934334, "grad_norm": 0.6200671792030334, "learning_rate": 0.002828, "loss": 1.3692, "step": 201088 }, { "epoch": 15.095834896810507, "grad_norm": 0.5481675267219543, "learning_rate": 0.002828, "loss": 1.3709, "step": 201152 }, { "epoch": 15.10063789868668, "grad_norm": 0.5293875336647034, "learning_rate": 0.002828, "loss": 1.376, "step": 201216 }, { "epoch": 15.105440900562852, "grad_norm": 0.4326186776161194, "learning_rate": 0.002828, "loss": 1.3772, "step": 201280 }, { "epoch": 15.110243902439024, "grad_norm": 0.7246432900428772, "learning_rate": 0.002828, "loss": 1.3748, "step": 201344 }, { "epoch": 15.115046904315196, "grad_norm": 0.5310239195823669, "learning_rate": 0.002828, "loss": 1.3749, "step": 201408 }, { "epoch": 15.11984990619137, "grad_norm": 0.48077937960624695, "learning_rate": 0.002828, "loss": 1.3682, "step": 201472 }, { "epoch": 15.124652908067542, "grad_norm": 0.5190368890762329, "learning_rate": 0.002828, "loss": 1.3728, "step": 201536 }, { "epoch": 15.129455909943715, "grad_norm": 0.6393243074417114, "learning_rate": 0.002828, "loss": 1.3781, "step": 201600 }, { "epoch": 15.134258911819888, "grad_norm": 0.5104050636291504, "learning_rate": 0.002828, "loss": 1.3743, "step": 201664 }, { "epoch": 15.139061913696061, "grad_norm": 0.46626007556915283, "learning_rate": 0.002828, "loss": 1.3776, "step": 201728 }, { "epoch": 15.143864915572232, "grad_norm": 0.5677950978279114, "learning_rate": 0.002828, "loss": 1.3767, "step": 201792 }, { "epoch": 15.148667917448405, "grad_norm": 0.47358930110931396, "learning_rate": 0.002828, "loss": 1.3763, "step": 201856 }, { "epoch": 15.153470919324578, "grad_norm": 0.6243211030960083, "learning_rate": 0.002828, "loss": 1.3805, "step": 201920 }, { "epoch": 15.15827392120075, "grad_norm": 0.6380032896995544, "learning_rate": 0.002828, "loss": 1.3793, "step": 201984 }, { "epoch": 15.163076923076924, "grad_norm": 0.4932658076286316, "learning_rate": 0.002828, "loss": 1.3752, "step": 202048 }, { "epoch": 15.167879924953096, "grad_norm": 0.5148807764053345, "learning_rate": 0.002828, "loss": 1.3764, "step": 202112 }, { "epoch": 15.172682926829268, "grad_norm": 0.5843935608863831, "learning_rate": 0.002828, "loss": 1.3772, "step": 202176 }, { "epoch": 15.17748592870544, "grad_norm": 0.46910738945007324, "learning_rate": 0.002828, "loss": 1.3737, "step": 202240 }, { "epoch": 15.182288930581613, "grad_norm": 0.683540403842926, "learning_rate": 0.002828, "loss": 1.3769, "step": 202304 }, { "epoch": 15.187091932457786, "grad_norm": 0.5317997932434082, "learning_rate": 0.002828, "loss": 1.3818, "step": 202368 }, { "epoch": 15.191894934333959, "grad_norm": 0.55516117811203, "learning_rate": 0.002828, "loss": 1.3796, "step": 202432 }, { "epoch": 15.196697936210132, "grad_norm": 0.5858634114265442, "learning_rate": 0.002828, "loss": 1.3733, "step": 202496 }, { "epoch": 15.201500938086303, "grad_norm": 0.5299066305160522, "learning_rate": 0.002828, "loss": 1.3725, "step": 202560 }, { "epoch": 15.206303939962476, "grad_norm": 0.5147692561149597, "learning_rate": 0.002828, "loss": 1.3785, "step": 202624 }, { "epoch": 15.211106941838649, "grad_norm": 0.5491654276847839, "learning_rate": 0.002828, "loss": 1.3741, "step": 202688 }, { "epoch": 15.215909943714822, "grad_norm": 0.6498444080352783, "learning_rate": 0.002828, "loss": 1.38, "step": 202752 }, { "epoch": 15.220712945590995, "grad_norm": 0.4774509072303772, "learning_rate": 0.002828, "loss": 1.3837, "step": 202816 }, { "epoch": 15.225515947467168, "grad_norm": 0.46530207991600037, "learning_rate": 0.002828, "loss": 1.3756, "step": 202880 }, { "epoch": 15.23031894934334, "grad_norm": 0.47992247343063354, "learning_rate": 0.002828, "loss": 1.3722, "step": 202944 }, { "epoch": 15.235121951219512, "grad_norm": 0.6161879897117615, "learning_rate": 0.002828, "loss": 1.3792, "step": 203008 }, { "epoch": 15.239924953095684, "grad_norm": 0.5422763228416443, "learning_rate": 0.002828, "loss": 1.3738, "step": 203072 }, { "epoch": 15.244727954971857, "grad_norm": 0.4786650240421295, "learning_rate": 0.002828, "loss": 1.3798, "step": 203136 }, { "epoch": 15.24953095684803, "grad_norm": 0.5520129799842834, "learning_rate": 0.002828, "loss": 1.3786, "step": 203200 }, { "epoch": 15.254333958724203, "grad_norm": 0.5212474465370178, "learning_rate": 0.002828, "loss": 1.381, "step": 203264 }, { "epoch": 15.259136960600376, "grad_norm": 0.5647684931755066, "learning_rate": 0.002828, "loss": 1.3776, "step": 203328 }, { "epoch": 15.263939962476547, "grad_norm": 0.5040179491043091, "learning_rate": 0.002828, "loss": 1.3732, "step": 203392 }, { "epoch": 15.26874296435272, "grad_norm": 0.5457432866096497, "learning_rate": 0.002828, "loss": 1.3776, "step": 203456 }, { "epoch": 15.273545966228893, "grad_norm": 0.5505269765853882, "learning_rate": 0.002828, "loss": 1.3809, "step": 203520 }, { "epoch": 15.278348968105066, "grad_norm": 0.5592472553253174, "learning_rate": 0.002828, "loss": 1.3862, "step": 203584 }, { "epoch": 15.283151969981239, "grad_norm": 0.5437265634536743, "learning_rate": 0.002828, "loss": 1.3823, "step": 203648 }, { "epoch": 15.287954971857411, "grad_norm": 0.5756497383117676, "learning_rate": 0.002828, "loss": 1.3781, "step": 203712 }, { "epoch": 15.292757973733583, "grad_norm": 0.5972731709480286, "learning_rate": 0.002828, "loss": 1.3715, "step": 203776 }, { "epoch": 15.297560975609755, "grad_norm": 0.4744695723056793, "learning_rate": 0.002828, "loss": 1.3743, "step": 203840 }, { "epoch": 15.302363977485928, "grad_norm": 0.4611794352531433, "learning_rate": 0.002828, "loss": 1.3796, "step": 203904 }, { "epoch": 15.307166979362101, "grad_norm": 0.5892916917800903, "learning_rate": 0.002828, "loss": 1.3834, "step": 203968 }, { "epoch": 15.311969981238274, "grad_norm": 0.5543408989906311, "learning_rate": 0.002828, "loss": 1.3856, "step": 204032 }, { "epoch": 15.316772983114447, "grad_norm": 0.5828499794006348, "learning_rate": 0.002828, "loss": 1.3767, "step": 204096 }, { "epoch": 15.32157598499062, "grad_norm": 0.5696132779121399, "learning_rate": 0.002828, "loss": 1.3778, "step": 204160 }, { "epoch": 15.326378986866791, "grad_norm": 0.5638845562934875, "learning_rate": 0.002828, "loss": 1.382, "step": 204224 }, { "epoch": 15.331181988742964, "grad_norm": 0.49652546644210815, "learning_rate": 0.002828, "loss": 1.378, "step": 204288 }, { "epoch": 15.335984990619137, "grad_norm": 0.6045218706130981, "learning_rate": 0.002828, "loss": 1.3744, "step": 204352 }, { "epoch": 15.34078799249531, "grad_norm": 0.459846556186676, "learning_rate": 0.002828, "loss": 1.3842, "step": 204416 }, { "epoch": 15.345590994371483, "grad_norm": 0.5356844067573547, "learning_rate": 0.002828, "loss": 1.3824, "step": 204480 }, { "epoch": 15.350393996247655, "grad_norm": 0.565377950668335, "learning_rate": 0.002828, "loss": 1.3852, "step": 204544 }, { "epoch": 15.355196998123827, "grad_norm": 0.4540599584579468, "learning_rate": 0.002828, "loss": 1.3803, "step": 204608 }, { "epoch": 15.36, "grad_norm": 0.5405008792877197, "learning_rate": 0.002828, "loss": 1.3843, "step": 204672 }, { "epoch": 15.364803001876172, "grad_norm": 0.5116636753082275, "learning_rate": 0.002828, "loss": 1.376, "step": 204736 }, { "epoch": 15.369606003752345, "grad_norm": 0.5005480647087097, "learning_rate": 0.002828, "loss": 1.3815, "step": 204800 }, { "epoch": 15.374409005628518, "grad_norm": 0.5235862731933594, "learning_rate": 0.002828, "loss": 1.3831, "step": 204864 }, { "epoch": 15.379212007504691, "grad_norm": 0.5118006467819214, "learning_rate": 0.002828, "loss": 1.381, "step": 204928 }, { "epoch": 15.384015009380864, "grad_norm": 0.43683668971061707, "learning_rate": 0.002828, "loss": 1.3904, "step": 204992 }, { "epoch": 15.388818011257035, "grad_norm": 0.49627912044525146, "learning_rate": 0.002828, "loss": 1.3912, "step": 205056 }, { "epoch": 15.393621013133208, "grad_norm": 0.535225510597229, "learning_rate": 0.002828, "loss": 1.3858, "step": 205120 }, { "epoch": 15.39842401500938, "grad_norm": 0.565176248550415, "learning_rate": 0.002828, "loss": 1.3785, "step": 205184 }, { "epoch": 15.403227016885554, "grad_norm": 0.5722188949584961, "learning_rate": 0.002828, "loss": 1.3792, "step": 205248 }, { "epoch": 15.408030018761727, "grad_norm": 0.5208520889282227, "learning_rate": 0.002828, "loss": 1.3817, "step": 205312 }, { "epoch": 15.4128330206379, "grad_norm": 0.5423282980918884, "learning_rate": 0.002828, "loss": 1.3837, "step": 205376 }, { "epoch": 15.41763602251407, "grad_norm": 0.538154125213623, "learning_rate": 0.002828, "loss": 1.3803, "step": 205440 }, { "epoch": 15.422439024390243, "grad_norm": 0.6185964941978455, "learning_rate": 0.002828, "loss": 1.378, "step": 205504 }, { "epoch": 15.427242026266416, "grad_norm": 0.4758653938770294, "learning_rate": 0.002828, "loss": 1.3789, "step": 205568 }, { "epoch": 15.43204502814259, "grad_norm": 0.5673360824584961, "learning_rate": 0.002828, "loss": 1.3804, "step": 205632 }, { "epoch": 15.436848030018762, "grad_norm": 0.5129354000091553, "learning_rate": 0.002828, "loss": 1.3832, "step": 205696 }, { "epoch": 15.441651031894935, "grad_norm": 0.507262110710144, "learning_rate": 0.002828, "loss": 1.3807, "step": 205760 }, { "epoch": 15.446454033771108, "grad_norm": 0.5329039692878723, "learning_rate": 0.002828, "loss": 1.3827, "step": 205824 }, { "epoch": 15.451257035647279, "grad_norm": 0.48899421095848083, "learning_rate": 0.002828, "loss": 1.3803, "step": 205888 }, { "epoch": 15.456060037523452, "grad_norm": 0.5405635833740234, "learning_rate": 0.002828, "loss": 1.3748, "step": 205952 }, { "epoch": 15.460863039399625, "grad_norm": 0.506642758846283, "learning_rate": 0.002828, "loss": 1.384, "step": 206016 }, { "epoch": 15.465666041275798, "grad_norm": 0.4873145818710327, "learning_rate": 0.002828, "loss": 1.38, "step": 206080 }, { "epoch": 15.47046904315197, "grad_norm": 0.5708457231521606, "learning_rate": 0.002828, "loss": 1.3843, "step": 206144 }, { "epoch": 15.475272045028143, "grad_norm": 0.6255208849906921, "learning_rate": 0.002828, "loss": 1.3762, "step": 206208 }, { "epoch": 15.480075046904314, "grad_norm": 0.6195931434631348, "learning_rate": 0.002828, "loss": 1.383, "step": 206272 }, { "epoch": 15.484878048780487, "grad_norm": 0.611381471157074, "learning_rate": 0.002828, "loss": 1.3834, "step": 206336 }, { "epoch": 15.48968105065666, "grad_norm": 0.5173780918121338, "learning_rate": 0.002828, "loss": 1.3825, "step": 206400 }, { "epoch": 15.494484052532833, "grad_norm": 0.5662381052970886, "learning_rate": 0.002828, "loss": 1.382, "step": 206464 }, { "epoch": 15.499287054409006, "grad_norm": 0.6043580174446106, "learning_rate": 0.002828, "loss": 1.3785, "step": 206528 }, { "epoch": 15.504090056285179, "grad_norm": 0.5214434266090393, "learning_rate": 0.002828, "loss": 1.3803, "step": 206592 }, { "epoch": 15.508893058161352, "grad_norm": 0.5894580483436584, "learning_rate": 0.002828, "loss": 1.3859, "step": 206656 }, { "epoch": 15.513696060037523, "grad_norm": 0.5345766544342041, "learning_rate": 0.002828, "loss": 1.386, "step": 206720 }, { "epoch": 15.518499061913696, "grad_norm": 0.5783444046974182, "learning_rate": 0.002828, "loss": 1.3781, "step": 206784 }, { "epoch": 15.523302063789869, "grad_norm": 0.5113909244537354, "learning_rate": 0.002828, "loss": 1.3815, "step": 206848 }, { "epoch": 15.528105065666042, "grad_norm": 0.6762928366661072, "learning_rate": 0.002828, "loss": 1.3899, "step": 206912 }, { "epoch": 15.532908067542214, "grad_norm": 0.5763272047042847, "learning_rate": 0.002828, "loss": 1.3844, "step": 206976 }, { "epoch": 15.537711069418387, "grad_norm": 0.4625857174396515, "learning_rate": 0.002828, "loss": 1.3819, "step": 207040 }, { "epoch": 15.542514071294558, "grad_norm": 0.5895203948020935, "learning_rate": 0.002828, "loss": 1.3839, "step": 207104 }, { "epoch": 15.547317073170731, "grad_norm": 0.5495424270629883, "learning_rate": 0.002828, "loss": 1.3886, "step": 207168 }, { "epoch": 15.552120075046904, "grad_norm": 0.5420121550559998, "learning_rate": 0.002828, "loss": 1.3844, "step": 207232 }, { "epoch": 15.556923076923077, "grad_norm": 0.585555374622345, "learning_rate": 0.002828, "loss": 1.3834, "step": 207296 }, { "epoch": 15.56172607879925, "grad_norm": 0.5260416269302368, "learning_rate": 0.002828, "loss": 1.3823, "step": 207360 }, { "epoch": 15.566529080675423, "grad_norm": 0.7156786322593689, "learning_rate": 0.002828, "loss": 1.3841, "step": 207424 }, { "epoch": 15.571332082551594, "grad_norm": 0.4893474578857422, "learning_rate": 0.002828, "loss": 1.38, "step": 207488 }, { "epoch": 15.576135084427767, "grad_norm": 0.5773012638092041, "learning_rate": 0.002828, "loss": 1.3887, "step": 207552 }, { "epoch": 15.58093808630394, "grad_norm": 0.5502187013626099, "learning_rate": 0.002828, "loss": 1.3877, "step": 207616 }, { "epoch": 15.585741088180113, "grad_norm": 0.4841638505458832, "learning_rate": 0.002828, "loss": 1.3835, "step": 207680 }, { "epoch": 15.590544090056285, "grad_norm": 0.6301225423812866, "learning_rate": 0.002828, "loss": 1.3782, "step": 207744 }, { "epoch": 15.595347091932458, "grad_norm": 0.5887924432754517, "learning_rate": 0.002828, "loss": 1.3792, "step": 207808 }, { "epoch": 15.60015009380863, "grad_norm": 0.5262457728385925, "learning_rate": 0.002828, "loss": 1.3898, "step": 207872 }, { "epoch": 15.604953095684802, "grad_norm": 0.5991269946098328, "learning_rate": 0.002828, "loss": 1.3809, "step": 207936 }, { "epoch": 15.609756097560975, "grad_norm": 0.556826651096344, "learning_rate": 0.002828, "loss": 1.3833, "step": 208000 }, { "epoch": 15.614559099437148, "grad_norm": 0.6832888722419739, "learning_rate": 0.002828, "loss": 1.388, "step": 208064 }, { "epoch": 15.619362101313321, "grad_norm": 0.5342592000961304, "learning_rate": 0.002828, "loss": 1.3851, "step": 208128 }, { "epoch": 15.624165103189494, "grad_norm": 0.5431777834892273, "learning_rate": 0.002828, "loss": 1.3914, "step": 208192 }, { "epoch": 15.628968105065667, "grad_norm": 0.5456152558326721, "learning_rate": 0.002828, "loss": 1.3799, "step": 208256 }, { "epoch": 15.633771106941838, "grad_norm": 0.5626450181007385, "learning_rate": 0.002828, "loss": 1.386, "step": 208320 }, { "epoch": 15.63857410881801, "grad_norm": 0.5906189680099487, "learning_rate": 0.002828, "loss": 1.3872, "step": 208384 }, { "epoch": 15.643377110694184, "grad_norm": 0.6024333834648132, "learning_rate": 0.002828, "loss": 1.39, "step": 208448 }, { "epoch": 15.648180112570357, "grad_norm": 0.5341130495071411, "learning_rate": 0.002828, "loss": 1.3866, "step": 208512 }, { "epoch": 15.65298311444653, "grad_norm": 0.6072880625724792, "learning_rate": 0.002828, "loss": 1.3868, "step": 208576 }, { "epoch": 15.657786116322702, "grad_norm": 0.5600910782814026, "learning_rate": 0.002828, "loss": 1.3864, "step": 208640 }, { "epoch": 15.662589118198873, "grad_norm": 0.502774178981781, "learning_rate": 0.002828, "loss": 1.3875, "step": 208704 }, { "epoch": 15.667392120075046, "grad_norm": 0.6341469883918762, "learning_rate": 0.002828, "loss": 1.3903, "step": 208768 }, { "epoch": 15.67219512195122, "grad_norm": 0.5548173189163208, "learning_rate": 0.002828, "loss": 1.3833, "step": 208832 }, { "epoch": 15.676998123827392, "grad_norm": 0.4528850018978119, "learning_rate": 0.002828, "loss": 1.3881, "step": 208896 }, { "epoch": 15.681801125703565, "grad_norm": 0.6027002930641174, "learning_rate": 0.002828, "loss": 1.3802, "step": 208960 }, { "epoch": 15.686604127579738, "grad_norm": 0.5031800866127014, "learning_rate": 0.002828, "loss": 1.3938, "step": 209024 }, { "epoch": 15.69140712945591, "grad_norm": 0.650576114654541, "learning_rate": 0.002828, "loss": 1.3892, "step": 209088 }, { "epoch": 15.696210131332082, "grad_norm": 0.5973570346832275, "learning_rate": 0.002828, "loss": 1.386, "step": 209152 }, { "epoch": 15.701013133208255, "grad_norm": 0.5872775316238403, "learning_rate": 0.002828, "loss": 1.3855, "step": 209216 }, { "epoch": 15.705816135084428, "grad_norm": 0.6111817955970764, "learning_rate": 0.002828, "loss": 1.3825, "step": 209280 }, { "epoch": 15.7106191369606, "grad_norm": 0.5026399493217468, "learning_rate": 0.002828, "loss": 1.3855, "step": 209344 }, { "epoch": 15.715422138836773, "grad_norm": 0.5055294036865234, "learning_rate": 0.002828, "loss": 1.3843, "step": 209408 }, { "epoch": 15.720225140712946, "grad_norm": 0.510923445224762, "learning_rate": 0.002828, "loss": 1.3939, "step": 209472 }, { "epoch": 15.725028142589117, "grad_norm": 0.4886623024940491, "learning_rate": 0.002828, "loss": 1.3862, "step": 209536 }, { "epoch": 15.72983114446529, "grad_norm": 0.4989912211894989, "learning_rate": 0.002828, "loss": 1.375, "step": 209600 }, { "epoch": 15.734634146341463, "grad_norm": 0.5838048458099365, "learning_rate": 0.002828, "loss": 1.3845, "step": 209664 }, { "epoch": 15.739437148217636, "grad_norm": 0.5743147730827332, "learning_rate": 0.002828, "loss": 1.3854, "step": 209728 }, { "epoch": 15.744240150093809, "grad_norm": 0.5653936862945557, "learning_rate": 0.002828, "loss": 1.3794, "step": 209792 }, { "epoch": 15.749043151969982, "grad_norm": 0.5322737097740173, "learning_rate": 0.002828, "loss": 1.3866, "step": 209856 }, { "epoch": 15.753846153846155, "grad_norm": 0.5013566017150879, "learning_rate": 0.002828, "loss": 1.3853, "step": 209920 }, { "epoch": 15.758649155722326, "grad_norm": 0.5475387573242188, "learning_rate": 0.002828, "loss": 1.3605, "step": 209984 }, { "epoch": 15.763452157598499, "grad_norm": 0.5118324160575867, "learning_rate": 0.002828, "loss": 1.3574, "step": 210048 }, { "epoch": 15.768255159474672, "grad_norm": 0.5450679659843445, "learning_rate": 0.002828, "loss": 1.3605, "step": 210112 }, { "epoch": 15.773058161350844, "grad_norm": 0.5049896240234375, "learning_rate": 0.002828, "loss": 1.3572, "step": 210176 }, { "epoch": 15.777861163227017, "grad_norm": 0.6178387403488159, "learning_rate": 0.002828, "loss": 1.3619, "step": 210240 }, { "epoch": 15.78266416510319, "grad_norm": 0.5072968602180481, "learning_rate": 0.002828, "loss": 1.363, "step": 210304 }, { "epoch": 15.787467166979361, "grad_norm": 0.5722081661224365, "learning_rate": 0.002828, "loss": 1.3542, "step": 210368 }, { "epoch": 15.792270168855534, "grad_norm": 0.5220540165901184, "learning_rate": 0.002828, "loss": 1.365, "step": 210432 }, { "epoch": 15.797073170731707, "grad_norm": 0.4980974793434143, "learning_rate": 0.002828, "loss": 1.3593, "step": 210496 }, { "epoch": 15.80187617260788, "grad_norm": 0.5043259263038635, "learning_rate": 0.002828, "loss": 1.359, "step": 210560 }, { "epoch": 15.806679174484053, "grad_norm": 0.5757930278778076, "learning_rate": 0.002828, "loss": 1.3663, "step": 210624 }, { "epoch": 15.811482176360226, "grad_norm": 0.5797905921936035, "learning_rate": 0.002828, "loss": 1.3611, "step": 210688 }, { "epoch": 15.816285178236399, "grad_norm": 0.535588800907135, "learning_rate": 0.002828, "loss": 1.3659, "step": 210752 }, { "epoch": 15.82108818011257, "grad_norm": 0.5623985528945923, "learning_rate": 0.002828, "loss": 1.3637, "step": 210816 }, { "epoch": 15.825891181988743, "grad_norm": 0.587024986743927, "learning_rate": 0.002828, "loss": 1.366, "step": 210880 }, { "epoch": 15.830694183864916, "grad_norm": 0.5084807276725769, "learning_rate": 0.002828, "loss": 1.3615, "step": 210944 }, { "epoch": 15.835497185741088, "grad_norm": 0.5876424908638, "learning_rate": 0.002828, "loss": 1.3655, "step": 211008 }, { "epoch": 15.840300187617261, "grad_norm": 0.5620876550674438, "learning_rate": 0.002828, "loss": 1.3646, "step": 211072 }, { "epoch": 15.845103189493434, "grad_norm": 0.4719368815422058, "learning_rate": 0.002828, "loss": 1.3611, "step": 211136 }, { "epoch": 15.849906191369605, "grad_norm": 0.5494318008422852, "learning_rate": 0.002828, "loss": 1.368, "step": 211200 }, { "epoch": 15.854709193245778, "grad_norm": 0.4704643785953522, "learning_rate": 0.002828, "loss": 1.3666, "step": 211264 }, { "epoch": 15.859512195121951, "grad_norm": 0.588175892829895, "learning_rate": 0.002828, "loss": 1.3696, "step": 211328 }, { "epoch": 15.864315196998124, "grad_norm": 0.6054316759109497, "learning_rate": 0.002828, "loss": 1.3661, "step": 211392 }, { "epoch": 15.869118198874297, "grad_norm": 0.5715833902359009, "learning_rate": 0.002828, "loss": 1.3627, "step": 211456 }, { "epoch": 15.87392120075047, "grad_norm": 0.5927461981773376, "learning_rate": 0.002828, "loss": 1.3678, "step": 211520 }, { "epoch": 15.87872420262664, "grad_norm": 0.6130070090293884, "learning_rate": 0.002828, "loss": 1.3689, "step": 211584 }, { "epoch": 15.883527204502814, "grad_norm": 0.5040857791900635, "learning_rate": 0.002828, "loss": 1.3591, "step": 211648 }, { "epoch": 15.888330206378987, "grad_norm": 0.5852608680725098, "learning_rate": 0.002828, "loss": 1.3669, "step": 211712 }, { "epoch": 15.89313320825516, "grad_norm": 0.5842130184173584, "learning_rate": 0.002828, "loss": 1.3697, "step": 211776 }, { "epoch": 15.897936210131332, "grad_norm": 0.5647704005241394, "learning_rate": 0.002828, "loss": 1.3679, "step": 211840 }, { "epoch": 15.902739212007505, "grad_norm": 0.5339858531951904, "learning_rate": 0.002828, "loss": 1.3733, "step": 211904 }, { "epoch": 15.907542213883676, "grad_norm": 0.6253660321235657, "learning_rate": 0.002828, "loss": 1.3775, "step": 211968 }, { "epoch": 15.91234521575985, "grad_norm": 0.5627161860466003, "learning_rate": 0.002828, "loss": 1.363, "step": 212032 }, { "epoch": 15.917148217636022, "grad_norm": 0.508140504360199, "learning_rate": 0.002828, "loss": 1.3695, "step": 212096 }, { "epoch": 15.921951219512195, "grad_norm": 0.5700469017028809, "learning_rate": 0.002828, "loss": 1.3674, "step": 212160 }, { "epoch": 15.926754221388368, "grad_norm": 0.6160951256752014, "learning_rate": 0.002828, "loss": 1.3687, "step": 212224 }, { "epoch": 15.93155722326454, "grad_norm": 0.4808539152145386, "learning_rate": 0.002828, "loss": 1.3681, "step": 212288 }, { "epoch": 15.936360225140714, "grad_norm": 0.5854923129081726, "learning_rate": 0.002828, "loss": 1.3682, "step": 212352 }, { "epoch": 15.941163227016885, "grad_norm": 0.4344835877418518, "learning_rate": 0.002828, "loss": 1.3648, "step": 212416 }, { "epoch": 15.945966228893058, "grad_norm": 0.6398991346359253, "learning_rate": 0.002828, "loss": 1.3665, "step": 212480 }, { "epoch": 15.95076923076923, "grad_norm": 0.5153722763061523, "learning_rate": 0.002828, "loss": 1.3748, "step": 212544 }, { "epoch": 15.955572232645403, "grad_norm": 0.6487797498703003, "learning_rate": 0.002828, "loss": 1.3728, "step": 212608 }, { "epoch": 15.960375234521576, "grad_norm": 0.6281577348709106, "learning_rate": 0.002828, "loss": 1.3655, "step": 212672 }, { "epoch": 15.96517823639775, "grad_norm": 0.6675734519958496, "learning_rate": 0.002828, "loss": 1.3732, "step": 212736 }, { "epoch": 15.96998123827392, "grad_norm": 0.46980762481689453, "learning_rate": 0.002828, "loss": 1.3667, "step": 212800 }, { "epoch": 15.974784240150093, "grad_norm": 0.5400161147117615, "learning_rate": 0.002828, "loss": 1.3732, "step": 212864 }, { "epoch": 15.979587242026266, "grad_norm": 0.5441579222679138, "learning_rate": 0.002828, "loss": 1.3732, "step": 212928 }, { "epoch": 15.984390243902439, "grad_norm": 0.48642051219940186, "learning_rate": 0.002828, "loss": 1.3668, "step": 212992 }, { "epoch": 15.989193245778612, "grad_norm": 0.4595506489276886, "learning_rate": 0.002828, "loss": 1.3701, "step": 213056 }, { "epoch": 15.993996247654785, "grad_norm": 0.5701270699501038, "learning_rate": 0.002828, "loss": 1.3703, "step": 213120 }, { "epoch": 15.998799249530958, "grad_norm": 0.48854881525039673, "learning_rate": 0.002828, "loss": 1.3723, "step": 213184 }, { "epoch": 16.00360225140713, "grad_norm": 0.6080936193466187, "learning_rate": 0.002828, "loss": 1.3561, "step": 213248 }, { "epoch": 16.008405253283303, "grad_norm": 0.5640507936477661, "learning_rate": 0.002828, "loss": 1.3516, "step": 213312 }, { "epoch": 16.013208255159476, "grad_norm": 0.5358575582504272, "learning_rate": 0.002828, "loss": 1.3571, "step": 213376 }, { "epoch": 16.018011257035646, "grad_norm": 0.5076690316200256, "learning_rate": 0.002828, "loss": 1.3547, "step": 213440 }, { "epoch": 16.02281425891182, "grad_norm": 0.5925807356834412, "learning_rate": 0.002828, "loss": 1.3598, "step": 213504 }, { "epoch": 16.02761726078799, "grad_norm": 0.5533409714698792, "learning_rate": 0.002828, "loss": 1.3533, "step": 213568 }, { "epoch": 16.032420262664164, "grad_norm": 0.507273256778717, "learning_rate": 0.002828, "loss": 1.3521, "step": 213632 }, { "epoch": 16.037223264540337, "grad_norm": 0.5224199891090393, "learning_rate": 0.002828, "loss": 1.3587, "step": 213696 }, { "epoch": 16.04202626641651, "grad_norm": 0.5440003275871277, "learning_rate": 0.002828, "loss": 1.354, "step": 213760 }, { "epoch": 16.046829268292683, "grad_norm": 0.5843242406845093, "learning_rate": 0.002828, "loss": 1.364, "step": 213824 }, { "epoch": 16.051632270168856, "grad_norm": 0.5096895694732666, "learning_rate": 0.002828, "loss": 1.3611, "step": 213888 }, { "epoch": 16.05643527204503, "grad_norm": 0.6344131827354431, "learning_rate": 0.002828, "loss": 1.3617, "step": 213952 }, { "epoch": 16.0612382739212, "grad_norm": 0.6061226725578308, "learning_rate": 0.002828, "loss": 1.3586, "step": 214016 }, { "epoch": 16.066041275797375, "grad_norm": 0.5750422477722168, "learning_rate": 0.002828, "loss": 1.3576, "step": 214080 }, { "epoch": 16.070844277673547, "grad_norm": 0.495776891708374, "learning_rate": 0.002828, "loss": 1.3573, "step": 214144 }, { "epoch": 16.07564727954972, "grad_norm": 0.4955301582813263, "learning_rate": 0.002828, "loss": 1.3589, "step": 214208 }, { "epoch": 16.08045028142589, "grad_norm": 0.6393030881881714, "learning_rate": 0.002828, "loss": 1.3622, "step": 214272 }, { "epoch": 16.085253283302062, "grad_norm": 0.5954785346984863, "learning_rate": 0.002828, "loss": 1.3577, "step": 214336 }, { "epoch": 16.090056285178235, "grad_norm": 0.5928305983543396, "learning_rate": 0.002828, "loss": 1.3551, "step": 214400 }, { "epoch": 16.09485928705441, "grad_norm": 0.5459696650505066, "learning_rate": 0.002828, "loss": 1.3643, "step": 214464 }, { "epoch": 16.09966228893058, "grad_norm": 0.7050192952156067, "learning_rate": 0.002828, "loss": 1.3586, "step": 214528 }, { "epoch": 16.104465290806754, "grad_norm": 0.5199483633041382, "learning_rate": 0.002828, "loss": 1.3611, "step": 214592 }, { "epoch": 16.109268292682927, "grad_norm": 0.4743548333644867, "learning_rate": 0.002828, "loss": 1.3612, "step": 214656 }, { "epoch": 16.1140712945591, "grad_norm": 0.5115127563476562, "learning_rate": 0.002828, "loss": 1.3634, "step": 214720 }, { "epoch": 16.118874296435273, "grad_norm": 0.5872589945793152, "learning_rate": 0.002828, "loss": 1.3626, "step": 214784 }, { "epoch": 16.123677298311446, "grad_norm": 0.5724947452545166, "learning_rate": 0.002828, "loss": 1.3645, "step": 214848 }, { "epoch": 16.12848030018762, "grad_norm": 0.6491893529891968, "learning_rate": 0.002828, "loss": 1.3583, "step": 214912 }, { "epoch": 16.13328330206379, "grad_norm": 0.579578697681427, "learning_rate": 0.002828, "loss": 1.3671, "step": 214976 }, { "epoch": 16.138086303939964, "grad_norm": 0.5547657608985901, "learning_rate": 0.002828, "loss": 1.3614, "step": 215040 }, { "epoch": 16.142889305816134, "grad_norm": 0.6079813241958618, "learning_rate": 0.002828, "loss": 1.3577, "step": 215104 }, { "epoch": 16.147692307692306, "grad_norm": 0.6912538409233093, "learning_rate": 0.002828, "loss": 1.3616, "step": 215168 }, { "epoch": 16.15249530956848, "grad_norm": 0.5580403208732605, "learning_rate": 0.002828, "loss": 1.3637, "step": 215232 }, { "epoch": 16.157298311444652, "grad_norm": 0.6073527932167053, "learning_rate": 0.002828, "loss": 1.3685, "step": 215296 }, { "epoch": 16.162101313320825, "grad_norm": 0.5496214032173157, "learning_rate": 0.002828, "loss": 1.3663, "step": 215360 }, { "epoch": 16.166904315196998, "grad_norm": 0.47354716062545776, "learning_rate": 0.002828, "loss": 1.3676, "step": 215424 }, { "epoch": 16.17170731707317, "grad_norm": 0.607663094997406, "learning_rate": 0.002828, "loss": 1.3666, "step": 215488 }, { "epoch": 16.176510318949344, "grad_norm": 0.5812382102012634, "learning_rate": 0.002828, "loss": 1.3652, "step": 215552 }, { "epoch": 16.181313320825517, "grad_norm": 0.713192880153656, "learning_rate": 0.002828, "loss": 1.3691, "step": 215616 }, { "epoch": 16.18611632270169, "grad_norm": 0.5395285487174988, "learning_rate": 0.002828, "loss": 1.3642, "step": 215680 }, { "epoch": 16.190919324577862, "grad_norm": 0.5327445268630981, "learning_rate": 0.002828, "loss": 1.3648, "step": 215744 }, { "epoch": 16.195722326454035, "grad_norm": 0.6013232469558716, "learning_rate": 0.002828, "loss": 1.3677, "step": 215808 }, { "epoch": 16.200525328330205, "grad_norm": 0.5524231195449829, "learning_rate": 0.002828, "loss": 1.3631, "step": 215872 }, { "epoch": 16.205328330206378, "grad_norm": 0.4762788712978363, "learning_rate": 0.002828, "loss": 1.3605, "step": 215936 }, { "epoch": 16.21013133208255, "grad_norm": 0.4953647553920746, "learning_rate": 0.002828, "loss": 1.3656, "step": 216000 }, { "epoch": 16.214934333958723, "grad_norm": 0.5233112573623657, "learning_rate": 0.002828, "loss": 1.3685, "step": 216064 }, { "epoch": 16.219737335834896, "grad_norm": 0.5747987031936646, "learning_rate": 0.002828, "loss": 1.3609, "step": 216128 }, { "epoch": 16.22454033771107, "grad_norm": 0.5410612225532532, "learning_rate": 0.002828, "loss": 1.364, "step": 216192 }, { "epoch": 16.229343339587242, "grad_norm": 0.5992555022239685, "learning_rate": 0.002828, "loss": 1.3719, "step": 216256 }, { "epoch": 16.234146341463415, "grad_norm": 0.6096734404563904, "learning_rate": 0.002828, "loss": 1.3628, "step": 216320 }, { "epoch": 16.238949343339588, "grad_norm": 0.5461161732673645, "learning_rate": 0.002828, "loss": 1.3621, "step": 216384 }, { "epoch": 16.24375234521576, "grad_norm": 0.552280068397522, "learning_rate": 0.002828, "loss": 1.3694, "step": 216448 }, { "epoch": 16.248555347091933, "grad_norm": 0.6835575103759766, "learning_rate": 0.002828, "loss": 1.3676, "step": 216512 }, { "epoch": 16.253358348968106, "grad_norm": 0.5092332363128662, "learning_rate": 0.002828, "loss": 1.3701, "step": 216576 }, { "epoch": 16.25816135084428, "grad_norm": 0.582518458366394, "learning_rate": 0.002828, "loss": 1.3665, "step": 216640 }, { "epoch": 16.26296435272045, "grad_norm": 0.5306048393249512, "learning_rate": 0.002828, "loss": 1.3657, "step": 216704 }, { "epoch": 16.26776735459662, "grad_norm": 0.6176237463951111, "learning_rate": 0.002828, "loss": 1.3717, "step": 216768 }, { "epoch": 16.272570356472794, "grad_norm": 0.5232059359550476, "learning_rate": 0.002828, "loss": 1.3675, "step": 216832 }, { "epoch": 16.277373358348967, "grad_norm": 0.6212632060050964, "learning_rate": 0.002828, "loss": 1.3663, "step": 216896 }, { "epoch": 16.28217636022514, "grad_norm": 0.4637361168861389, "learning_rate": 0.002828, "loss": 1.3652, "step": 216960 }, { "epoch": 16.286979362101313, "grad_norm": 0.5681953430175781, "learning_rate": 0.002828, "loss": 1.3757, "step": 217024 }, { "epoch": 16.291782363977486, "grad_norm": 0.5358713865280151, "learning_rate": 0.002828, "loss": 1.3647, "step": 217088 }, { "epoch": 16.29658536585366, "grad_norm": 0.5948871970176697, "learning_rate": 0.002828, "loss": 1.365, "step": 217152 }, { "epoch": 16.30138836772983, "grad_norm": 0.553391695022583, "learning_rate": 0.002828, "loss": 1.3698, "step": 217216 }, { "epoch": 16.306191369606005, "grad_norm": 0.6753780245780945, "learning_rate": 0.002828, "loss": 1.3693, "step": 217280 }, { "epoch": 16.310994371482177, "grad_norm": 0.5137534737586975, "learning_rate": 0.002828, "loss": 1.3696, "step": 217344 }, { "epoch": 16.31579737335835, "grad_norm": 0.5785669684410095, "learning_rate": 0.002828, "loss": 1.3699, "step": 217408 }, { "epoch": 16.320600375234523, "grad_norm": 0.45743459463119507, "learning_rate": 0.002828, "loss": 1.3691, "step": 217472 }, { "epoch": 16.325403377110693, "grad_norm": 0.6033856272697449, "learning_rate": 0.002828, "loss": 1.3697, "step": 217536 }, { "epoch": 16.330206378986865, "grad_norm": 0.5758998394012451, "learning_rate": 0.002828, "loss": 1.3687, "step": 217600 }, { "epoch": 16.33500938086304, "grad_norm": 0.5502091646194458, "learning_rate": 0.002828, "loss": 1.3664, "step": 217664 }, { "epoch": 16.33981238273921, "grad_norm": 0.530497670173645, "learning_rate": 0.002828, "loss": 1.3658, "step": 217728 }, { "epoch": 16.344615384615384, "grad_norm": 0.5255142450332642, "learning_rate": 0.002828, "loss": 1.3703, "step": 217792 }, { "epoch": 16.349418386491557, "grad_norm": 0.49928170442581177, "learning_rate": 0.002828, "loss": 1.3662, "step": 217856 }, { "epoch": 16.35422138836773, "grad_norm": 0.6465878486633301, "learning_rate": 0.002828, "loss": 1.3705, "step": 217920 }, { "epoch": 16.359024390243903, "grad_norm": 0.5199236869812012, "learning_rate": 0.002828, "loss": 1.3671, "step": 217984 }, { "epoch": 16.363827392120076, "grad_norm": 0.4599873721599579, "learning_rate": 0.002828, "loss": 1.3764, "step": 218048 }, { "epoch": 16.36863039399625, "grad_norm": 0.5065380930900574, "learning_rate": 0.002828, "loss": 1.3682, "step": 218112 }, { "epoch": 16.37343339587242, "grad_norm": 0.5988659262657166, "learning_rate": 0.002828, "loss": 1.3698, "step": 218176 }, { "epoch": 16.378236397748594, "grad_norm": 0.6505783796310425, "learning_rate": 0.002828, "loss": 1.366, "step": 218240 }, { "epoch": 16.383039399624767, "grad_norm": 0.5402714014053345, "learning_rate": 0.002828, "loss": 1.3703, "step": 218304 }, { "epoch": 16.387842401500937, "grad_norm": 0.5108478665351868, "learning_rate": 0.002828, "loss": 1.368, "step": 218368 }, { "epoch": 16.39264540337711, "grad_norm": 0.5588182210922241, "learning_rate": 0.002828, "loss": 1.3725, "step": 218432 }, { "epoch": 16.397448405253282, "grad_norm": 0.5530375838279724, "learning_rate": 0.002828, "loss": 1.3701, "step": 218496 }, { "epoch": 16.402251407129455, "grad_norm": 0.5329497456550598, "learning_rate": 0.002828, "loss": 1.3756, "step": 218560 }, { "epoch": 16.407054409005628, "grad_norm": 0.6167603731155396, "learning_rate": 0.002828, "loss": 1.3802, "step": 218624 }, { "epoch": 16.4118574108818, "grad_norm": 0.5216876268386841, "learning_rate": 0.002828, "loss": 1.3763, "step": 218688 }, { "epoch": 16.416660412757974, "grad_norm": 0.5277157425880432, "learning_rate": 0.002828, "loss": 1.3644, "step": 218752 }, { "epoch": 16.421463414634147, "grad_norm": 0.5166988372802734, "learning_rate": 0.002828, "loss": 1.3719, "step": 218816 }, { "epoch": 16.42626641651032, "grad_norm": 0.5149502754211426, "learning_rate": 0.002828, "loss": 1.3742, "step": 218880 }, { "epoch": 16.431069418386492, "grad_norm": 0.48137810826301575, "learning_rate": 0.002828, "loss": 1.3733, "step": 218944 }, { "epoch": 16.435872420262665, "grad_norm": 0.5344496369361877, "learning_rate": 0.002828, "loss": 1.3716, "step": 219008 }, { "epoch": 16.44067542213884, "grad_norm": 0.5200846791267395, "learning_rate": 0.002828, "loss": 1.3789, "step": 219072 }, { "epoch": 16.445478424015008, "grad_norm": 0.6055989861488342, "learning_rate": 0.002828, "loss": 1.3712, "step": 219136 }, { "epoch": 16.45028142589118, "grad_norm": 0.5656676888465881, "learning_rate": 0.002828, "loss": 1.369, "step": 219200 }, { "epoch": 16.455084427767353, "grad_norm": 0.5064542293548584, "learning_rate": 0.002828, "loss": 1.3709, "step": 219264 }, { "epoch": 16.459887429643526, "grad_norm": 0.49400073289871216, "learning_rate": 0.002828, "loss": 1.3781, "step": 219328 }, { "epoch": 16.4646904315197, "grad_norm": 0.5486621260643005, "learning_rate": 0.002828, "loss": 1.3748, "step": 219392 }, { "epoch": 16.469493433395872, "grad_norm": 0.5221492052078247, "learning_rate": 0.002828, "loss": 1.3722, "step": 219456 }, { "epoch": 16.474296435272045, "grad_norm": 0.6707462668418884, "learning_rate": 0.002828, "loss": 1.3674, "step": 219520 }, { "epoch": 16.479099437148218, "grad_norm": 0.5669146776199341, "learning_rate": 0.002828, "loss": 1.3685, "step": 219584 }, { "epoch": 16.48390243902439, "grad_norm": 0.5900082588195801, "learning_rate": 0.002828, "loss": 1.3724, "step": 219648 }, { "epoch": 16.488705440900564, "grad_norm": 0.4715104103088379, "learning_rate": 0.002828, "loss": 1.3763, "step": 219712 }, { "epoch": 16.493508442776736, "grad_norm": 0.6620226502418518, "learning_rate": 0.002828, "loss": 1.3711, "step": 219776 }, { "epoch": 16.49831144465291, "grad_norm": 0.5841343402862549, "learning_rate": 0.002828, "loss": 1.374, "step": 219840 }, { "epoch": 16.503114446529082, "grad_norm": 0.5079573392868042, "learning_rate": 0.002828, "loss": 1.3735, "step": 219904 }, { "epoch": 16.50791744840525, "grad_norm": 0.6196205019950867, "learning_rate": 0.002828, "loss": 1.3734, "step": 219968 }, { "epoch": 16.512720450281424, "grad_norm": 0.5488569736480713, "learning_rate": 0.002828, "loss": 1.377, "step": 220032 }, { "epoch": 16.517523452157597, "grad_norm": 0.5166944861412048, "learning_rate": 0.002828, "loss": 1.3781, "step": 220096 }, { "epoch": 16.52232645403377, "grad_norm": 0.5003791451454163, "learning_rate": 0.002828, "loss": 1.3772, "step": 220160 }, { "epoch": 16.527129455909943, "grad_norm": 0.5646136403083801, "learning_rate": 0.002828, "loss": 1.374, "step": 220224 }, { "epoch": 16.531932457786116, "grad_norm": 0.4585341811180115, "learning_rate": 0.002828, "loss": 1.3738, "step": 220288 }, { "epoch": 16.53673545966229, "grad_norm": 0.5085334181785583, "learning_rate": 0.002828, "loss": 1.3713, "step": 220352 }, { "epoch": 16.54153846153846, "grad_norm": 0.6526809930801392, "learning_rate": 0.002828, "loss": 1.3746, "step": 220416 }, { "epoch": 16.546341463414635, "grad_norm": 0.5648365020751953, "learning_rate": 0.002828, "loss": 1.3766, "step": 220480 }, { "epoch": 16.551144465290808, "grad_norm": 0.6174665689468384, "learning_rate": 0.002828, "loss": 1.3688, "step": 220544 }, { "epoch": 16.55594746716698, "grad_norm": 0.5618147253990173, "learning_rate": 0.002828, "loss": 1.3718, "step": 220608 }, { "epoch": 16.560750469043153, "grad_norm": 0.5534228682518005, "learning_rate": 0.002828, "loss": 1.3731, "step": 220672 }, { "epoch": 16.565553470919326, "grad_norm": 0.5594824552536011, "learning_rate": 0.002828, "loss": 1.3691, "step": 220736 }, { "epoch": 16.570356472795496, "grad_norm": 0.48453760147094727, "learning_rate": 0.002828, "loss": 1.3733, "step": 220800 }, { "epoch": 16.57515947467167, "grad_norm": 0.58493572473526, "learning_rate": 0.002828, "loss": 1.3763, "step": 220864 }, { "epoch": 16.57996247654784, "grad_norm": 0.5082259774208069, "learning_rate": 0.002828, "loss": 1.368, "step": 220928 }, { "epoch": 16.584765478424014, "grad_norm": 0.6410751342773438, "learning_rate": 0.002828, "loss": 1.3764, "step": 220992 }, { "epoch": 16.589568480300187, "grad_norm": 0.6781150698661804, "learning_rate": 0.002828, "loss": 1.3693, "step": 221056 }, { "epoch": 16.59437148217636, "grad_norm": 0.5025544762611389, "learning_rate": 0.002828, "loss": 1.3664, "step": 221120 }, { "epoch": 16.599174484052533, "grad_norm": 0.48140037059783936, "learning_rate": 0.002828, "loss": 1.3721, "step": 221184 }, { "epoch": 16.603977485928706, "grad_norm": 0.6274412274360657, "learning_rate": 0.002828, "loss": 1.3723, "step": 221248 }, { "epoch": 16.60878048780488, "grad_norm": 0.5692063570022583, "learning_rate": 0.002828, "loss": 1.3746, "step": 221312 }, { "epoch": 16.61358348968105, "grad_norm": 0.5218042135238647, "learning_rate": 0.002828, "loss": 1.371, "step": 221376 }, { "epoch": 16.618386491557224, "grad_norm": 0.5552325248718262, "learning_rate": 0.002828, "loss": 1.3761, "step": 221440 }, { "epoch": 16.623189493433397, "grad_norm": 0.5654879212379456, "learning_rate": 0.002828, "loss": 1.3727, "step": 221504 }, { "epoch": 16.62799249530957, "grad_norm": 0.5669631958007812, "learning_rate": 0.002828, "loss": 1.3835, "step": 221568 }, { "epoch": 16.63279549718574, "grad_norm": 0.669795572757721, "learning_rate": 0.002828, "loss": 1.3811, "step": 221632 }, { "epoch": 16.637598499061912, "grad_norm": 0.5432254076004028, "learning_rate": 0.002828, "loss": 1.3763, "step": 221696 }, { "epoch": 16.642401500938085, "grad_norm": 0.5432413220405579, "learning_rate": 0.002828, "loss": 1.3776, "step": 221760 }, { "epoch": 16.647204502814258, "grad_norm": 0.42493462562561035, "learning_rate": 0.002828, "loss": 1.3737, "step": 221824 }, { "epoch": 16.65200750469043, "grad_norm": 0.6230249404907227, "learning_rate": 0.002828, "loss": 1.3781, "step": 221888 }, { "epoch": 16.656810506566604, "grad_norm": 0.5844151973724365, "learning_rate": 0.002828, "loss": 1.3718, "step": 221952 }, { "epoch": 16.661613508442777, "grad_norm": 0.5197720527648926, "learning_rate": 0.002828, "loss": 1.3752, "step": 222016 }, { "epoch": 16.66641651031895, "grad_norm": 0.5851918458938599, "learning_rate": 0.002828, "loss": 1.3775, "step": 222080 }, { "epoch": 16.671219512195123, "grad_norm": 0.5978190898895264, "learning_rate": 0.002828, "loss": 1.3683, "step": 222144 }, { "epoch": 16.676022514071295, "grad_norm": 0.5710034370422363, "learning_rate": 0.002828, "loss": 1.3705, "step": 222208 }, { "epoch": 16.68082551594747, "grad_norm": 0.55964195728302, "learning_rate": 0.002828, "loss": 1.3699, "step": 222272 }, { "epoch": 16.68562851782364, "grad_norm": 0.6255779266357422, "learning_rate": 0.002828, "loss": 1.3712, "step": 222336 }, { "epoch": 16.690431519699814, "grad_norm": 0.5409938097000122, "learning_rate": 0.002828, "loss": 1.3798, "step": 222400 }, { "epoch": 16.695234521575983, "grad_norm": 0.5851971507072449, "learning_rate": 0.002828, "loss": 1.3754, "step": 222464 }, { "epoch": 16.700037523452156, "grad_norm": 0.5326768159866333, "learning_rate": 0.002828, "loss": 1.3707, "step": 222528 }, { "epoch": 16.70484052532833, "grad_norm": 0.5734680891036987, "learning_rate": 0.002828, "loss": 1.3704, "step": 222592 }, { "epoch": 16.709643527204502, "grad_norm": 0.5416348576545715, "learning_rate": 0.002828, "loss": 1.3777, "step": 222656 }, { "epoch": 16.714446529080675, "grad_norm": 0.49465447664260864, "learning_rate": 0.002828, "loss": 1.3753, "step": 222720 }, { "epoch": 16.719249530956848, "grad_norm": 0.4981372654438019, "learning_rate": 0.002828, "loss": 1.3759, "step": 222784 }, { "epoch": 16.72405253283302, "grad_norm": 0.5880029201507568, "learning_rate": 0.002828, "loss": 1.3668, "step": 222848 }, { "epoch": 16.728855534709194, "grad_norm": 0.5089569091796875, "learning_rate": 0.002828, "loss": 1.375, "step": 222912 }, { "epoch": 16.733658536585367, "grad_norm": 0.576672375202179, "learning_rate": 0.002828, "loss": 1.373, "step": 222976 }, { "epoch": 16.73846153846154, "grad_norm": 0.592309296131134, "learning_rate": 0.002828, "loss": 1.371, "step": 223040 }, { "epoch": 16.743264540337712, "grad_norm": 0.5511286854743958, "learning_rate": 0.002828, "loss": 1.3758, "step": 223104 }, { "epoch": 16.748067542213885, "grad_norm": 0.5695531368255615, "learning_rate": 0.002828, "loss": 1.3713, "step": 223168 }, { "epoch": 16.752870544090058, "grad_norm": 0.6317942142486572, "learning_rate": 0.002828, "loss": 1.3765, "step": 223232 }, { "epoch": 16.757673545966227, "grad_norm": 0.5613177418708801, "learning_rate": 0.002828, "loss": 1.3727, "step": 223296 }, { "epoch": 16.7624765478424, "grad_norm": 0.6550566554069519, "learning_rate": 0.002828, "loss": 1.3751, "step": 223360 }, { "epoch": 16.767279549718573, "grad_norm": 0.559780478477478, "learning_rate": 0.002828, "loss": 1.375, "step": 223424 }, { "epoch": 16.772082551594746, "grad_norm": 0.5176964402198792, "learning_rate": 0.002828, "loss": 1.3682, "step": 223488 }, { "epoch": 16.77688555347092, "grad_norm": 0.5380821824073792, "learning_rate": 0.002828, "loss": 1.3715, "step": 223552 }, { "epoch": 16.781688555347092, "grad_norm": 0.5392264723777771, "learning_rate": 0.002828, "loss": 1.3785, "step": 223616 }, { "epoch": 16.786491557223265, "grad_norm": 0.5847867727279663, "learning_rate": 0.002828, "loss": 1.3732, "step": 223680 }, { "epoch": 16.791294559099438, "grad_norm": 0.564135730266571, "learning_rate": 0.002828, "loss": 1.3753, "step": 223744 }, { "epoch": 16.79609756097561, "grad_norm": 0.5908150672912598, "learning_rate": 0.002828, "loss": 1.3724, "step": 223808 }, { "epoch": 16.800900562851783, "grad_norm": 0.5761885643005371, "learning_rate": 0.002828, "loss": 1.3764, "step": 223872 }, { "epoch": 16.805703564727956, "grad_norm": 0.5740852355957031, "learning_rate": 0.002828, "loss": 1.3798, "step": 223936 }, { "epoch": 16.81050656660413, "grad_norm": 0.49333980679512024, "learning_rate": 0.002828, "loss": 1.3788, "step": 224000 }, { "epoch": 16.815309568480302, "grad_norm": 0.4524737000465393, "learning_rate": 0.002828, "loss": 1.3672, "step": 224064 }, { "epoch": 16.82011257035647, "grad_norm": 0.5872974991798401, "learning_rate": 0.002828, "loss": 1.3792, "step": 224128 }, { "epoch": 16.824915572232644, "grad_norm": 0.6633861660957336, "learning_rate": 0.002828, "loss": 1.3736, "step": 224192 }, { "epoch": 16.829718574108817, "grad_norm": 0.7240063548088074, "learning_rate": 0.002828, "loss": 1.3703, "step": 224256 }, { "epoch": 16.83452157598499, "grad_norm": 0.5940957069396973, "learning_rate": 0.002828, "loss": 1.3753, "step": 224320 }, { "epoch": 16.839324577861163, "grad_norm": 0.5730873346328735, "learning_rate": 0.002828, "loss": 1.3767, "step": 224384 }, { "epoch": 16.844127579737336, "grad_norm": 0.6729320883750916, "learning_rate": 0.002828, "loss": 1.3753, "step": 224448 }, { "epoch": 16.84893058161351, "grad_norm": 0.5936250686645508, "learning_rate": 0.002828, "loss": 1.3726, "step": 224512 }, { "epoch": 16.85373358348968, "grad_norm": 0.546510636806488, "learning_rate": 0.002828, "loss": 1.3723, "step": 224576 }, { "epoch": 16.858536585365854, "grad_norm": 0.6515787839889526, "learning_rate": 0.002828, "loss": 1.3709, "step": 224640 }, { "epoch": 16.863339587242027, "grad_norm": 0.455196350812912, "learning_rate": 0.002828, "loss": 1.3698, "step": 224704 }, { "epoch": 16.8681425891182, "grad_norm": 0.498130202293396, "learning_rate": 0.002828, "loss": 1.3744, "step": 224768 }, { "epoch": 16.872945590994373, "grad_norm": 0.4296855926513672, "learning_rate": 0.002828, "loss": 1.3725, "step": 224832 }, { "epoch": 16.877748592870542, "grad_norm": 0.47755885124206543, "learning_rate": 0.002828, "loss": 1.3778, "step": 224896 }, { "epoch": 16.882551594746715, "grad_norm": 0.5465676188468933, "learning_rate": 0.002828, "loss": 1.3782, "step": 224960 }, { "epoch": 16.887354596622888, "grad_norm": 0.6344484686851501, "learning_rate": 0.002828, "loss": 1.3742, "step": 225024 }, { "epoch": 16.89215759849906, "grad_norm": 0.5083233714103699, "learning_rate": 0.002828, "loss": 1.3718, "step": 225088 }, { "epoch": 16.896960600375234, "grad_norm": 0.5428247451782227, "learning_rate": 0.002828, "loss": 1.375, "step": 225152 }, { "epoch": 16.901763602251407, "grad_norm": 0.47932419180870056, "learning_rate": 0.002828, "loss": 1.3739, "step": 225216 }, { "epoch": 16.90656660412758, "grad_norm": 0.534848153591156, "learning_rate": 0.002828, "loss": 1.3779, "step": 225280 }, { "epoch": 16.911369606003753, "grad_norm": 0.5262565612792969, "learning_rate": 0.002828, "loss": 1.3792, "step": 225344 }, { "epoch": 16.916172607879925, "grad_norm": 0.5471115112304688, "learning_rate": 0.002828, "loss": 1.3732, "step": 225408 }, { "epoch": 16.9209756097561, "grad_norm": 0.5170307755470276, "learning_rate": 0.002828, "loss": 1.3742, "step": 225472 }, { "epoch": 16.92577861163227, "grad_norm": 0.491405189037323, "learning_rate": 0.002828, "loss": 1.3755, "step": 225536 }, { "epoch": 16.930581613508444, "grad_norm": 0.5076206922531128, "learning_rate": 0.002828, "loss": 1.3717, "step": 225600 }, { "epoch": 16.935384615384617, "grad_norm": 0.6649317741394043, "learning_rate": 0.002828, "loss": 1.3787, "step": 225664 }, { "epoch": 16.940187617260786, "grad_norm": 0.5338141918182373, "learning_rate": 0.002828, "loss": 1.3786, "step": 225728 }, { "epoch": 16.94499061913696, "grad_norm": 0.4969607889652252, "learning_rate": 0.002828, "loss": 1.3772, "step": 225792 }, { "epoch": 16.949793621013132, "grad_norm": 0.6936911344528198, "learning_rate": 0.002828, "loss": 1.3713, "step": 225856 }, { "epoch": 16.954596622889305, "grad_norm": 0.5464868545532227, "learning_rate": 0.002828, "loss": 1.3798, "step": 225920 }, { "epoch": 16.959399624765478, "grad_norm": 0.4734916687011719, "learning_rate": 0.002828, "loss": 1.3737, "step": 225984 }, { "epoch": 16.96420262664165, "grad_norm": 0.5360217094421387, "learning_rate": 0.002828, "loss": 1.3718, "step": 226048 }, { "epoch": 16.969005628517824, "grad_norm": 0.5119659304618835, "learning_rate": 0.002828, "loss": 1.3771, "step": 226112 }, { "epoch": 16.973808630393997, "grad_norm": 0.6373065114021301, "learning_rate": 0.002828, "loss": 1.3751, "step": 226176 }, { "epoch": 16.97861163227017, "grad_norm": 0.6539568305015564, "learning_rate": 0.002828, "loss": 1.3789, "step": 226240 }, { "epoch": 16.983414634146342, "grad_norm": 0.5388275384902954, "learning_rate": 0.002828, "loss": 1.3793, "step": 226304 }, { "epoch": 16.988217636022515, "grad_norm": 0.5029416680335999, "learning_rate": 0.002828, "loss": 1.3769, "step": 226368 }, { "epoch": 16.993020637898688, "grad_norm": 0.6130713820457458, "learning_rate": 0.002828, "loss": 1.3824, "step": 226432 }, { "epoch": 16.99782363977486, "grad_norm": 0.5726588368415833, "learning_rate": 0.002828, "loss": 1.3749, "step": 226496 }, { "epoch": 17.00262664165103, "grad_norm": 0.5122669339179993, "learning_rate": 0.002828, "loss": 1.3472, "step": 226560 }, { "epoch": 17.007429643527203, "grad_norm": 0.5689617395401001, "learning_rate": 0.002828, "loss": 1.3311, "step": 226624 }, { "epoch": 17.012232645403376, "grad_norm": 0.4785527288913727, "learning_rate": 0.002828, "loss": 1.3295, "step": 226688 }, { "epoch": 17.01703564727955, "grad_norm": 0.48141196370124817, "learning_rate": 0.002828, "loss": 1.3372, "step": 226752 }, { "epoch": 17.021838649155722, "grad_norm": 0.5237526893615723, "learning_rate": 0.002828, "loss": 1.3331, "step": 226816 }, { "epoch": 17.026641651031895, "grad_norm": 0.6010721325874329, "learning_rate": 0.002828, "loss": 1.3416, "step": 226880 }, { "epoch": 17.031444652908068, "grad_norm": 0.5771501660346985, "learning_rate": 0.002828, "loss": 1.3296, "step": 226944 }, { "epoch": 17.03624765478424, "grad_norm": 0.48024001717567444, "learning_rate": 0.002828, "loss": 1.333, "step": 227008 }, { "epoch": 17.041050656660413, "grad_norm": 0.6010895371437073, "learning_rate": 0.002828, "loss": 1.3413, "step": 227072 }, { "epoch": 17.045853658536586, "grad_norm": 0.49624505639076233, "learning_rate": 0.002828, "loss": 1.339, "step": 227136 }, { "epoch": 17.05065666041276, "grad_norm": 0.6421620845794678, "learning_rate": 0.002828, "loss": 1.341, "step": 227200 }, { "epoch": 17.055459662288932, "grad_norm": 0.5180822014808655, "learning_rate": 0.002828, "loss": 1.3375, "step": 227264 }, { "epoch": 17.060262664165105, "grad_norm": 0.48310354351997375, "learning_rate": 0.002828, "loss": 1.3388, "step": 227328 }, { "epoch": 17.065065666041274, "grad_norm": 0.5890333652496338, "learning_rate": 0.002828, "loss": 1.3361, "step": 227392 }, { "epoch": 17.069868667917447, "grad_norm": 0.5011729001998901, "learning_rate": 0.002828, "loss": 1.3412, "step": 227456 }, { "epoch": 17.07467166979362, "grad_norm": 0.5238128304481506, "learning_rate": 0.002828, "loss": 1.3366, "step": 227520 }, { "epoch": 17.079474671669793, "grad_norm": 0.5375518798828125, "learning_rate": 0.002828, "loss": 1.3405, "step": 227584 }, { "epoch": 17.084277673545966, "grad_norm": 0.646587610244751, "learning_rate": 0.002828, "loss": 1.3415, "step": 227648 }, { "epoch": 17.08908067542214, "grad_norm": 0.538013756275177, "learning_rate": 0.002828, "loss": 1.3367, "step": 227712 }, { "epoch": 17.09388367729831, "grad_norm": 0.493001788854599, "learning_rate": 0.002828, "loss": 1.3374, "step": 227776 }, { "epoch": 17.098686679174484, "grad_norm": 0.6065325140953064, "learning_rate": 0.002828, "loss": 1.3353, "step": 227840 }, { "epoch": 17.103489681050657, "grad_norm": 0.6094088554382324, "learning_rate": 0.002828, "loss": 1.341, "step": 227904 }, { "epoch": 17.10829268292683, "grad_norm": 0.555350661277771, "learning_rate": 0.002828, "loss": 1.3364, "step": 227968 }, { "epoch": 17.113095684803003, "grad_norm": 0.5394548177719116, "learning_rate": 0.002828, "loss": 1.342, "step": 228032 }, { "epoch": 17.117898686679176, "grad_norm": 0.5966508984565735, "learning_rate": 0.002828, "loss": 1.3394, "step": 228096 }, { "epoch": 17.122701688555345, "grad_norm": 0.49855026602745056, "learning_rate": 0.002828, "loss": 1.3375, "step": 228160 }, { "epoch": 17.12750469043152, "grad_norm": 0.5403494834899902, "learning_rate": 0.002828, "loss": 1.3414, "step": 228224 }, { "epoch": 17.13230769230769, "grad_norm": 0.5960873365402222, "learning_rate": 0.002828, "loss": 1.3391, "step": 228288 }, { "epoch": 17.137110694183864, "grad_norm": 0.5608639717102051, "learning_rate": 0.002828, "loss": 1.3459, "step": 228352 }, { "epoch": 17.141913696060037, "grad_norm": 0.5412024855613708, "learning_rate": 0.002828, "loss": 1.3461, "step": 228416 }, { "epoch": 17.14671669793621, "grad_norm": 0.5174167156219482, "learning_rate": 0.002828, "loss": 1.3387, "step": 228480 }, { "epoch": 17.151519699812383, "grad_norm": 0.4773479104042053, "learning_rate": 0.002828, "loss": 1.346, "step": 228544 }, { "epoch": 17.156322701688556, "grad_norm": 0.5367104411125183, "learning_rate": 0.002828, "loss": 1.3468, "step": 228608 }, { "epoch": 17.16112570356473, "grad_norm": 0.5563543438911438, "learning_rate": 0.002828, "loss": 1.3451, "step": 228672 }, { "epoch": 17.1659287054409, "grad_norm": 0.5474345684051514, "learning_rate": 0.002828, "loss": 1.3351, "step": 228736 }, { "epoch": 17.170731707317074, "grad_norm": 0.621706485748291, "learning_rate": 0.002828, "loss": 1.3382, "step": 228800 }, { "epoch": 17.175534709193247, "grad_norm": 0.6461145281791687, "learning_rate": 0.002828, "loss": 1.3426, "step": 228864 }, { "epoch": 17.18033771106942, "grad_norm": 0.6269495487213135, "learning_rate": 0.002828, "loss": 1.3445, "step": 228928 }, { "epoch": 17.18514071294559, "grad_norm": 0.5621188282966614, "learning_rate": 0.002828, "loss": 1.3442, "step": 228992 }, { "epoch": 17.189943714821762, "grad_norm": 0.544793426990509, "learning_rate": 0.002828, "loss": 1.3443, "step": 229056 }, { "epoch": 17.194746716697935, "grad_norm": 0.5203141570091248, "learning_rate": 0.002828, "loss": 1.3445, "step": 229120 }, { "epoch": 17.199549718574108, "grad_norm": 0.6426690816879272, "learning_rate": 0.002828, "loss": 1.3463, "step": 229184 }, { "epoch": 17.20435272045028, "grad_norm": 0.6133685111999512, "learning_rate": 0.002828, "loss": 1.3511, "step": 229248 }, { "epoch": 17.209155722326454, "grad_norm": 0.5746537446975708, "learning_rate": 0.002828, "loss": 1.3431, "step": 229312 }, { "epoch": 17.213958724202627, "grad_norm": 0.6326211094856262, "learning_rate": 0.002828, "loss": 1.3479, "step": 229376 }, { "epoch": 17.2187617260788, "grad_norm": 0.545437216758728, "learning_rate": 0.002828, "loss": 1.338, "step": 229440 }, { "epoch": 17.223564727954972, "grad_norm": 0.5582890510559082, "learning_rate": 0.002828, "loss": 1.3433, "step": 229504 }, { "epoch": 17.228367729831145, "grad_norm": 0.4580340087413788, "learning_rate": 0.002828, "loss": 1.3476, "step": 229568 }, { "epoch": 17.233170731707318, "grad_norm": 0.6911355257034302, "learning_rate": 0.002828, "loss": 1.3526, "step": 229632 }, { "epoch": 17.23797373358349, "grad_norm": 0.5566486120223999, "learning_rate": 0.002828, "loss": 1.3482, "step": 229696 }, { "epoch": 17.242776735459664, "grad_norm": 0.6304554343223572, "learning_rate": 0.002828, "loss": 1.3476, "step": 229760 }, { "epoch": 17.247579737335833, "grad_norm": 0.6848873496055603, "learning_rate": 0.002828, "loss": 1.3424, "step": 229824 }, { "epoch": 17.252382739212006, "grad_norm": 0.5782124996185303, "learning_rate": 0.002828, "loss": 1.3502, "step": 229888 }, { "epoch": 17.25718574108818, "grad_norm": 0.533871054649353, "learning_rate": 0.002828, "loss": 1.3418, "step": 229952 }, { "epoch": 17.261988742964352, "grad_norm": 0.636326253414154, "learning_rate": 0.002828, "loss": 1.3464, "step": 230016 }, { "epoch": 17.266791744840525, "grad_norm": 0.5879301428794861, "learning_rate": 0.002828, "loss": 1.3457, "step": 230080 }, { "epoch": 17.271594746716698, "grad_norm": 0.6339291334152222, "learning_rate": 0.002828, "loss": 1.3487, "step": 230144 }, { "epoch": 17.27639774859287, "grad_norm": 0.5430083274841309, "learning_rate": 0.002828, "loss": 1.3452, "step": 230208 }, { "epoch": 17.281200750469043, "grad_norm": 0.498355895280838, "learning_rate": 0.002828, "loss": 1.3491, "step": 230272 }, { "epoch": 17.286003752345216, "grad_norm": 0.5435526371002197, "learning_rate": 0.002828, "loss": 1.3493, "step": 230336 }, { "epoch": 17.29080675422139, "grad_norm": 0.5432043671607971, "learning_rate": 0.002828, "loss": 1.3465, "step": 230400 }, { "epoch": 17.295609756097562, "grad_norm": 0.5218422412872314, "learning_rate": 0.002828, "loss": 1.3509, "step": 230464 }, { "epoch": 17.300412757973735, "grad_norm": 0.5061742663383484, "learning_rate": 0.002828, "loss": 1.3441, "step": 230528 }, { "epoch": 17.305215759849908, "grad_norm": 0.5243954062461853, "learning_rate": 0.002828, "loss": 1.3497, "step": 230592 }, { "epoch": 17.310018761726077, "grad_norm": 0.6132803559303284, "learning_rate": 0.002828, "loss": 1.3467, "step": 230656 }, { "epoch": 17.31482176360225, "grad_norm": 0.5467346906661987, "learning_rate": 0.002828, "loss": 1.3525, "step": 230720 }, { "epoch": 17.319624765478423, "grad_norm": 0.5703427791595459, "learning_rate": 0.002828, "loss": 1.3509, "step": 230784 }, { "epoch": 17.324427767354596, "grad_norm": 0.6172657012939453, "learning_rate": 0.002828, "loss": 1.3464, "step": 230848 }, { "epoch": 17.32923076923077, "grad_norm": 0.5590468645095825, "learning_rate": 0.002828, "loss": 1.3447, "step": 230912 }, { "epoch": 17.33403377110694, "grad_norm": 0.6089560985565186, "learning_rate": 0.002828, "loss": 1.3444, "step": 230976 }, { "epoch": 17.338836772983115, "grad_norm": 0.6214098334312439, "learning_rate": 0.002828, "loss": 1.3541, "step": 231040 }, { "epoch": 17.343639774859287, "grad_norm": 0.48207151889801025, "learning_rate": 0.002828, "loss": 1.3516, "step": 231104 }, { "epoch": 17.34844277673546, "grad_norm": 0.5069649815559387, "learning_rate": 0.002828, "loss": 1.3422, "step": 231168 }, { "epoch": 17.353245778611633, "grad_norm": 0.6617603302001953, "learning_rate": 0.002828, "loss": 1.345, "step": 231232 }, { "epoch": 17.358048780487806, "grad_norm": 0.5475003123283386, "learning_rate": 0.002828, "loss": 1.3446, "step": 231296 }, { "epoch": 17.36285178236398, "grad_norm": 0.495044469833374, "learning_rate": 0.002828, "loss": 1.3514, "step": 231360 }, { "epoch": 17.367654784240152, "grad_norm": 0.646589994430542, "learning_rate": 0.002828, "loss": 1.3472, "step": 231424 }, { "epoch": 17.37245778611632, "grad_norm": 0.6345487236976624, "learning_rate": 0.002828, "loss": 1.3473, "step": 231488 }, { "epoch": 17.377260787992494, "grad_norm": 0.6994035243988037, "learning_rate": 0.002828, "loss": 1.348, "step": 231552 }, { "epoch": 17.382063789868667, "grad_norm": 0.7058942317962646, "learning_rate": 0.002828, "loss": 1.348, "step": 231616 }, { "epoch": 17.38686679174484, "grad_norm": 0.5912184715270996, "learning_rate": 0.002828, "loss": 1.3523, "step": 231680 }, { "epoch": 17.391669793621013, "grad_norm": 0.4432578384876251, "learning_rate": 0.002828, "loss": 1.3516, "step": 231744 }, { "epoch": 17.396472795497186, "grad_norm": 0.5171277523040771, "learning_rate": 0.002828, "loss": 1.3544, "step": 231808 }, { "epoch": 17.40127579737336, "grad_norm": 0.6040003299713135, "learning_rate": 0.002828, "loss": 1.3496, "step": 231872 }, { "epoch": 17.40607879924953, "grad_norm": 0.5984319448471069, "learning_rate": 0.002828, "loss": 1.3529, "step": 231936 }, { "epoch": 17.410881801125704, "grad_norm": 0.5507851839065552, "learning_rate": 0.002828, "loss": 1.3544, "step": 232000 }, { "epoch": 17.415684803001877, "grad_norm": 0.5893108248710632, "learning_rate": 0.002828, "loss": 1.347, "step": 232064 }, { "epoch": 17.42048780487805, "grad_norm": 0.6688479781150818, "learning_rate": 0.002828, "loss": 1.352, "step": 232128 }, { "epoch": 17.425290806754223, "grad_norm": 0.5244783759117126, "learning_rate": 0.002828, "loss": 1.3554, "step": 232192 }, { "epoch": 17.430093808630392, "grad_norm": 0.5949782133102417, "learning_rate": 0.002828, "loss": 1.3511, "step": 232256 }, { "epoch": 17.434896810506565, "grad_norm": 0.6664429903030396, "learning_rate": 0.002828, "loss": 1.3498, "step": 232320 }, { "epoch": 17.439699812382738, "grad_norm": 0.48198601603507996, "learning_rate": 0.002828, "loss": 1.3457, "step": 232384 }, { "epoch": 17.44450281425891, "grad_norm": 0.5703681111335754, "learning_rate": 0.002828, "loss": 1.3515, "step": 232448 }, { "epoch": 17.449305816135084, "grad_norm": 0.6069356799125671, "learning_rate": 0.002828, "loss": 1.3586, "step": 232512 }, { "epoch": 17.454108818011257, "grad_norm": 0.5629026889801025, "learning_rate": 0.002828, "loss": 1.3515, "step": 232576 }, { "epoch": 17.45891181988743, "grad_norm": 0.6319453716278076, "learning_rate": 0.002828, "loss": 1.3477, "step": 232640 }, { "epoch": 17.463714821763602, "grad_norm": 0.5908174514770508, "learning_rate": 0.002828, "loss": 1.3521, "step": 232704 }, { "epoch": 17.468517823639775, "grad_norm": 0.5290798544883728, "learning_rate": 0.002828, "loss": 1.3528, "step": 232768 }, { "epoch": 17.47332082551595, "grad_norm": 0.553308367729187, "learning_rate": 0.002828, "loss": 1.3519, "step": 232832 }, { "epoch": 17.47812382739212, "grad_norm": 0.5727306008338928, "learning_rate": 0.002828, "loss": 1.3467, "step": 232896 }, { "epoch": 17.482926829268294, "grad_norm": 0.5325832962989807, "learning_rate": 0.002828, "loss": 1.3567, "step": 232960 }, { "epoch": 17.487729831144467, "grad_norm": 0.5459145307540894, "learning_rate": 0.002828, "loss": 1.3495, "step": 233024 }, { "epoch": 17.492532833020636, "grad_norm": 0.5476085543632507, "learning_rate": 0.002828, "loss": 1.3514, "step": 233088 }, { "epoch": 17.49733583489681, "grad_norm": 0.5742253661155701, "learning_rate": 0.002828, "loss": 1.3503, "step": 233152 }, { "epoch": 17.502138836772982, "grad_norm": 0.6163190007209778, "learning_rate": 0.002828, "loss": 1.3525, "step": 233216 }, { "epoch": 17.506941838649155, "grad_norm": 0.5045285224914551, "learning_rate": 0.002828, "loss": 1.3545, "step": 233280 }, { "epoch": 17.511744840525328, "grad_norm": 0.5438716411590576, "learning_rate": 0.002828, "loss": 1.3506, "step": 233344 }, { "epoch": 17.5165478424015, "grad_norm": 0.6147487163543701, "learning_rate": 0.002828, "loss": 1.3532, "step": 233408 }, { "epoch": 17.521350844277674, "grad_norm": 0.5019384026527405, "learning_rate": 0.002828, "loss": 1.3551, "step": 233472 }, { "epoch": 17.526153846153846, "grad_norm": 0.5296437740325928, "learning_rate": 0.002828, "loss": 1.3573, "step": 233536 }, { "epoch": 17.53095684803002, "grad_norm": 0.689786970615387, "learning_rate": 0.002828, "loss": 1.3553, "step": 233600 }, { "epoch": 17.535759849906192, "grad_norm": 0.5649539232254028, "learning_rate": 0.002828, "loss": 1.3569, "step": 233664 }, { "epoch": 17.540562851782365, "grad_norm": 0.563384473323822, "learning_rate": 0.002828, "loss": 1.3527, "step": 233728 }, { "epoch": 17.545365853658538, "grad_norm": 0.6230480670928955, "learning_rate": 0.002828, "loss": 1.348, "step": 233792 }, { "epoch": 17.55016885553471, "grad_norm": 0.5748286247253418, "learning_rate": 0.002828, "loss": 1.3464, "step": 233856 }, { "epoch": 17.55497185741088, "grad_norm": 0.6217999458312988, "learning_rate": 0.002828, "loss": 1.3488, "step": 233920 }, { "epoch": 17.559774859287053, "grad_norm": 0.5565934777259827, "learning_rate": 0.002828, "loss": 1.3505, "step": 233984 }, { "epoch": 17.564577861163226, "grad_norm": 0.5826540589332581, "learning_rate": 0.002828, "loss": 1.3547, "step": 234048 }, { "epoch": 17.5693808630394, "grad_norm": 0.6381579637527466, "learning_rate": 0.002828, "loss": 1.3519, "step": 234112 }, { "epoch": 17.57418386491557, "grad_norm": 0.5593814849853516, "learning_rate": 0.002828, "loss": 1.3553, "step": 234176 }, { "epoch": 17.578986866791745, "grad_norm": 0.5353369116783142, "learning_rate": 0.002828, "loss": 1.3504, "step": 234240 }, { "epoch": 17.583789868667917, "grad_norm": 0.5172046422958374, "learning_rate": 0.002828, "loss": 1.354, "step": 234304 }, { "epoch": 17.58859287054409, "grad_norm": 0.5404736995697021, "learning_rate": 0.002828, "loss": 1.3529, "step": 234368 }, { "epoch": 17.593395872420263, "grad_norm": 0.5068063139915466, "learning_rate": 0.002828, "loss": 1.3487, "step": 234432 }, { "epoch": 17.598198874296436, "grad_norm": 0.600818932056427, "learning_rate": 0.002828, "loss": 1.3605, "step": 234496 }, { "epoch": 17.60300187617261, "grad_norm": 0.5050070285797119, "learning_rate": 0.002828, "loss": 1.3499, "step": 234560 }, { "epoch": 17.607804878048782, "grad_norm": 0.5006924271583557, "learning_rate": 0.002828, "loss": 1.3574, "step": 234624 }, { "epoch": 17.612607879924955, "grad_norm": 0.5756498575210571, "learning_rate": 0.002828, "loss": 1.356, "step": 234688 }, { "epoch": 17.617410881801124, "grad_norm": 0.5910568237304688, "learning_rate": 0.002828, "loss": 1.3516, "step": 234752 }, { "epoch": 17.622213883677297, "grad_norm": 0.48506519198417664, "learning_rate": 0.002828, "loss": 1.3545, "step": 234816 }, { "epoch": 17.62701688555347, "grad_norm": 1.732113242149353, "learning_rate": 0.002828, "loss": 1.3485, "step": 234880 }, { "epoch": 17.631819887429643, "grad_norm": 0.4970155954360962, "learning_rate": 0.002828, "loss": 1.3588, "step": 234944 }, { "epoch": 17.636622889305816, "grad_norm": 0.5651705265045166, "learning_rate": 0.002828, "loss": 1.3495, "step": 235008 }, { "epoch": 17.64142589118199, "grad_norm": 0.5904951095581055, "learning_rate": 0.002828, "loss": 1.3566, "step": 235072 }, { "epoch": 17.64622889305816, "grad_norm": 0.5296029448509216, "learning_rate": 0.002828, "loss": 1.3471, "step": 235136 }, { "epoch": 17.651031894934334, "grad_norm": 0.5207241177558899, "learning_rate": 0.002828, "loss": 1.3547, "step": 235200 }, { "epoch": 17.655834896810507, "grad_norm": 0.6143551468849182, "learning_rate": 0.002828, "loss": 1.3537, "step": 235264 }, { "epoch": 17.66063789868668, "grad_norm": 0.6017210483551025, "learning_rate": 0.002828, "loss": 1.3512, "step": 235328 }, { "epoch": 17.665440900562853, "grad_norm": 0.4639129340648651, "learning_rate": 0.002828, "loss": 1.3461, "step": 235392 }, { "epoch": 17.670243902439026, "grad_norm": 0.5471293330192566, "learning_rate": 0.002828, "loss": 1.349, "step": 235456 }, { "epoch": 17.675046904315195, "grad_norm": 0.5401851534843445, "learning_rate": 0.002828, "loss": 1.3588, "step": 235520 }, { "epoch": 17.679849906191368, "grad_norm": 0.4593111574649811, "learning_rate": 0.002828, "loss": 1.3465, "step": 235584 }, { "epoch": 17.68465290806754, "grad_norm": 0.5718096494674683, "learning_rate": 0.002828, "loss": 1.3565, "step": 235648 }, { "epoch": 17.689455909943714, "grad_norm": 0.5341073274612427, "learning_rate": 0.002828, "loss": 1.3534, "step": 235712 }, { "epoch": 17.694258911819887, "grad_norm": 0.5088701248168945, "learning_rate": 0.002828, "loss": 1.3553, "step": 235776 }, { "epoch": 17.69906191369606, "grad_norm": 0.5865604877471924, "learning_rate": 0.002828, "loss": 1.3529, "step": 235840 }, { "epoch": 17.703864915572233, "grad_norm": 0.5808840990066528, "learning_rate": 0.002828, "loss": 1.354, "step": 235904 }, { "epoch": 17.708667917448405, "grad_norm": 0.5625920295715332, "learning_rate": 0.002828, "loss": 1.3522, "step": 235968 }, { "epoch": 17.71347091932458, "grad_norm": 0.6199504733085632, "learning_rate": 0.002828, "loss": 1.3521, "step": 236032 }, { "epoch": 17.71827392120075, "grad_norm": 0.4788922071456909, "learning_rate": 0.002828, "loss": 1.3543, "step": 236096 }, { "epoch": 17.723076923076924, "grad_norm": 0.6806089282035828, "learning_rate": 0.002828, "loss": 1.3548, "step": 236160 }, { "epoch": 17.727879924953097, "grad_norm": 0.5504467487335205, "learning_rate": 0.002828, "loss": 1.3534, "step": 236224 }, { "epoch": 17.73268292682927, "grad_norm": 0.5322136878967285, "learning_rate": 0.002828, "loss": 1.3538, "step": 236288 }, { "epoch": 17.73748592870544, "grad_norm": 0.5951442122459412, "learning_rate": 0.002828, "loss": 1.3485, "step": 236352 }, { "epoch": 17.742288930581612, "grad_norm": 0.5594306588172913, "learning_rate": 0.002828, "loss": 1.3579, "step": 236416 }, { "epoch": 17.747091932457785, "grad_norm": 0.6795274615287781, "learning_rate": 0.002828, "loss": 1.3487, "step": 236480 }, { "epoch": 17.751894934333958, "grad_norm": 0.4794277846813202, "learning_rate": 0.002828, "loss": 1.3562, "step": 236544 }, { "epoch": 17.75669793621013, "grad_norm": 0.5802697539329529, "learning_rate": 0.002828, "loss": 1.3489, "step": 236608 }, { "epoch": 17.761500938086304, "grad_norm": 0.4832291305065155, "learning_rate": 0.002828, "loss": 1.3501, "step": 236672 }, { "epoch": 17.766303939962476, "grad_norm": 0.5023680925369263, "learning_rate": 0.002828, "loss": 1.3534, "step": 236736 }, { "epoch": 17.77110694183865, "grad_norm": 0.6139066815376282, "learning_rate": 0.002828, "loss": 1.3501, "step": 236800 }, { "epoch": 17.775909943714822, "grad_norm": 0.5624128580093384, "learning_rate": 0.002828, "loss": 1.3564, "step": 236864 }, { "epoch": 17.780712945590995, "grad_norm": 0.584254801273346, "learning_rate": 0.002828, "loss": 1.3519, "step": 236928 }, { "epoch": 17.785515947467168, "grad_norm": 0.549300491809845, "learning_rate": 0.002828, "loss": 1.3556, "step": 236992 }, { "epoch": 17.79031894934334, "grad_norm": 0.590458333492279, "learning_rate": 0.002828, "loss": 1.3543, "step": 237056 }, { "epoch": 17.795121951219514, "grad_norm": 0.5119503140449524, "learning_rate": 0.002828, "loss": 1.3551, "step": 237120 }, { "epoch": 17.799924953095683, "grad_norm": 0.5858562588691711, "learning_rate": 0.002828, "loss": 1.3494, "step": 237184 }, { "epoch": 17.804727954971856, "grad_norm": 0.4904852509498596, "learning_rate": 0.002828, "loss": 1.3517, "step": 237248 }, { "epoch": 17.80953095684803, "grad_norm": 0.5314335823059082, "learning_rate": 0.002828, "loss": 1.3565, "step": 237312 }, { "epoch": 17.814333958724202, "grad_norm": 0.5037870407104492, "learning_rate": 0.002828, "loss": 1.3538, "step": 237376 }, { "epoch": 17.819136960600375, "grad_norm": 0.6075197458267212, "learning_rate": 0.002828, "loss": 1.3534, "step": 237440 }, { "epoch": 17.823939962476548, "grad_norm": 0.5014629364013672, "learning_rate": 0.002828, "loss": 1.3571, "step": 237504 }, { "epoch": 17.82874296435272, "grad_norm": 0.655502438545227, "learning_rate": 0.002828, "loss": 1.3561, "step": 237568 }, { "epoch": 17.833545966228893, "grad_norm": 0.5249149203300476, "learning_rate": 0.002828, "loss": 1.3469, "step": 237632 }, { "epoch": 17.838348968105066, "grad_norm": 0.5390370488166809, "learning_rate": 0.002828, "loss": 1.3502, "step": 237696 }, { "epoch": 17.84315196998124, "grad_norm": 0.5452220439910889, "learning_rate": 0.002828, "loss": 1.3536, "step": 237760 }, { "epoch": 17.847954971857412, "grad_norm": 0.545632541179657, "learning_rate": 0.002828, "loss": 1.3593, "step": 237824 }, { "epoch": 17.852757973733585, "grad_norm": 0.6663748025894165, "learning_rate": 0.002828, "loss": 1.3566, "step": 237888 }, { "epoch": 17.857560975609758, "grad_norm": 0.5412925481796265, "learning_rate": 0.002828, "loss": 1.3586, "step": 237952 }, { "epoch": 17.862363977485927, "grad_norm": 0.5036992430686951, "learning_rate": 0.002828, "loss": 1.358, "step": 238016 }, { "epoch": 17.8671669793621, "grad_norm": 0.6046959161758423, "learning_rate": 0.002828, "loss": 1.3565, "step": 238080 }, { "epoch": 17.871969981238273, "grad_norm": 0.520389199256897, "learning_rate": 0.002828, "loss": 1.3528, "step": 238144 }, { "epoch": 17.876772983114446, "grad_norm": 0.561380922794342, "learning_rate": 0.002828, "loss": 1.353, "step": 238208 }, { "epoch": 17.88157598499062, "grad_norm": 0.5111840963363647, "learning_rate": 0.002828, "loss": 1.356, "step": 238272 }, { "epoch": 17.88637898686679, "grad_norm": 0.5611425638198853, "learning_rate": 0.002828, "loss": 1.3502, "step": 238336 }, { "epoch": 17.891181988742964, "grad_norm": 0.5181549191474915, "learning_rate": 0.002828, "loss": 1.3537, "step": 238400 }, { "epoch": 17.895984990619137, "grad_norm": 0.5285221338272095, "learning_rate": 0.002828, "loss": 1.3543, "step": 238464 }, { "epoch": 17.90078799249531, "grad_norm": 0.5775920152664185, "learning_rate": 0.002828, "loss": 1.3565, "step": 238528 }, { "epoch": 17.905590994371483, "grad_norm": 0.5790485143661499, "learning_rate": 0.002828, "loss": 1.3532, "step": 238592 }, { "epoch": 17.910393996247656, "grad_norm": 0.5824828743934631, "learning_rate": 0.002828, "loss": 1.3488, "step": 238656 }, { "epoch": 17.91519699812383, "grad_norm": 0.5291104912757874, "learning_rate": 0.002828, "loss": 1.3598, "step": 238720 }, { "epoch": 17.92, "grad_norm": 0.5468594431877136, "learning_rate": 0.002828, "loss": 1.3536, "step": 238784 }, { "epoch": 17.92480300187617, "grad_norm": 0.5960270762443542, "learning_rate": 0.002828, "loss": 1.3548, "step": 238848 }, { "epoch": 17.929606003752344, "grad_norm": 0.514122724533081, "learning_rate": 0.002828, "loss": 1.3614, "step": 238912 }, { "epoch": 17.934409005628517, "grad_norm": 0.5227863192558289, "learning_rate": 0.002828, "loss": 1.3611, "step": 238976 }, { "epoch": 17.93921200750469, "grad_norm": 0.6620153784751892, "learning_rate": 0.002828, "loss": 1.3597, "step": 239040 }, { "epoch": 17.944015009380863, "grad_norm": 0.5719172954559326, "learning_rate": 0.002828, "loss": 1.3542, "step": 239104 }, { "epoch": 17.948818011257035, "grad_norm": 0.5674269795417786, "learning_rate": 0.002828, "loss": 1.3608, "step": 239168 }, { "epoch": 17.95362101313321, "grad_norm": 0.5276614427566528, "learning_rate": 0.002828, "loss": 1.3565, "step": 239232 }, { "epoch": 17.95842401500938, "grad_norm": 0.5094176530838013, "learning_rate": 0.002828, "loss": 1.3565, "step": 239296 }, { "epoch": 17.963227016885554, "grad_norm": 0.6211153864860535, "learning_rate": 0.002828, "loss": 1.3575, "step": 239360 }, { "epoch": 17.968030018761727, "grad_norm": 0.47066572308540344, "learning_rate": 0.002828, "loss": 1.3576, "step": 239424 }, { "epoch": 17.9728330206379, "grad_norm": 0.5774534940719604, "learning_rate": 0.002828, "loss": 1.3522, "step": 239488 }, { "epoch": 17.977636022514073, "grad_norm": 0.7815809845924377, "learning_rate": 0.002828, "loss": 1.3599, "step": 239552 }, { "epoch": 17.982439024390246, "grad_norm": 0.5297728776931763, "learning_rate": 0.002828, "loss": 1.3558, "step": 239616 }, { "epoch": 17.987242026266415, "grad_norm": 0.7529789209365845, "learning_rate": 0.002828, "loss": 1.3534, "step": 239680 }, { "epoch": 17.992045028142588, "grad_norm": 0.5998978018760681, "learning_rate": 0.002828, "loss": 1.3526, "step": 239744 }, { "epoch": 17.99684803001876, "grad_norm": 0.5187614560127258, "learning_rate": 0.002828, "loss": 1.3577, "step": 239808 }, { "epoch": 18.001651031894934, "grad_norm": 0.6527639031410217, "learning_rate": 0.002828, "loss": 1.3353, "step": 239872 }, { "epoch": 18.006454033771107, "grad_norm": 0.5733532309532166, "learning_rate": 0.002828, "loss": 1.3174, "step": 239936 }, { "epoch": 18.01125703564728, "grad_norm": 0.5666289925575256, "learning_rate": 0.002828, "loss": 1.3088, "step": 240000 }, { "epoch": 18.016060037523452, "grad_norm": 0.6003232598304749, "learning_rate": 0.002828, "loss": 1.317, "step": 240064 }, { "epoch": 18.020863039399625, "grad_norm": 0.5648309588432312, "learning_rate": 0.002828, "loss": 1.3115, "step": 240128 }, { "epoch": 18.025666041275798, "grad_norm": 0.5369406342506409, "learning_rate": 0.002828, "loss": 1.3172, "step": 240192 }, { "epoch": 18.03046904315197, "grad_norm": 0.5485430955886841, "learning_rate": 0.002828, "loss": 1.3124, "step": 240256 }, { "epoch": 18.035272045028144, "grad_norm": 0.5280949473381042, "learning_rate": 0.002828, "loss": 1.3177, "step": 240320 }, { "epoch": 18.040075046904317, "grad_norm": 0.5924586057662964, "learning_rate": 0.002828, "loss": 1.3154, "step": 240384 }, { "epoch": 18.044878048780486, "grad_norm": 0.4871281683444977, "learning_rate": 0.002828, "loss": 1.311, "step": 240448 }, { "epoch": 18.04968105065666, "grad_norm": 0.5817774534225464, "learning_rate": 0.002828, "loss": 1.3153, "step": 240512 }, { "epoch": 18.054484052532832, "grad_norm": 0.5690784454345703, "learning_rate": 0.002828, "loss": 1.3202, "step": 240576 }, { "epoch": 18.059287054409005, "grad_norm": 0.601919412612915, "learning_rate": 0.002828, "loss": 1.3211, "step": 240640 }, { "epoch": 18.064090056285178, "grad_norm": 0.5639865398406982, "learning_rate": 0.002828, "loss": 1.322, "step": 240704 }, { "epoch": 18.06889305816135, "grad_norm": 0.5849490165710449, "learning_rate": 0.002828, "loss": 1.3189, "step": 240768 }, { "epoch": 18.073696060037523, "grad_norm": 0.5788787007331848, "learning_rate": 0.002828, "loss": 1.3182, "step": 240832 }, { "epoch": 18.078499061913696, "grad_norm": 0.5909736156463623, "learning_rate": 0.002828, "loss": 1.3202, "step": 240896 }, { "epoch": 18.08330206378987, "grad_norm": 0.6812686324119568, "learning_rate": 0.002828, "loss": 1.3215, "step": 240960 }, { "epoch": 18.088105065666042, "grad_norm": 0.6109818816184998, "learning_rate": 0.002828, "loss": 1.3196, "step": 241024 }, { "epoch": 18.092908067542215, "grad_norm": 0.5453113317489624, "learning_rate": 0.002828, "loss": 1.322, "step": 241088 }, { "epoch": 18.097711069418388, "grad_norm": 0.5813498497009277, "learning_rate": 0.002828, "loss": 1.3173, "step": 241152 }, { "epoch": 18.10251407129456, "grad_norm": 0.5807278156280518, "learning_rate": 0.002828, "loss": 1.3244, "step": 241216 }, { "epoch": 18.10731707317073, "grad_norm": 0.5079891085624695, "learning_rate": 0.002828, "loss": 1.3103, "step": 241280 }, { "epoch": 18.112120075046903, "grad_norm": 0.6419689059257507, "learning_rate": 0.002828, "loss": 1.3181, "step": 241344 }, { "epoch": 18.116923076923076, "grad_norm": 0.5580244660377502, "learning_rate": 0.002828, "loss": 1.3225, "step": 241408 }, { "epoch": 18.12172607879925, "grad_norm": 0.574207067489624, "learning_rate": 0.002828, "loss": 1.3197, "step": 241472 }, { "epoch": 18.12652908067542, "grad_norm": 0.580829918384552, "learning_rate": 0.002828, "loss": 1.3214, "step": 241536 }, { "epoch": 18.131332082551594, "grad_norm": 0.5580022931098938, "learning_rate": 0.002828, "loss": 1.3252, "step": 241600 }, { "epoch": 18.136135084427767, "grad_norm": 0.5125178098678589, "learning_rate": 0.002828, "loss": 1.329, "step": 241664 }, { "epoch": 18.14093808630394, "grad_norm": 0.5466482639312744, "learning_rate": 0.002828, "loss": 1.3277, "step": 241728 }, { "epoch": 18.145741088180113, "grad_norm": 0.4944307804107666, "learning_rate": 0.002828, "loss": 1.3267, "step": 241792 }, { "epoch": 18.150544090056286, "grad_norm": 0.5637335777282715, "learning_rate": 0.002828, "loss": 1.3253, "step": 241856 }, { "epoch": 18.15534709193246, "grad_norm": 0.787543535232544, "learning_rate": 0.002828, "loss": 1.322, "step": 241920 }, { "epoch": 18.16015009380863, "grad_norm": 0.534782886505127, "learning_rate": 0.002828, "loss": 1.3198, "step": 241984 }, { "epoch": 18.164953095684805, "grad_norm": 0.5230289101600647, "learning_rate": 0.002828, "loss": 1.324, "step": 242048 }, { "epoch": 18.169756097560974, "grad_norm": 0.5719302296638489, "learning_rate": 0.002828, "loss": 1.3228, "step": 242112 }, { "epoch": 18.174559099437147, "grad_norm": 0.4946596920490265, "learning_rate": 0.002828, "loss": 1.3259, "step": 242176 }, { "epoch": 18.17936210131332, "grad_norm": 0.5574563145637512, "learning_rate": 0.002828, "loss": 1.322, "step": 242240 }, { "epoch": 18.184165103189493, "grad_norm": 0.6062040328979492, "learning_rate": 0.002828, "loss": 1.3249, "step": 242304 }, { "epoch": 18.188968105065666, "grad_norm": 0.5341858267784119, "learning_rate": 0.002828, "loss": 1.3193, "step": 242368 }, { "epoch": 18.19377110694184, "grad_norm": 0.7191112041473389, "learning_rate": 0.002828, "loss": 1.3215, "step": 242432 }, { "epoch": 18.19857410881801, "grad_norm": 0.5244624018669128, "learning_rate": 0.002828, "loss": 1.3217, "step": 242496 }, { "epoch": 18.203377110694184, "grad_norm": 0.5120688676834106, "learning_rate": 0.002828, "loss": 1.3297, "step": 242560 }, { "epoch": 18.208180112570357, "grad_norm": 0.49170535802841187, "learning_rate": 0.002828, "loss": 1.3244, "step": 242624 }, { "epoch": 18.21298311444653, "grad_norm": 0.554652750492096, "learning_rate": 0.002828, "loss": 1.3202, "step": 242688 }, { "epoch": 18.217786116322703, "grad_norm": 0.573849618434906, "learning_rate": 0.002828, "loss": 1.3267, "step": 242752 }, { "epoch": 18.222589118198876, "grad_norm": 0.49524179100990295, "learning_rate": 0.002828, "loss": 1.3331, "step": 242816 }, { "epoch": 18.22739212007505, "grad_norm": 0.485775351524353, "learning_rate": 0.002828, "loss": 1.3226, "step": 242880 }, { "epoch": 18.232195121951218, "grad_norm": 0.6272198557853699, "learning_rate": 0.002828, "loss": 1.3285, "step": 242944 }, { "epoch": 18.23699812382739, "grad_norm": 0.554471492767334, "learning_rate": 0.002828, "loss": 1.3237, "step": 243008 }, { "epoch": 18.241801125703564, "grad_norm": 0.5620614290237427, "learning_rate": 0.002828, "loss": 1.3283, "step": 243072 }, { "epoch": 18.246604127579737, "grad_norm": 0.5371513366699219, "learning_rate": 0.002828, "loss": 1.3298, "step": 243136 }, { "epoch": 18.25140712945591, "grad_norm": 0.6535647511482239, "learning_rate": 0.002828, "loss": 1.3258, "step": 243200 }, { "epoch": 18.256210131332082, "grad_norm": 0.5744162201881409, "learning_rate": 0.002828, "loss": 1.3286, "step": 243264 }, { "epoch": 18.261013133208255, "grad_norm": 0.5868451595306396, "learning_rate": 0.002828, "loss": 1.3273, "step": 243328 }, { "epoch": 18.265816135084428, "grad_norm": 0.46351656317710876, "learning_rate": 0.002828, "loss": 1.3271, "step": 243392 }, { "epoch": 18.2706191369606, "grad_norm": 0.5453354716300964, "learning_rate": 0.002828, "loss": 1.3263, "step": 243456 }, { "epoch": 18.275422138836774, "grad_norm": 0.5502650737762451, "learning_rate": 0.002828, "loss": 1.328, "step": 243520 }, { "epoch": 18.280225140712947, "grad_norm": 0.6256674528121948, "learning_rate": 0.002828, "loss": 1.3327, "step": 243584 }, { "epoch": 18.28502814258912, "grad_norm": 0.6549529433250427, "learning_rate": 0.002828, "loss": 1.3266, "step": 243648 }, { "epoch": 18.28983114446529, "grad_norm": 0.6387646198272705, "learning_rate": 0.002828, "loss": 1.3337, "step": 243712 }, { "epoch": 18.294634146341462, "grad_norm": 0.529467761516571, "learning_rate": 0.002828, "loss": 1.3333, "step": 243776 }, { "epoch": 18.299437148217635, "grad_norm": 0.6114325523376465, "learning_rate": 0.002828, "loss": 1.3263, "step": 243840 }, { "epoch": 18.304240150093808, "grad_norm": 0.5541909337043762, "learning_rate": 0.002828, "loss": 1.3254, "step": 243904 }, { "epoch": 18.30904315196998, "grad_norm": 0.5705113410949707, "learning_rate": 0.002828, "loss": 1.3202, "step": 243968 }, { "epoch": 18.313846153846153, "grad_norm": 0.4778616428375244, "learning_rate": 0.002828, "loss": 1.3278, "step": 244032 }, { "epoch": 18.318649155722326, "grad_norm": 0.489709734916687, "learning_rate": 0.002828, "loss": 1.3245, "step": 244096 }, { "epoch": 18.3234521575985, "grad_norm": 0.5244088172912598, "learning_rate": 0.002828, "loss": 1.3312, "step": 244160 }, { "epoch": 18.328255159474672, "grad_norm": 0.6958155632019043, "learning_rate": 0.002828, "loss": 1.3255, "step": 244224 }, { "epoch": 18.333058161350845, "grad_norm": 0.646683394908905, "learning_rate": 0.002828, "loss": 1.3244, "step": 244288 }, { "epoch": 18.337861163227018, "grad_norm": 0.6917694807052612, "learning_rate": 0.002828, "loss": 1.3287, "step": 244352 }, { "epoch": 18.34266416510319, "grad_norm": 0.6619091629981995, "learning_rate": 0.002828, "loss": 1.325, "step": 244416 }, { "epoch": 18.347467166979364, "grad_norm": 0.6327647566795349, "learning_rate": 0.002828, "loss": 1.3373, "step": 244480 }, { "epoch": 18.352270168855533, "grad_norm": 0.537758469581604, "learning_rate": 0.002828, "loss": 1.33, "step": 244544 }, { "epoch": 18.357073170731706, "grad_norm": 0.5703405737876892, "learning_rate": 0.002828, "loss": 1.3276, "step": 244608 }, { "epoch": 18.36187617260788, "grad_norm": 0.5090919733047485, "learning_rate": 0.002828, "loss": 1.3284, "step": 244672 }, { "epoch": 18.36667917448405, "grad_norm": 0.7423458695411682, "learning_rate": 0.002828, "loss": 1.3236, "step": 244736 }, { "epoch": 18.371482176360225, "grad_norm": 0.5132669806480408, "learning_rate": 0.002828, "loss": 1.3334, "step": 244800 }, { "epoch": 18.376285178236397, "grad_norm": 0.5711482763290405, "learning_rate": 0.002828, "loss": 1.3332, "step": 244864 }, { "epoch": 18.38108818011257, "grad_norm": 0.5306158065795898, "learning_rate": 0.002828, "loss": 1.3336, "step": 244928 }, { "epoch": 18.385891181988743, "grad_norm": 0.5341092348098755, "learning_rate": 0.002828, "loss": 1.3287, "step": 244992 }, { "epoch": 18.390694183864916, "grad_norm": 0.562984049320221, "learning_rate": 0.002828, "loss": 1.3245, "step": 245056 }, { "epoch": 18.39549718574109, "grad_norm": 0.5212814807891846, "learning_rate": 0.002828, "loss": 1.3239, "step": 245120 }, { "epoch": 18.400300187617262, "grad_norm": 0.6818906664848328, "learning_rate": 0.002828, "loss": 1.3314, "step": 245184 }, { "epoch": 18.405103189493435, "grad_norm": 0.4990367889404297, "learning_rate": 0.002828, "loss": 1.329, "step": 245248 }, { "epoch": 18.409906191369608, "grad_norm": 0.5012723803520203, "learning_rate": 0.002828, "loss": 1.3373, "step": 245312 }, { "epoch": 18.414709193245777, "grad_norm": 0.6447637677192688, "learning_rate": 0.002828, "loss": 1.3288, "step": 245376 }, { "epoch": 18.41951219512195, "grad_norm": 0.6037337779998779, "learning_rate": 0.002828, "loss": 1.3304, "step": 245440 }, { "epoch": 18.424315196998123, "grad_norm": 0.6380529999732971, "learning_rate": 0.002828, "loss": 1.3302, "step": 245504 }, { "epoch": 18.429118198874296, "grad_norm": 0.6336204409599304, "learning_rate": 0.002828, "loss": 1.3312, "step": 245568 }, { "epoch": 18.43392120075047, "grad_norm": 0.5741140842437744, "learning_rate": 0.002828, "loss": 1.3269, "step": 245632 }, { "epoch": 18.43872420262664, "grad_norm": 0.4480578899383545, "learning_rate": 0.002828, "loss": 1.3392, "step": 245696 }, { "epoch": 18.443527204502814, "grad_norm": 0.5134573578834534, "learning_rate": 0.002828, "loss": 1.3299, "step": 245760 }, { "epoch": 18.448330206378987, "grad_norm": 0.5798774361610413, "learning_rate": 0.002828, "loss": 1.3284, "step": 245824 }, { "epoch": 18.45313320825516, "grad_norm": 0.49131253361701965, "learning_rate": 0.002828, "loss": 1.3329, "step": 245888 }, { "epoch": 18.457936210131333, "grad_norm": 0.64532870054245, "learning_rate": 0.002828, "loss": 1.333, "step": 245952 }, { "epoch": 18.462739212007506, "grad_norm": 0.5741368532180786, "learning_rate": 0.002828, "loss": 1.3314, "step": 246016 }, { "epoch": 18.46754221388368, "grad_norm": 0.5318816304206848, "learning_rate": 0.002828, "loss": 1.3324, "step": 246080 }, { "epoch": 18.47234521575985, "grad_norm": 0.49761515855789185, "learning_rate": 0.002828, "loss": 1.33, "step": 246144 }, { "epoch": 18.47714821763602, "grad_norm": 0.5801960825920105, "learning_rate": 0.002828, "loss": 1.3322, "step": 246208 }, { "epoch": 18.481951219512194, "grad_norm": 0.6103347539901733, "learning_rate": 0.002828, "loss": 1.3373, "step": 246272 }, { "epoch": 18.486754221388367, "grad_norm": 0.6255427598953247, "learning_rate": 0.002828, "loss": 1.3294, "step": 246336 }, { "epoch": 18.49155722326454, "grad_norm": 0.6342930197715759, "learning_rate": 0.002828, "loss": 1.3299, "step": 246400 }, { "epoch": 18.496360225140712, "grad_norm": 0.5457690358161926, "learning_rate": 0.002828, "loss": 1.3235, "step": 246464 }, { "epoch": 18.501163227016885, "grad_norm": 0.4921017289161682, "learning_rate": 0.002828, "loss": 1.3298, "step": 246528 }, { "epoch": 18.505966228893058, "grad_norm": 0.520806074142456, "learning_rate": 0.002828, "loss": 1.3272, "step": 246592 }, { "epoch": 18.51076923076923, "grad_norm": 0.49572330713272095, "learning_rate": 0.002828, "loss": 1.3307, "step": 246656 }, { "epoch": 18.515572232645404, "grad_norm": 0.6336793303489685, "learning_rate": 0.002828, "loss": 1.3293, "step": 246720 }, { "epoch": 18.520375234521577, "grad_norm": 0.5762158036231995, "learning_rate": 0.002828, "loss": 1.329, "step": 246784 }, { "epoch": 18.52517823639775, "grad_norm": 0.5626549124717712, "learning_rate": 0.002828, "loss": 1.3354, "step": 246848 }, { "epoch": 18.529981238273923, "grad_norm": 0.5641196966171265, "learning_rate": 0.002828, "loss": 1.3277, "step": 246912 }, { "epoch": 18.534784240150096, "grad_norm": 0.7018244862556458, "learning_rate": 0.002828, "loss": 1.3281, "step": 246976 }, { "epoch": 18.539587242026265, "grad_norm": 0.582954466342926, "learning_rate": 0.002828, "loss": 1.333, "step": 247040 }, { "epoch": 18.544390243902438, "grad_norm": 0.5603383183479309, "learning_rate": 0.002828, "loss": 1.3335, "step": 247104 }, { "epoch": 18.54919324577861, "grad_norm": 0.6978287696838379, "learning_rate": 0.002828, "loss": 1.3328, "step": 247168 }, { "epoch": 18.553996247654784, "grad_norm": 0.6165763139724731, "learning_rate": 0.002828, "loss": 1.3345, "step": 247232 }, { "epoch": 18.558799249530956, "grad_norm": 0.5360153913497925, "learning_rate": 0.002828, "loss": 1.3302, "step": 247296 }, { "epoch": 18.56360225140713, "grad_norm": 0.6076267957687378, "learning_rate": 0.002828, "loss": 1.3295, "step": 247360 }, { "epoch": 18.568405253283302, "grad_norm": 0.6428861021995544, "learning_rate": 0.002828, "loss": 1.3255, "step": 247424 }, { "epoch": 18.573208255159475, "grad_norm": 0.6469327211380005, "learning_rate": 0.002828, "loss": 1.3348, "step": 247488 }, { "epoch": 18.578011257035648, "grad_norm": 0.49372467398643494, "learning_rate": 0.002828, "loss": 1.3343, "step": 247552 }, { "epoch": 18.58281425891182, "grad_norm": 0.5334155559539795, "learning_rate": 0.002828, "loss": 1.3337, "step": 247616 }, { "epoch": 18.587617260787994, "grad_norm": 0.589820384979248, "learning_rate": 0.002828, "loss": 1.3345, "step": 247680 }, { "epoch": 18.592420262664167, "grad_norm": 0.5126547813415527, "learning_rate": 0.002828, "loss": 1.3337, "step": 247744 }, { "epoch": 18.59722326454034, "grad_norm": 0.5486066937446594, "learning_rate": 0.002828, "loss": 1.3365, "step": 247808 }, { "epoch": 18.60202626641651, "grad_norm": 0.48571765422821045, "learning_rate": 0.002828, "loss": 1.3337, "step": 247872 }, { "epoch": 18.60682926829268, "grad_norm": 0.5378714799880981, "learning_rate": 0.002828, "loss": 1.3348, "step": 247936 }, { "epoch": 18.611632270168855, "grad_norm": 0.5203801393508911, "learning_rate": 0.002828, "loss": 1.3332, "step": 248000 }, { "epoch": 18.616435272045027, "grad_norm": 0.5799272060394287, "learning_rate": 0.002828, "loss": 1.3319, "step": 248064 }, { "epoch": 18.6212382739212, "grad_norm": 0.5430501103401184, "learning_rate": 0.002828, "loss": 1.3387, "step": 248128 }, { "epoch": 18.626041275797373, "grad_norm": 0.5245668888092041, "learning_rate": 0.002828, "loss": 1.3368, "step": 248192 }, { "epoch": 18.630844277673546, "grad_norm": 0.5500134229660034, "learning_rate": 0.002828, "loss": 1.3378, "step": 248256 }, { "epoch": 18.63564727954972, "grad_norm": 0.6872992515563965, "learning_rate": 0.002828, "loss": 1.3332, "step": 248320 }, { "epoch": 18.640450281425892, "grad_norm": 0.510874330997467, "learning_rate": 0.002828, "loss": 1.3332, "step": 248384 }, { "epoch": 18.645253283302065, "grad_norm": 0.5345406532287598, "learning_rate": 0.002828, "loss": 1.3368, "step": 248448 }, { "epoch": 18.650056285178238, "grad_norm": 0.5812759399414062, "learning_rate": 0.002828, "loss": 1.3316, "step": 248512 }, { "epoch": 18.65485928705441, "grad_norm": 0.5518085360527039, "learning_rate": 0.002828, "loss": 1.3394, "step": 248576 }, { "epoch": 18.659662288930583, "grad_norm": 0.6124371886253357, "learning_rate": 0.002828, "loss": 1.3317, "step": 248640 }, { "epoch": 18.664465290806753, "grad_norm": 0.5146282911300659, "learning_rate": 0.002828, "loss": 1.331, "step": 248704 }, { "epoch": 18.669268292682926, "grad_norm": 0.5922949314117432, "learning_rate": 0.002828, "loss": 1.3372, "step": 248768 }, { "epoch": 18.6740712945591, "grad_norm": 0.5442377924919128, "learning_rate": 0.002828, "loss": 1.334, "step": 248832 }, { "epoch": 18.67887429643527, "grad_norm": 0.561581552028656, "learning_rate": 0.002828, "loss": 1.3375, "step": 248896 }, { "epoch": 18.683677298311444, "grad_norm": 0.546955406665802, "learning_rate": 0.002828, "loss": 1.3296, "step": 248960 }, { "epoch": 18.688480300187617, "grad_norm": 0.531005322933197, "learning_rate": 0.002828, "loss": 1.3318, "step": 249024 }, { "epoch": 18.69328330206379, "grad_norm": 0.5529969334602356, "learning_rate": 0.002828, "loss": 1.3347, "step": 249088 }, { "epoch": 18.698086303939963, "grad_norm": 0.6071882843971252, "learning_rate": 0.002828, "loss": 1.3339, "step": 249152 }, { "epoch": 18.702889305816136, "grad_norm": 0.5620871782302856, "learning_rate": 0.002828, "loss": 1.3388, "step": 249216 }, { "epoch": 18.70769230769231, "grad_norm": 0.6317915320396423, "learning_rate": 0.002828, "loss": 1.34, "step": 249280 }, { "epoch": 18.71249530956848, "grad_norm": 0.5283500552177429, "learning_rate": 0.002828, "loss": 1.3445, "step": 249344 }, { "epoch": 18.717298311444655, "grad_norm": 0.5978621244430542, "learning_rate": 0.002828, "loss": 1.3301, "step": 249408 }, { "epoch": 18.722101313320824, "grad_norm": 0.5326447486877441, "learning_rate": 0.002828, "loss": 1.3356, "step": 249472 }, { "epoch": 18.726904315196997, "grad_norm": 0.49449408054351807, "learning_rate": 0.002828, "loss": 1.3377, "step": 249536 }, { "epoch": 18.73170731707317, "grad_norm": 0.5880950689315796, "learning_rate": 0.002828, "loss": 1.332, "step": 249600 }, { "epoch": 18.736510318949342, "grad_norm": 0.513163685798645, "learning_rate": 0.002828, "loss": 1.3346, "step": 249664 }, { "epoch": 18.741313320825515, "grad_norm": 0.5993733406066895, "learning_rate": 0.002828, "loss": 1.3402, "step": 249728 }, { "epoch": 18.74611632270169, "grad_norm": 0.581052839756012, "learning_rate": 0.002828, "loss": 1.3343, "step": 249792 }, { "epoch": 18.75091932457786, "grad_norm": 0.5721963047981262, "learning_rate": 0.002828, "loss": 1.3405, "step": 249856 }, { "epoch": 18.755722326454034, "grad_norm": 0.612507700920105, "learning_rate": 0.002828, "loss": 1.3327, "step": 249920 }, { "epoch": 18.760525328330207, "grad_norm": 0.6289708614349365, "learning_rate": 0.002828, "loss": 1.3423, "step": 249984 }, { "epoch": 18.76532833020638, "grad_norm": 0.5146111845970154, "learning_rate": 0.002828, "loss": 1.3331, "step": 250048 }, { "epoch": 18.770131332082553, "grad_norm": 0.5346642136573792, "learning_rate": 0.002828, "loss": 1.3326, "step": 250112 }, { "epoch": 18.774934333958726, "grad_norm": 0.4899759292602539, "learning_rate": 0.002828, "loss": 1.3313, "step": 250176 }, { "epoch": 18.7797373358349, "grad_norm": 0.5708340406417847, "learning_rate": 0.002828, "loss": 1.3411, "step": 250240 }, { "epoch": 18.784540337711068, "grad_norm": 0.5400072932243347, "learning_rate": 0.002828, "loss": 1.3393, "step": 250304 }, { "epoch": 18.78934333958724, "grad_norm": 0.5824398398399353, "learning_rate": 0.002828, "loss": 1.3345, "step": 250368 }, { "epoch": 18.794146341463414, "grad_norm": 0.6986384987831116, "learning_rate": 0.002828, "loss": 1.3321, "step": 250432 }, { "epoch": 18.798949343339586, "grad_norm": 0.6101935505867004, "learning_rate": 0.002828, "loss": 1.3393, "step": 250496 }, { "epoch": 18.80375234521576, "grad_norm": 0.6558166146278381, "learning_rate": 0.002828, "loss": 1.336, "step": 250560 }, { "epoch": 18.808555347091932, "grad_norm": 0.526673436164856, "learning_rate": 0.002828, "loss": 1.3294, "step": 250624 }, { "epoch": 18.813358348968105, "grad_norm": 0.48416274785995483, "learning_rate": 0.002828, "loss": 1.3446, "step": 250688 }, { "epoch": 18.818161350844278, "grad_norm": 0.5560637712478638, "learning_rate": 0.002828, "loss": 1.3373, "step": 250752 }, { "epoch": 18.82296435272045, "grad_norm": 0.572029173374176, "learning_rate": 0.002828, "loss": 1.3367, "step": 250816 }, { "epoch": 18.827767354596624, "grad_norm": 0.4713887572288513, "learning_rate": 0.002828, "loss": 1.3418, "step": 250880 }, { "epoch": 18.832570356472797, "grad_norm": 0.5924293398857117, "learning_rate": 0.002828, "loss": 1.332, "step": 250944 }, { "epoch": 18.83737335834897, "grad_norm": 0.6764249205589294, "learning_rate": 0.002828, "loss": 1.3407, "step": 251008 }, { "epoch": 18.842176360225142, "grad_norm": 0.5857234001159668, "learning_rate": 0.002828, "loss": 1.3359, "step": 251072 }, { "epoch": 18.84697936210131, "grad_norm": 0.6040510535240173, "learning_rate": 0.002828, "loss": 1.3356, "step": 251136 }, { "epoch": 18.851782363977485, "grad_norm": 0.49172133207321167, "learning_rate": 0.002828, "loss": 1.3361, "step": 251200 }, { "epoch": 18.856585365853658, "grad_norm": 0.5720678567886353, "learning_rate": 0.002828, "loss": 1.3394, "step": 251264 }, { "epoch": 18.86138836772983, "grad_norm": 0.6578644514083862, "learning_rate": 0.002828, "loss": 1.3347, "step": 251328 }, { "epoch": 18.866191369606003, "grad_norm": 0.5127058029174805, "learning_rate": 0.002828, "loss": 1.3417, "step": 251392 }, { "epoch": 18.870994371482176, "grad_norm": 0.5229156613349915, "learning_rate": 0.002828, "loss": 1.3362, "step": 251456 }, { "epoch": 18.87579737335835, "grad_norm": 0.5593118667602539, "learning_rate": 0.002828, "loss": 1.3402, "step": 251520 }, { "epoch": 18.880600375234522, "grad_norm": 0.648236870765686, "learning_rate": 0.002828, "loss": 1.336, "step": 251584 }, { "epoch": 18.885403377110695, "grad_norm": 0.584019660949707, "learning_rate": 0.002828, "loss": 1.3407, "step": 251648 }, { "epoch": 18.890206378986868, "grad_norm": 0.5781084299087524, "learning_rate": 0.002828, "loss": 1.3381, "step": 251712 }, { "epoch": 18.89500938086304, "grad_norm": 0.5639113783836365, "learning_rate": 0.002828, "loss": 1.3416, "step": 251776 }, { "epoch": 18.899812382739213, "grad_norm": 0.5335075259208679, "learning_rate": 0.002828, "loss": 1.3373, "step": 251840 }, { "epoch": 18.904615384615383, "grad_norm": 0.5257242321968079, "learning_rate": 0.002828, "loss": 1.3389, "step": 251904 }, { "epoch": 18.909418386491556, "grad_norm": 0.5944527387619019, "learning_rate": 0.002828, "loss": 1.335, "step": 251968 }, { "epoch": 18.91422138836773, "grad_norm": 0.5301898121833801, "learning_rate": 0.002828, "loss": 1.3346, "step": 252032 }, { "epoch": 18.9190243902439, "grad_norm": 0.5499849915504456, "learning_rate": 0.002828, "loss": 1.3382, "step": 252096 }, { "epoch": 18.923827392120074, "grad_norm": 0.5929718017578125, "learning_rate": 0.002828, "loss": 1.3324, "step": 252160 }, { "epoch": 18.928630393996247, "grad_norm": 0.6182397603988647, "learning_rate": 0.002828, "loss": 1.3444, "step": 252224 }, { "epoch": 18.93343339587242, "grad_norm": 0.5975897908210754, "learning_rate": 0.002828, "loss": 1.3331, "step": 252288 }, { "epoch": 18.938236397748593, "grad_norm": 0.5303201675415039, "learning_rate": 0.002828, "loss": 1.3354, "step": 252352 }, { "epoch": 18.943039399624766, "grad_norm": 0.5473834872245789, "learning_rate": 0.002828, "loss": 1.3406, "step": 252416 }, { "epoch": 18.94784240150094, "grad_norm": 0.5609472990036011, "learning_rate": 0.002828, "loss": 1.3362, "step": 252480 }, { "epoch": 18.95264540337711, "grad_norm": 0.6121054291725159, "learning_rate": 0.002828, "loss": 1.341, "step": 252544 }, { "epoch": 18.957448405253285, "grad_norm": 0.5182227492332458, "learning_rate": 0.002828, "loss": 1.3422, "step": 252608 }, { "epoch": 18.962251407129457, "grad_norm": 0.5840742588043213, "learning_rate": 0.002828, "loss": 1.3372, "step": 252672 }, { "epoch": 18.967054409005627, "grad_norm": 0.5088547468185425, "learning_rate": 0.002828, "loss": 1.34, "step": 252736 }, { "epoch": 18.9718574108818, "grad_norm": 0.63606196641922, "learning_rate": 0.002828, "loss": 1.3368, "step": 252800 }, { "epoch": 18.976660412757973, "grad_norm": 0.5369198322296143, "learning_rate": 0.002828, "loss": 1.3366, "step": 252864 }, { "epoch": 18.981463414634145, "grad_norm": 0.5701738595962524, "learning_rate": 0.002828, "loss": 1.3416, "step": 252928 }, { "epoch": 18.98626641651032, "grad_norm": 0.7686113119125366, "learning_rate": 0.002828, "loss": 1.3326, "step": 252992 }, { "epoch": 18.99106941838649, "grad_norm": 0.5373234152793884, "learning_rate": 0.002828, "loss": 1.3331, "step": 253056 }, { "epoch": 18.995872420262664, "grad_norm": 0.5551041960716248, "learning_rate": 0.002828, "loss": 1.3396, "step": 253120 }, { "epoch": 19.000675422138837, "grad_norm": 0.47208505868911743, "learning_rate": 0.002828, "loss": 1.3379, "step": 253184 }, { "epoch": 19.00547842401501, "grad_norm": 0.6245517730712891, "learning_rate": 0.002828, "loss": 1.2966, "step": 253248 }, { "epoch": 19.010281425891183, "grad_norm": 0.5484210252761841, "learning_rate": 0.002828, "loss": 1.3004, "step": 253312 }, { "epoch": 19.015084427767356, "grad_norm": 0.583123505115509, "learning_rate": 0.002828, "loss": 1.2976, "step": 253376 }, { "epoch": 19.01988742964353, "grad_norm": 0.5009977221488953, "learning_rate": 0.002828, "loss": 1.3014, "step": 253440 }, { "epoch": 19.0246904315197, "grad_norm": 0.5530526638031006, "learning_rate": 0.002828, "loss": 1.2933, "step": 253504 }, { "epoch": 19.02949343339587, "grad_norm": 0.5873714089393616, "learning_rate": 0.002828, "loss": 1.2982, "step": 253568 }, { "epoch": 19.034296435272044, "grad_norm": 0.6122865080833435, "learning_rate": 0.002828, "loss": 1.2976, "step": 253632 }, { "epoch": 19.039099437148217, "grad_norm": 0.5970596671104431, "learning_rate": 0.002828, "loss": 1.3003, "step": 253696 }, { "epoch": 19.04390243902439, "grad_norm": 0.5157353281974792, "learning_rate": 0.002828, "loss": 1.3001, "step": 253760 }, { "epoch": 19.048705440900562, "grad_norm": 0.5586423873901367, "learning_rate": 0.002828, "loss": 1.2937, "step": 253824 }, { "epoch": 19.053508442776735, "grad_norm": 0.588364839553833, "learning_rate": 0.002828, "loss": 1.2976, "step": 253888 }, { "epoch": 19.058311444652908, "grad_norm": 0.6247104406356812, "learning_rate": 0.002828, "loss": 1.2913, "step": 253952 }, { "epoch": 19.06311444652908, "grad_norm": 0.6078224182128906, "learning_rate": 0.002828, "loss": 1.3058, "step": 254016 }, { "epoch": 19.067917448405254, "grad_norm": 0.5876947045326233, "learning_rate": 0.002828, "loss": 1.3009, "step": 254080 }, { "epoch": 19.072720450281427, "grad_norm": 0.5504317879676819, "learning_rate": 0.002828, "loss": 1.3055, "step": 254144 }, { "epoch": 19.0775234521576, "grad_norm": 0.5758258104324341, "learning_rate": 0.002828, "loss": 1.3036, "step": 254208 }, { "epoch": 19.082326454033772, "grad_norm": 0.6489672064781189, "learning_rate": 0.002828, "loss": 1.3009, "step": 254272 }, { "epoch": 19.087129455909945, "grad_norm": 0.41989511251449585, "learning_rate": 0.002828, "loss": 1.3018, "step": 254336 }, { "epoch": 19.091932457786115, "grad_norm": 0.48466578125953674, "learning_rate": 0.002828, "loss": 1.3025, "step": 254400 }, { "epoch": 19.096735459662288, "grad_norm": 0.5625917315483093, "learning_rate": 0.002828, "loss": 1.3011, "step": 254464 }, { "epoch": 19.10153846153846, "grad_norm": 0.7944159507751465, "learning_rate": 0.002828, "loss": 1.3037, "step": 254528 }, { "epoch": 19.106341463414633, "grad_norm": 0.612187385559082, "learning_rate": 0.002828, "loss": 1.3056, "step": 254592 }, { "epoch": 19.111144465290806, "grad_norm": 0.48978328704833984, "learning_rate": 0.002828, "loss": 1.3029, "step": 254656 }, { "epoch": 19.11594746716698, "grad_norm": 0.5934069156646729, "learning_rate": 0.002828, "loss": 1.3038, "step": 254720 }, { "epoch": 19.120750469043152, "grad_norm": 0.5898623466491699, "learning_rate": 0.002828, "loss": 1.2984, "step": 254784 }, { "epoch": 19.125553470919325, "grad_norm": 0.5596930384635925, "learning_rate": 0.002828, "loss": 1.3055, "step": 254848 }, { "epoch": 19.130356472795498, "grad_norm": 0.6557482481002808, "learning_rate": 0.002828, "loss": 1.3019, "step": 254912 }, { "epoch": 19.13515947467167, "grad_norm": 0.5334518551826477, "learning_rate": 0.002828, "loss": 1.3057, "step": 254976 }, { "epoch": 19.139962476547844, "grad_norm": 0.49692779779434204, "learning_rate": 0.002828, "loss": 1.3074, "step": 255040 }, { "epoch": 19.144765478424016, "grad_norm": 0.5714998245239258, "learning_rate": 0.002828, "loss": 1.3107, "step": 255104 }, { "epoch": 19.14956848030019, "grad_norm": 0.5583386421203613, "learning_rate": 0.002828, "loss": 1.303, "step": 255168 }, { "epoch": 19.15437148217636, "grad_norm": 0.5013801455497742, "learning_rate": 0.002828, "loss": 1.301, "step": 255232 }, { "epoch": 19.15917448405253, "grad_norm": 0.5694747567176819, "learning_rate": 0.002828, "loss": 1.3118, "step": 255296 }, { "epoch": 19.163977485928704, "grad_norm": 0.5212017893791199, "learning_rate": 0.002828, "loss": 1.3072, "step": 255360 }, { "epoch": 19.168780487804877, "grad_norm": 0.6951311230659485, "learning_rate": 0.002828, "loss": 1.3139, "step": 255424 }, { "epoch": 19.17358348968105, "grad_norm": 0.6076920628547668, "learning_rate": 0.002828, "loss": 1.3021, "step": 255488 }, { "epoch": 19.178386491557223, "grad_norm": 0.6477721929550171, "learning_rate": 0.002828, "loss": 1.3085, "step": 255552 }, { "epoch": 19.183189493433396, "grad_norm": 0.5557982325553894, "learning_rate": 0.002828, "loss": 1.3043, "step": 255616 }, { "epoch": 19.18799249530957, "grad_norm": 0.6123504638671875, "learning_rate": 0.002828, "loss": 1.3, "step": 255680 }, { "epoch": 19.19279549718574, "grad_norm": 0.5342661142349243, "learning_rate": 0.002828, "loss": 1.3066, "step": 255744 }, { "epoch": 19.197598499061915, "grad_norm": 0.5250298976898193, "learning_rate": 0.002828, "loss": 1.3057, "step": 255808 }, { "epoch": 19.202401500938088, "grad_norm": 0.577377438545227, "learning_rate": 0.002828, "loss": 1.3101, "step": 255872 }, { "epoch": 19.20720450281426, "grad_norm": 0.47325846552848816, "learning_rate": 0.002828, "loss": 1.3117, "step": 255936 }, { "epoch": 19.212007504690433, "grad_norm": 0.5749341249465942, "learning_rate": 0.002828, "loss": 1.3073, "step": 256000 }, { "epoch": 19.216810506566603, "grad_norm": 0.6236361861228943, "learning_rate": 0.002828, "loss": 1.299, "step": 256064 }, { "epoch": 19.221613508442775, "grad_norm": 0.5049059987068176, "learning_rate": 0.002828, "loss": 1.3105, "step": 256128 }, { "epoch": 19.22641651031895, "grad_norm": 0.5014175772666931, "learning_rate": 0.002828, "loss": 1.3136, "step": 256192 }, { "epoch": 19.23121951219512, "grad_norm": 0.6570005416870117, "learning_rate": 0.002828, "loss": 1.3021, "step": 256256 }, { "epoch": 19.236022514071294, "grad_norm": 0.5768321752548218, "learning_rate": 0.002828, "loss": 1.3102, "step": 256320 }, { "epoch": 19.240825515947467, "grad_norm": 0.5796010494232178, "learning_rate": 0.002828, "loss": 1.3108, "step": 256384 }, { "epoch": 19.24562851782364, "grad_norm": 0.5883336663246155, "learning_rate": 0.002828, "loss": 1.3029, "step": 256448 }, { "epoch": 19.250431519699813, "grad_norm": 0.5516217947006226, "learning_rate": 0.002828, "loss": 1.3038, "step": 256512 }, { "epoch": 19.255234521575986, "grad_norm": 0.5372753143310547, "learning_rate": 0.002828, "loss": 1.3082, "step": 256576 }, { "epoch": 19.26003752345216, "grad_norm": 0.5117861032485962, "learning_rate": 0.002828, "loss": 1.3106, "step": 256640 }, { "epoch": 19.26484052532833, "grad_norm": 0.5282958149909973, "learning_rate": 0.002828, "loss": 1.3051, "step": 256704 }, { "epoch": 19.269643527204504, "grad_norm": 0.5638288855552673, "learning_rate": 0.002828, "loss": 1.3112, "step": 256768 }, { "epoch": 19.274446529080674, "grad_norm": 0.5715838074684143, "learning_rate": 0.002828, "loss": 1.3119, "step": 256832 }, { "epoch": 19.279249530956847, "grad_norm": 0.5761364698410034, "learning_rate": 0.002828, "loss": 1.3069, "step": 256896 }, { "epoch": 19.28405253283302, "grad_norm": 0.5988823771476746, "learning_rate": 0.002828, "loss": 1.3132, "step": 256960 }, { "epoch": 19.288855534709192, "grad_norm": 0.7384783029556274, "learning_rate": 0.002828, "loss": 1.3094, "step": 257024 }, { "epoch": 19.293658536585365, "grad_norm": 0.5268698930740356, "learning_rate": 0.002828, "loss": 1.3098, "step": 257088 }, { "epoch": 19.298461538461538, "grad_norm": 0.6224947571754456, "learning_rate": 0.002828, "loss": 1.3122, "step": 257152 }, { "epoch": 19.30326454033771, "grad_norm": 0.5584701895713806, "learning_rate": 0.002828, "loss": 1.3127, "step": 257216 }, { "epoch": 19.308067542213884, "grad_norm": 0.5066078305244446, "learning_rate": 0.002828, "loss": 1.3117, "step": 257280 }, { "epoch": 19.312870544090057, "grad_norm": 0.6613951325416565, "learning_rate": 0.002828, "loss": 1.3134, "step": 257344 }, { "epoch": 19.31767354596623, "grad_norm": 0.4861260652542114, "learning_rate": 0.002828, "loss": 1.3143, "step": 257408 }, { "epoch": 19.322476547842403, "grad_norm": 0.5384355783462524, "learning_rate": 0.002828, "loss": 1.3144, "step": 257472 }, { "epoch": 19.327279549718575, "grad_norm": 0.6605150699615479, "learning_rate": 0.002828, "loss": 1.309, "step": 257536 }, { "epoch": 19.33208255159475, "grad_norm": 0.5852476358413696, "learning_rate": 0.002828, "loss": 1.3083, "step": 257600 }, { "epoch": 19.336885553470918, "grad_norm": 0.6060090065002441, "learning_rate": 0.002828, "loss": 1.3171, "step": 257664 }, { "epoch": 19.34168855534709, "grad_norm": 0.5727629661560059, "learning_rate": 0.002828, "loss": 1.3078, "step": 257728 }, { "epoch": 19.346491557223263, "grad_norm": 0.7055615782737732, "learning_rate": 0.002828, "loss": 1.3177, "step": 257792 }, { "epoch": 19.351294559099436, "grad_norm": 0.5629110932350159, "learning_rate": 0.002828, "loss": 1.3113, "step": 257856 }, { "epoch": 19.35609756097561, "grad_norm": 0.5396313667297363, "learning_rate": 0.002828, "loss": 1.313, "step": 257920 }, { "epoch": 19.360900562851782, "grad_norm": 0.5892098546028137, "learning_rate": 0.002828, "loss": 1.3139, "step": 257984 }, { "epoch": 19.365703564727955, "grad_norm": 0.5580728650093079, "learning_rate": 0.002828, "loss": 1.3034, "step": 258048 }, { "epoch": 19.370506566604128, "grad_norm": 0.561872124671936, "learning_rate": 0.002828, "loss": 1.3134, "step": 258112 }, { "epoch": 19.3753095684803, "grad_norm": 0.4941599667072296, "learning_rate": 0.002828, "loss": 1.3119, "step": 258176 }, { "epoch": 19.380112570356474, "grad_norm": 0.6258257627487183, "learning_rate": 0.002828, "loss": 1.3164, "step": 258240 }, { "epoch": 19.384915572232646, "grad_norm": 0.5165227651596069, "learning_rate": 0.002828, "loss": 1.3111, "step": 258304 }, { "epoch": 19.38971857410882, "grad_norm": 0.6330660581588745, "learning_rate": 0.002828, "loss": 1.3124, "step": 258368 }, { "epoch": 19.394521575984992, "grad_norm": 0.6326627135276794, "learning_rate": 0.002828, "loss": 1.3093, "step": 258432 }, { "epoch": 19.39932457786116, "grad_norm": 0.5849427580833435, "learning_rate": 0.002828, "loss": 1.3093, "step": 258496 }, { "epoch": 19.404127579737334, "grad_norm": 0.566590428352356, "learning_rate": 0.002828, "loss": 1.3096, "step": 258560 }, { "epoch": 19.408930581613507, "grad_norm": 0.5885806679725647, "learning_rate": 0.002828, "loss": 1.3137, "step": 258624 }, { "epoch": 19.41373358348968, "grad_norm": 0.5910857319831848, "learning_rate": 0.002828, "loss": 1.3125, "step": 258688 }, { "epoch": 19.418536585365853, "grad_norm": 0.6003797650337219, "learning_rate": 0.002828, "loss": 1.3155, "step": 258752 }, { "epoch": 19.423339587242026, "grad_norm": 0.5352999567985535, "learning_rate": 0.002828, "loss": 1.3114, "step": 258816 }, { "epoch": 19.4281425891182, "grad_norm": 0.5617457032203674, "learning_rate": 0.002828, "loss": 1.3167, "step": 258880 }, { "epoch": 19.432945590994372, "grad_norm": 0.6122784614562988, "learning_rate": 0.002828, "loss": 1.308, "step": 258944 }, { "epoch": 19.437748592870545, "grad_norm": 0.5012685656547546, "learning_rate": 0.002828, "loss": 1.3183, "step": 259008 }, { "epoch": 19.442551594746718, "grad_norm": 0.6228601336479187, "learning_rate": 0.002828, "loss": 1.312, "step": 259072 }, { "epoch": 19.44735459662289, "grad_norm": 0.6101039052009583, "learning_rate": 0.002828, "loss": 1.3033, "step": 259136 }, { "epoch": 19.452157598499063, "grad_norm": 0.5090866088867188, "learning_rate": 0.002828, "loss": 1.3085, "step": 259200 }, { "epoch": 19.456960600375236, "grad_norm": 0.5848556756973267, "learning_rate": 0.002828, "loss": 1.3176, "step": 259264 }, { "epoch": 19.461763602251406, "grad_norm": 0.6140968799591064, "learning_rate": 0.002828, "loss": 1.3206, "step": 259328 }, { "epoch": 19.46656660412758, "grad_norm": 0.6449792385101318, "learning_rate": 0.002828, "loss": 1.319, "step": 259392 }, { "epoch": 19.47136960600375, "grad_norm": 0.5546607375144958, "learning_rate": 0.002828, "loss": 1.3096, "step": 259456 }, { "epoch": 19.476172607879924, "grad_norm": 0.6009331345558167, "learning_rate": 0.002828, "loss": 1.3136, "step": 259520 }, { "epoch": 19.480975609756097, "grad_norm": 0.6098905801773071, "learning_rate": 0.002828, "loss": 1.3142, "step": 259584 }, { "epoch": 19.48577861163227, "grad_norm": 0.5157196521759033, "learning_rate": 0.002828, "loss": 1.3093, "step": 259648 }, { "epoch": 19.490581613508443, "grad_norm": 0.6030533313751221, "learning_rate": 0.002828, "loss": 1.3119, "step": 259712 }, { "epoch": 19.495384615384616, "grad_norm": 0.6334670186042786, "learning_rate": 0.002828, "loss": 1.3147, "step": 259776 }, { "epoch": 19.50018761726079, "grad_norm": 0.6147441267967224, "learning_rate": 0.002828, "loss": 1.3148, "step": 259840 }, { "epoch": 19.50499061913696, "grad_norm": 0.49021661281585693, "learning_rate": 0.002828, "loss": 1.3183, "step": 259904 }, { "epoch": 19.509793621013134, "grad_norm": 0.540419340133667, "learning_rate": 0.002828, "loss": 1.3155, "step": 259968 }, { "epoch": 19.514596622889307, "grad_norm": 0.5634638667106628, "learning_rate": 0.002828, "loss": 1.3161, "step": 260032 }, { "epoch": 19.519399624765477, "grad_norm": 0.6089213490486145, "learning_rate": 0.002828, "loss": 1.3147, "step": 260096 }, { "epoch": 19.52420262664165, "grad_norm": 0.488808274269104, "learning_rate": 0.002828, "loss": 1.3174, "step": 260160 }, { "epoch": 19.529005628517822, "grad_norm": 0.5810720324516296, "learning_rate": 0.002828, "loss": 1.3131, "step": 260224 }, { "epoch": 19.533808630393995, "grad_norm": 0.5251587629318237, "learning_rate": 0.002828, "loss": 1.3177, "step": 260288 }, { "epoch": 19.538611632270168, "grad_norm": 0.5949933528900146, "learning_rate": 0.002828, "loss": 1.3114, "step": 260352 }, { "epoch": 19.54341463414634, "grad_norm": 0.5253098607063293, "learning_rate": 0.002828, "loss": 1.3075, "step": 260416 }, { "epoch": 19.548217636022514, "grad_norm": 0.4630920886993408, "learning_rate": 0.002828, "loss": 1.3176, "step": 260480 }, { "epoch": 19.553020637898687, "grad_norm": 0.5550661683082581, "learning_rate": 0.002828, "loss": 1.317, "step": 260544 }, { "epoch": 19.55782363977486, "grad_norm": 0.50571608543396, "learning_rate": 0.002828, "loss": 1.3135, "step": 260608 }, { "epoch": 19.562626641651033, "grad_norm": 0.5230061411857605, "learning_rate": 0.002828, "loss": 1.318, "step": 260672 }, { "epoch": 19.567429643527205, "grad_norm": 0.4942905306816101, "learning_rate": 0.002828, "loss": 1.3102, "step": 260736 }, { "epoch": 19.57223264540338, "grad_norm": 0.5436031222343445, "learning_rate": 0.002828, "loss": 1.322, "step": 260800 }, { "epoch": 19.57703564727955, "grad_norm": 0.6137192845344543, "learning_rate": 0.002828, "loss": 1.3267, "step": 260864 }, { "epoch": 19.58183864915572, "grad_norm": 0.5399419665336609, "learning_rate": 0.002828, "loss": 1.3147, "step": 260928 }, { "epoch": 19.586641651031893, "grad_norm": 0.6851577758789062, "learning_rate": 0.002828, "loss": 1.312, "step": 260992 }, { "epoch": 19.591444652908066, "grad_norm": 0.5793424844741821, "learning_rate": 0.002828, "loss": 1.3104, "step": 261056 }, { "epoch": 19.59624765478424, "grad_norm": 0.486041784286499, "learning_rate": 0.002828, "loss": 1.3192, "step": 261120 }, { "epoch": 19.601050656660412, "grad_norm": 0.5898070335388184, "learning_rate": 0.002828, "loss": 1.3165, "step": 261184 }, { "epoch": 19.605853658536585, "grad_norm": 0.611981987953186, "learning_rate": 0.002828, "loss": 1.3169, "step": 261248 }, { "epoch": 19.610656660412758, "grad_norm": 0.5300493240356445, "learning_rate": 0.002828, "loss": 1.3128, "step": 261312 }, { "epoch": 19.61545966228893, "grad_norm": 0.6088967323303223, "learning_rate": 0.002828, "loss": 1.3145, "step": 261376 }, { "epoch": 19.620262664165104, "grad_norm": 0.5334705114364624, "learning_rate": 0.002828, "loss": 1.3172, "step": 261440 }, { "epoch": 19.625065666041277, "grad_norm": 0.5719045400619507, "learning_rate": 0.002828, "loss": 1.3176, "step": 261504 }, { "epoch": 19.62986866791745, "grad_norm": 0.5838398337364197, "learning_rate": 0.002828, "loss": 1.3199, "step": 261568 }, { "epoch": 19.634671669793622, "grad_norm": 0.565754234790802, "learning_rate": 0.002828, "loss": 1.3113, "step": 261632 }, { "epoch": 19.639474671669795, "grad_norm": 0.5282318592071533, "learning_rate": 0.002828, "loss": 1.319, "step": 261696 }, { "epoch": 19.644277673545965, "grad_norm": 0.5384307503700256, "learning_rate": 0.002828, "loss": 1.3143, "step": 261760 }, { "epoch": 19.649080675422137, "grad_norm": 0.5330702662467957, "learning_rate": 0.002828, "loss": 1.3165, "step": 261824 }, { "epoch": 19.65388367729831, "grad_norm": 0.521190345287323, "learning_rate": 0.002828, "loss": 1.3153, "step": 261888 }, { "epoch": 19.658686679174483, "grad_norm": 0.6237691640853882, "learning_rate": 0.002828, "loss": 1.3095, "step": 261952 }, { "epoch": 19.663489681050656, "grad_norm": 0.6528506278991699, "learning_rate": 0.002828, "loss": 1.3205, "step": 262016 }, { "epoch": 19.66829268292683, "grad_norm": 0.5690321922302246, "learning_rate": 0.002828, "loss": 1.3233, "step": 262080 }, { "epoch": 19.673095684803002, "grad_norm": 0.5753093957901001, "learning_rate": 0.002828, "loss": 1.3184, "step": 262144 }, { "epoch": 19.677898686679175, "grad_norm": 0.5182336568832397, "learning_rate": 0.002828, "loss": 1.3149, "step": 262208 }, { "epoch": 19.682701688555348, "grad_norm": 0.946252167224884, "learning_rate": 0.002828, "loss": 1.32, "step": 262272 }, { "epoch": 19.68750469043152, "grad_norm": 0.7170292735099792, "learning_rate": 0.002828, "loss": 1.3173, "step": 262336 }, { "epoch": 19.692307692307693, "grad_norm": 0.5056231021881104, "learning_rate": 0.002828, "loss": 1.3166, "step": 262400 }, { "epoch": 19.697110694183866, "grad_norm": 0.5264489054679871, "learning_rate": 0.002828, "loss": 1.3173, "step": 262464 }, { "epoch": 19.70191369606004, "grad_norm": 0.5630133152008057, "learning_rate": 0.002828, "loss": 1.3196, "step": 262528 }, { "epoch": 19.70671669793621, "grad_norm": 0.5217622518539429, "learning_rate": 0.002828, "loss": 1.3186, "step": 262592 }, { "epoch": 19.71151969981238, "grad_norm": 0.5659202337265015, "learning_rate": 0.002828, "loss": 1.3193, "step": 262656 }, { "epoch": 19.716322701688554, "grad_norm": 0.5758469104766846, "learning_rate": 0.002828, "loss": 1.3157, "step": 262720 }, { "epoch": 19.721125703564727, "grad_norm": 0.5595651865005493, "learning_rate": 0.002828, "loss": 1.3198, "step": 262784 }, { "epoch": 19.7259287054409, "grad_norm": 0.5688280463218689, "learning_rate": 0.002828, "loss": 1.3193, "step": 262848 }, { "epoch": 19.730731707317073, "grad_norm": 0.5543525815010071, "learning_rate": 0.002828, "loss": 1.3144, "step": 262912 }, { "epoch": 19.735534709193246, "grad_norm": 0.5669763684272766, "learning_rate": 0.002828, "loss": 1.3183, "step": 262976 }, { "epoch": 19.74033771106942, "grad_norm": 0.628093957901001, "learning_rate": 0.002828, "loss": 1.3183, "step": 263040 }, { "epoch": 19.74514071294559, "grad_norm": 0.5473799705505371, "learning_rate": 0.002828, "loss": 1.3212, "step": 263104 }, { "epoch": 19.749943714821764, "grad_norm": 0.5699611306190491, "learning_rate": 0.002828, "loss": 1.3144, "step": 263168 }, { "epoch": 19.754746716697937, "grad_norm": 0.5728660821914673, "learning_rate": 0.002828, "loss": 1.3159, "step": 263232 }, { "epoch": 19.75954971857411, "grad_norm": 0.5876243114471436, "learning_rate": 0.002828, "loss": 1.3225, "step": 263296 }, { "epoch": 19.764352720450283, "grad_norm": 0.6909279227256775, "learning_rate": 0.002828, "loss": 1.3139, "step": 263360 }, { "epoch": 19.769155722326452, "grad_norm": 0.5972185730934143, "learning_rate": 0.002828, "loss": 1.3166, "step": 263424 }, { "epoch": 19.773958724202625, "grad_norm": 0.5375490784645081, "learning_rate": 0.002828, "loss": 1.317, "step": 263488 }, { "epoch": 19.7787617260788, "grad_norm": 0.5809566378593445, "learning_rate": 0.002828, "loss": 1.3194, "step": 263552 }, { "epoch": 19.78356472795497, "grad_norm": 0.5196545124053955, "learning_rate": 0.002828, "loss": 1.3144, "step": 263616 }, { "epoch": 19.788367729831144, "grad_norm": 0.5775570869445801, "learning_rate": 0.002828, "loss": 1.3206, "step": 263680 }, { "epoch": 19.793170731707317, "grad_norm": 0.6974487900733948, "learning_rate": 0.002828, "loss": 1.3193, "step": 263744 }, { "epoch": 19.79797373358349, "grad_norm": 0.6145743727684021, "learning_rate": 0.002828, "loss": 1.3178, "step": 263808 }, { "epoch": 19.802776735459663, "grad_norm": 0.5650912523269653, "learning_rate": 0.002828, "loss": 1.3139, "step": 263872 }, { "epoch": 19.807579737335836, "grad_norm": 0.5052109956741333, "learning_rate": 0.002828, "loss": 1.314, "step": 263936 }, { "epoch": 19.81238273921201, "grad_norm": 0.5860249996185303, "learning_rate": 0.002828, "loss": 1.3167, "step": 264000 }, { "epoch": 19.81718574108818, "grad_norm": 0.5640794038772583, "learning_rate": 0.002828, "loss": 1.3223, "step": 264064 }, { "epoch": 19.821988742964354, "grad_norm": 0.6080434322357178, "learning_rate": 0.002828, "loss": 1.3229, "step": 264128 }, { "epoch": 19.826791744840527, "grad_norm": 0.48237085342407227, "learning_rate": 0.002828, "loss": 1.322, "step": 264192 }, { "epoch": 19.831594746716696, "grad_norm": 0.6327677965164185, "learning_rate": 0.002828, "loss": 1.321, "step": 264256 }, { "epoch": 19.83639774859287, "grad_norm": 0.6129979491233826, "learning_rate": 0.002828, "loss": 1.3129, "step": 264320 }, { "epoch": 19.841200750469042, "grad_norm": 0.5425165891647339, "learning_rate": 0.002828, "loss": 1.3244, "step": 264384 }, { "epoch": 19.846003752345215, "grad_norm": 0.47874894738197327, "learning_rate": 0.002828, "loss": 1.3237, "step": 264448 }, { "epoch": 19.850806754221388, "grad_norm": 0.635398805141449, "learning_rate": 0.002828, "loss": 1.3207, "step": 264512 }, { "epoch": 19.85560975609756, "grad_norm": 0.5302428603172302, "learning_rate": 0.002828, "loss": 1.3237, "step": 264576 }, { "epoch": 19.860412757973734, "grad_norm": 0.5885991454124451, "learning_rate": 0.002828, "loss": 1.3187, "step": 264640 }, { "epoch": 19.865215759849907, "grad_norm": 0.6125088334083557, "learning_rate": 0.002828, "loss": 1.3198, "step": 264704 }, { "epoch": 19.87001876172608, "grad_norm": 0.5166810154914856, "learning_rate": 0.002828, "loss": 1.318, "step": 264768 }, { "epoch": 19.874821763602252, "grad_norm": 0.5309271216392517, "learning_rate": 0.002828, "loss": 1.3206, "step": 264832 }, { "epoch": 19.879624765478425, "grad_norm": 0.5516149997711182, "learning_rate": 0.002828, "loss": 1.3206, "step": 264896 }, { "epoch": 19.884427767354598, "grad_norm": 0.4545881450176239, "learning_rate": 0.002828, "loss": 1.3209, "step": 264960 }, { "epoch": 19.88923076923077, "grad_norm": 0.7351852059364319, "learning_rate": 0.002828, "loss": 1.3182, "step": 265024 }, { "epoch": 19.89403377110694, "grad_norm": 0.5521069169044495, "learning_rate": 0.002828, "loss": 1.3113, "step": 265088 }, { "epoch": 19.898836772983113, "grad_norm": 0.5605167746543884, "learning_rate": 0.002828, "loss": 1.3185, "step": 265152 }, { "epoch": 19.903639774859286, "grad_norm": 0.4302228093147278, "learning_rate": 0.002828, "loss": 1.3168, "step": 265216 }, { "epoch": 19.90844277673546, "grad_norm": 0.6508972644805908, "learning_rate": 0.002828, "loss": 1.3318, "step": 265280 }, { "epoch": 19.913245778611632, "grad_norm": 0.5926183462142944, "learning_rate": 0.002828, "loss": 1.3169, "step": 265344 }, { "epoch": 19.918048780487805, "grad_norm": 0.6462618708610535, "learning_rate": 0.002828, "loss": 1.3203, "step": 265408 }, { "epoch": 19.922851782363978, "grad_norm": 0.5538685321807861, "learning_rate": 0.002828, "loss": 1.3143, "step": 265472 }, { "epoch": 19.92765478424015, "grad_norm": 0.5557839870452881, "learning_rate": 0.002828, "loss": 1.3187, "step": 265536 }, { "epoch": 19.932457786116323, "grad_norm": 0.5402202606201172, "learning_rate": 0.002828, "loss": 1.3221, "step": 265600 }, { "epoch": 19.937260787992496, "grad_norm": 0.6323610544204712, "learning_rate": 0.002828, "loss": 1.3179, "step": 265664 }, { "epoch": 19.94206378986867, "grad_norm": 0.5772566795349121, "learning_rate": 0.002828, "loss": 1.3186, "step": 265728 }, { "epoch": 19.946866791744842, "grad_norm": 0.5509674549102783, "learning_rate": 0.002828, "loss": 1.3251, "step": 265792 }, { "epoch": 19.95166979362101, "grad_norm": 0.5037369728088379, "learning_rate": 0.002828, "loss": 1.3192, "step": 265856 }, { "epoch": 19.956472795497184, "grad_norm": 0.6050003170967102, "learning_rate": 0.002828, "loss": 1.3198, "step": 265920 }, { "epoch": 19.961275797373357, "grad_norm": 0.519004225730896, "learning_rate": 0.002828, "loss": 1.3132, "step": 265984 }, { "epoch": 19.96607879924953, "grad_norm": 0.5484166741371155, "learning_rate": 0.002828, "loss": 1.3227, "step": 266048 }, { "epoch": 19.970881801125703, "grad_norm": 0.5241848826408386, "learning_rate": 0.002828, "loss": 1.321, "step": 266112 }, { "epoch": 19.975684803001876, "grad_norm": 0.5934039950370789, "learning_rate": 0.002828, "loss": 1.3175, "step": 266176 }, { "epoch": 19.98048780487805, "grad_norm": 0.5047505497932434, "learning_rate": 0.002828, "loss": 1.3216, "step": 266240 }, { "epoch": 19.98529080675422, "grad_norm": 0.5770825743675232, "learning_rate": 0.002828, "loss": 1.3225, "step": 266304 }, { "epoch": 19.990093808630395, "grad_norm": 0.6241472959518433, "learning_rate": 0.002828, "loss": 1.3177, "step": 266368 }, { "epoch": 19.994896810506567, "grad_norm": 0.6075835227966309, "learning_rate": 0.002828, "loss": 1.3214, "step": 266432 }, { "epoch": 19.99969981238274, "grad_norm": 0.559272050857544, "learning_rate": 0.002828, "loss": 1.3189, "step": 266496 }, { "epoch": 20.004502814258913, "grad_norm": 0.6129955053329468, "learning_rate": 0.002828, "loss": 1.2869, "step": 266560 }, { "epoch": 20.009305816135086, "grad_norm": 0.5431690812110901, "learning_rate": 0.002828, "loss": 1.281, "step": 266624 }, { "epoch": 20.014108818011255, "grad_norm": 0.657706618309021, "learning_rate": 0.002828, "loss": 1.2773, "step": 266688 }, { "epoch": 20.01891181988743, "grad_norm": 0.5088411569595337, "learning_rate": 0.002828, "loss": 1.2845, "step": 266752 }, { "epoch": 20.0237148217636, "grad_norm": 0.5734243392944336, "learning_rate": 0.002828, "loss": 1.2781, "step": 266816 }, { "epoch": 20.028517823639774, "grad_norm": 0.5754413604736328, "learning_rate": 0.002828, "loss": 1.2766, "step": 266880 }, { "epoch": 20.033320825515947, "grad_norm": 0.5661938190460205, "learning_rate": 0.002828, "loss": 1.287, "step": 266944 }, { "epoch": 20.03812382739212, "grad_norm": 0.6118998527526855, "learning_rate": 0.002828, "loss": 1.2851, "step": 267008 }, { "epoch": 20.042926829268293, "grad_norm": 0.5828491449356079, "learning_rate": 0.002828, "loss": 1.2842, "step": 267072 }, { "epoch": 20.047729831144466, "grad_norm": 0.6424586176872253, "learning_rate": 0.002828, "loss": 1.2801, "step": 267136 }, { "epoch": 20.05253283302064, "grad_norm": 0.5585246086120605, "learning_rate": 0.002828, "loss": 1.2882, "step": 267200 }, { "epoch": 20.05733583489681, "grad_norm": 0.6090246438980103, "learning_rate": 0.002828, "loss": 1.2753, "step": 267264 }, { "epoch": 20.062138836772984, "grad_norm": 0.5552029609680176, "learning_rate": 0.002828, "loss": 1.2748, "step": 267328 }, { "epoch": 20.066941838649157, "grad_norm": 0.588688850402832, "learning_rate": 0.002828, "loss": 1.2835, "step": 267392 }, { "epoch": 20.07174484052533, "grad_norm": 0.537607729434967, "learning_rate": 0.002828, "loss": 1.2879, "step": 267456 }, { "epoch": 20.0765478424015, "grad_norm": 0.5644827485084534, "learning_rate": 0.002828, "loss": 1.2814, "step": 267520 }, { "epoch": 20.081350844277672, "grad_norm": 0.631341278553009, "learning_rate": 0.002828, "loss": 1.2826, "step": 267584 }, { "epoch": 20.086153846153845, "grad_norm": 0.493162602186203, "learning_rate": 0.002828, "loss": 1.2781, "step": 267648 }, { "epoch": 20.090956848030018, "grad_norm": 0.5350943803787231, "learning_rate": 0.002828, "loss": 1.2857, "step": 267712 }, { "epoch": 20.09575984990619, "grad_norm": 0.6216751337051392, "learning_rate": 0.002828, "loss": 1.284, "step": 267776 }, { "epoch": 20.100562851782364, "grad_norm": 0.5709570050239563, "learning_rate": 0.002828, "loss": 1.2817, "step": 267840 }, { "epoch": 20.105365853658537, "grad_norm": 0.5807507634162903, "learning_rate": 0.002828, "loss": 1.2902, "step": 267904 }, { "epoch": 20.11016885553471, "grad_norm": 0.632093071937561, "learning_rate": 0.002828, "loss": 1.2926, "step": 267968 }, { "epoch": 20.114971857410882, "grad_norm": 0.5635599493980408, "learning_rate": 0.002828, "loss": 1.2876, "step": 268032 }, { "epoch": 20.119774859287055, "grad_norm": 0.547120213508606, "learning_rate": 0.002828, "loss": 1.2883, "step": 268096 }, { "epoch": 20.124577861163228, "grad_norm": 0.47603321075439453, "learning_rate": 0.002828, "loss": 1.2875, "step": 268160 }, { "epoch": 20.1293808630394, "grad_norm": 0.5889759063720703, "learning_rate": 0.002828, "loss": 1.2913, "step": 268224 }, { "epoch": 20.134183864915574, "grad_norm": 0.5772056579589844, "learning_rate": 0.002828, "loss": 1.287, "step": 268288 }, { "epoch": 20.138986866791743, "grad_norm": 0.6541629433631897, "learning_rate": 0.002828, "loss": 1.2917, "step": 268352 }, { "epoch": 20.143789868667916, "grad_norm": 0.5309247970581055, "learning_rate": 0.002828, "loss": 1.2911, "step": 268416 }, { "epoch": 20.14859287054409, "grad_norm": 0.5737690329551697, "learning_rate": 0.002828, "loss": 1.2924, "step": 268480 }, { "epoch": 20.153395872420262, "grad_norm": 0.5364387035369873, "learning_rate": 0.002828, "loss": 1.2874, "step": 268544 }, { "epoch": 20.158198874296435, "grad_norm": 0.5387442111968994, "learning_rate": 0.002828, "loss": 1.286, "step": 268608 }, { "epoch": 20.163001876172608, "grad_norm": 0.5343860983848572, "learning_rate": 0.002828, "loss": 1.2869, "step": 268672 }, { "epoch": 20.16780487804878, "grad_norm": 0.46021518111228943, "learning_rate": 0.002828, "loss": 1.293, "step": 268736 }, { "epoch": 20.172607879924954, "grad_norm": 0.6175848841667175, "learning_rate": 0.002828, "loss": 1.2891, "step": 268800 }, { "epoch": 20.177410881801126, "grad_norm": 0.5815446376800537, "learning_rate": 0.002828, "loss": 1.2934, "step": 268864 }, { "epoch": 20.1822138836773, "grad_norm": 0.689041018486023, "learning_rate": 0.002828, "loss": 1.2935, "step": 268928 }, { "epoch": 20.187016885553472, "grad_norm": 0.6535454392433167, "learning_rate": 0.002828, "loss": 1.2897, "step": 268992 }, { "epoch": 20.191819887429645, "grad_norm": 0.5424286723136902, "learning_rate": 0.002828, "loss": 1.2889, "step": 269056 }, { "epoch": 20.196622889305814, "grad_norm": 0.6392897367477417, "learning_rate": 0.002828, "loss": 1.2892, "step": 269120 }, { "epoch": 20.201425891181987, "grad_norm": 0.5682271122932434, "learning_rate": 0.002828, "loss": 1.2906, "step": 269184 }, { "epoch": 20.20622889305816, "grad_norm": 0.5338545441627502, "learning_rate": 0.002828, "loss": 1.2911, "step": 269248 }, { "epoch": 20.211031894934333, "grad_norm": 0.5464316606521606, "learning_rate": 0.002828, "loss": 1.2963, "step": 269312 }, { "epoch": 20.215834896810506, "grad_norm": 0.5830249786376953, "learning_rate": 0.002828, "loss": 1.2947, "step": 269376 }, { "epoch": 20.22063789868668, "grad_norm": 0.5794913172721863, "learning_rate": 0.002828, "loss": 1.2867, "step": 269440 }, { "epoch": 20.22544090056285, "grad_norm": 0.5707951784133911, "learning_rate": 0.002828, "loss": 1.2867, "step": 269504 }, { "epoch": 20.230243902439025, "grad_norm": 0.5334992408752441, "learning_rate": 0.002828, "loss": 1.2926, "step": 269568 }, { "epoch": 20.235046904315197, "grad_norm": 0.5544065833091736, "learning_rate": 0.002828, "loss": 1.2916, "step": 269632 }, { "epoch": 20.23984990619137, "grad_norm": 0.5692053437232971, "learning_rate": 0.002828, "loss": 1.2893, "step": 269696 }, { "epoch": 20.244652908067543, "grad_norm": 0.45951566100120544, "learning_rate": 0.002828, "loss": 1.2872, "step": 269760 }, { "epoch": 20.249455909943716, "grad_norm": 0.6202203631401062, "learning_rate": 0.002828, "loss": 1.2919, "step": 269824 }, { "epoch": 20.25425891181989, "grad_norm": 0.546671986579895, "learning_rate": 0.002828, "loss": 1.2948, "step": 269888 }, { "epoch": 20.25906191369606, "grad_norm": 0.5328788161277771, "learning_rate": 0.002828, "loss": 1.2969, "step": 269952 }, { "epoch": 20.26386491557223, "grad_norm": 0.5520268678665161, "learning_rate": 0.002828, "loss": 1.2964, "step": 270016 }, { "epoch": 20.268667917448404, "grad_norm": 0.5719852447509766, "learning_rate": 0.002828, "loss": 1.2941, "step": 270080 }, { "epoch": 20.273470919324577, "grad_norm": 0.5243651270866394, "learning_rate": 0.002828, "loss": 1.2941, "step": 270144 }, { "epoch": 20.27827392120075, "grad_norm": 0.5593424439430237, "learning_rate": 0.002828, "loss": 1.2931, "step": 270208 }, { "epoch": 20.283076923076923, "grad_norm": 0.5311948657035828, "learning_rate": 0.002828, "loss": 1.2953, "step": 270272 }, { "epoch": 20.287879924953096, "grad_norm": 0.7635405659675598, "learning_rate": 0.002828, "loss": 1.2937, "step": 270336 }, { "epoch": 20.29268292682927, "grad_norm": 0.5601173639297485, "learning_rate": 0.002828, "loss": 1.2888, "step": 270400 }, { "epoch": 20.29748592870544, "grad_norm": 0.5598116517066956, "learning_rate": 0.002828, "loss": 1.2926, "step": 270464 }, { "epoch": 20.302288930581614, "grad_norm": 0.551178514957428, "learning_rate": 0.002828, "loss": 1.2932, "step": 270528 }, { "epoch": 20.307091932457787, "grad_norm": 0.5252653956413269, "learning_rate": 0.002828, "loss": 1.2907, "step": 270592 }, { "epoch": 20.31189493433396, "grad_norm": 0.617731511592865, "learning_rate": 0.002828, "loss": 1.2957, "step": 270656 }, { "epoch": 20.316697936210133, "grad_norm": 0.5604806542396545, "learning_rate": 0.002828, "loss": 1.2946, "step": 270720 }, { "epoch": 20.321500938086302, "grad_norm": 0.5689629316329956, "learning_rate": 0.002828, "loss": 1.2946, "step": 270784 }, { "epoch": 20.326303939962475, "grad_norm": 0.5235245227813721, "learning_rate": 0.002828, "loss": 1.2964, "step": 270848 }, { "epoch": 20.331106941838648, "grad_norm": 0.5162010788917542, "learning_rate": 0.002828, "loss": 1.2939, "step": 270912 }, { "epoch": 20.33590994371482, "grad_norm": 0.4949166178703308, "learning_rate": 0.002828, "loss": 1.3024, "step": 270976 }, { "epoch": 20.340712945590994, "grad_norm": 0.5738046765327454, "learning_rate": 0.002828, "loss": 1.2948, "step": 271040 }, { "epoch": 20.345515947467167, "grad_norm": 0.5544030070304871, "learning_rate": 0.002828, "loss": 1.296, "step": 271104 }, { "epoch": 20.35031894934334, "grad_norm": 0.5360429883003235, "learning_rate": 0.002828, "loss": 1.2923, "step": 271168 }, { "epoch": 20.355121951219513, "grad_norm": 0.5099622011184692, "learning_rate": 0.002828, "loss": 1.3032, "step": 271232 }, { "epoch": 20.359924953095685, "grad_norm": 0.5253172516822815, "learning_rate": 0.002828, "loss": 1.29, "step": 271296 }, { "epoch": 20.36472795497186, "grad_norm": 0.5624486207962036, "learning_rate": 0.002828, "loss": 1.2991, "step": 271360 }, { "epoch": 20.36953095684803, "grad_norm": 0.575293242931366, "learning_rate": 0.002828, "loss": 1.297, "step": 271424 }, { "epoch": 20.374333958724204, "grad_norm": 0.6318356990814209, "learning_rate": 0.002828, "loss": 1.2949, "step": 271488 }, { "epoch": 20.379136960600377, "grad_norm": 0.5846620798110962, "learning_rate": 0.002828, "loss": 1.2944, "step": 271552 }, { "epoch": 20.383939962476546, "grad_norm": 0.7515684962272644, "learning_rate": 0.002828, "loss": 1.295, "step": 271616 }, { "epoch": 20.38874296435272, "grad_norm": 0.6497512459754944, "learning_rate": 0.002828, "loss": 1.2903, "step": 271680 }, { "epoch": 20.393545966228892, "grad_norm": 0.5049740076065063, "learning_rate": 0.002828, "loss": 1.3037, "step": 271744 }, { "epoch": 20.398348968105065, "grad_norm": 0.48297742009162903, "learning_rate": 0.002828, "loss": 1.3004, "step": 271808 }, { "epoch": 20.403151969981238, "grad_norm": 0.6171135902404785, "learning_rate": 0.002828, "loss": 1.294, "step": 271872 }, { "epoch": 20.40795497185741, "grad_norm": 0.5428125858306885, "learning_rate": 0.002828, "loss": 1.2954, "step": 271936 }, { "epoch": 20.412757973733584, "grad_norm": 0.5896263718605042, "learning_rate": 0.002828, "loss": 1.2969, "step": 272000 }, { "epoch": 20.417560975609756, "grad_norm": 0.6516025066375732, "learning_rate": 0.002828, "loss": 1.2992, "step": 272064 }, { "epoch": 20.42236397748593, "grad_norm": 0.5841122269630432, "learning_rate": 0.002828, "loss": 1.2928, "step": 272128 }, { "epoch": 20.427166979362102, "grad_norm": 0.6658204197883606, "learning_rate": 0.002828, "loss": 1.3003, "step": 272192 }, { "epoch": 20.431969981238275, "grad_norm": 0.7099432349205017, "learning_rate": 0.002828, "loss": 1.2908, "step": 272256 }, { "epoch": 20.436772983114448, "grad_norm": 0.6290414333343506, "learning_rate": 0.002828, "loss": 1.2968, "step": 272320 }, { "epoch": 20.44157598499062, "grad_norm": 0.6891517639160156, "learning_rate": 0.002828, "loss": 1.2992, "step": 272384 }, { "epoch": 20.44637898686679, "grad_norm": 0.5390793085098267, "learning_rate": 0.002828, "loss": 1.2989, "step": 272448 }, { "epoch": 20.451181988742963, "grad_norm": 0.5205430388450623, "learning_rate": 0.002828, "loss": 1.293, "step": 272512 }, { "epoch": 20.455984990619136, "grad_norm": 0.618954598903656, "learning_rate": 0.002828, "loss": 1.295, "step": 272576 }, { "epoch": 20.46078799249531, "grad_norm": 0.5977427959442139, "learning_rate": 0.002828, "loss": 1.2957, "step": 272640 }, { "epoch": 20.46559099437148, "grad_norm": 0.48537740111351013, "learning_rate": 0.002828, "loss": 1.2998, "step": 272704 }, { "epoch": 20.470393996247655, "grad_norm": 0.6163172125816345, "learning_rate": 0.002828, "loss": 1.2984, "step": 272768 }, { "epoch": 20.475196998123828, "grad_norm": 0.6726917028427124, "learning_rate": 0.002828, "loss": 1.3049, "step": 272832 }, { "epoch": 20.48, "grad_norm": 0.7405112385749817, "learning_rate": 0.002828, "loss": 1.2933, "step": 272896 }, { "epoch": 20.484803001876173, "grad_norm": 0.6282209753990173, "learning_rate": 0.002828, "loss": 1.2972, "step": 272960 }, { "epoch": 20.489606003752346, "grad_norm": 0.5530390739440918, "learning_rate": 0.002828, "loss": 1.2978, "step": 273024 }, { "epoch": 20.49440900562852, "grad_norm": 0.5814974308013916, "learning_rate": 0.002828, "loss": 1.2933, "step": 273088 }, { "epoch": 20.499212007504692, "grad_norm": 0.6190447211265564, "learning_rate": 0.002828, "loss": 1.3, "step": 273152 }, { "epoch": 20.504015009380865, "grad_norm": 0.5674455761909485, "learning_rate": 0.002828, "loss": 1.2985, "step": 273216 }, { "epoch": 20.508818011257034, "grad_norm": 0.6419714689254761, "learning_rate": 0.002828, "loss": 1.2984, "step": 273280 }, { "epoch": 20.513621013133207, "grad_norm": 0.5519217252731323, "learning_rate": 0.002828, "loss": 1.2982, "step": 273344 }, { "epoch": 20.51842401500938, "grad_norm": 0.46819642186164856, "learning_rate": 0.002828, "loss": 1.3034, "step": 273408 }, { "epoch": 20.523227016885553, "grad_norm": 0.5590828061103821, "learning_rate": 0.002828, "loss": 1.2998, "step": 273472 }, { "epoch": 20.528030018761726, "grad_norm": 0.6636494994163513, "learning_rate": 0.002828, "loss": 1.2943, "step": 273536 }, { "epoch": 20.5328330206379, "grad_norm": 0.613201916217804, "learning_rate": 0.002828, "loss": 1.2974, "step": 273600 }, { "epoch": 20.53763602251407, "grad_norm": 0.6075567007064819, "learning_rate": 0.002828, "loss": 1.2946, "step": 273664 }, { "epoch": 20.542439024390244, "grad_norm": 0.5289536714553833, "learning_rate": 0.002828, "loss": 1.2995, "step": 273728 }, { "epoch": 20.547242026266417, "grad_norm": 0.5214759111404419, "learning_rate": 0.002828, "loss": 1.3006, "step": 273792 }, { "epoch": 20.55204502814259, "grad_norm": 0.5907725095748901, "learning_rate": 0.002828, "loss": 1.301, "step": 273856 }, { "epoch": 20.556848030018763, "grad_norm": 0.4892508387565613, "learning_rate": 0.002828, "loss": 1.3025, "step": 273920 }, { "epoch": 20.561651031894936, "grad_norm": 0.6342883110046387, "learning_rate": 0.002828, "loss": 1.297, "step": 273984 }, { "epoch": 20.566454033771105, "grad_norm": 0.7062511444091797, "learning_rate": 0.002828, "loss": 1.3077, "step": 274048 }, { "epoch": 20.571257035647278, "grad_norm": 0.5731887221336365, "learning_rate": 0.002828, "loss": 1.2989, "step": 274112 }, { "epoch": 20.57606003752345, "grad_norm": 0.5922696590423584, "learning_rate": 0.002828, "loss": 1.2957, "step": 274176 }, { "epoch": 20.580863039399624, "grad_norm": 0.6939809322357178, "learning_rate": 0.002828, "loss": 1.2998, "step": 274240 }, { "epoch": 20.585666041275797, "grad_norm": 0.6065263152122498, "learning_rate": 0.002828, "loss": 1.297, "step": 274304 }, { "epoch": 20.59046904315197, "grad_norm": 0.4768809378147125, "learning_rate": 0.002828, "loss": 1.3048, "step": 274368 }, { "epoch": 20.595272045028143, "grad_norm": 0.6705830097198486, "learning_rate": 0.002828, "loss": 1.2991, "step": 274432 }, { "epoch": 20.600075046904315, "grad_norm": 0.6444434523582458, "learning_rate": 0.002828, "loss": 1.2978, "step": 274496 }, { "epoch": 20.60487804878049, "grad_norm": 0.5016328692436218, "learning_rate": 0.002828, "loss": 1.2974, "step": 274560 }, { "epoch": 20.60968105065666, "grad_norm": 0.5974975824356079, "learning_rate": 0.002828, "loss": 1.297, "step": 274624 }, { "epoch": 20.614484052532834, "grad_norm": 0.4709956645965576, "learning_rate": 0.002828, "loss": 1.3012, "step": 274688 }, { "epoch": 20.619287054409007, "grad_norm": 0.7240418195724487, "learning_rate": 0.002828, "loss": 1.3036, "step": 274752 }, { "epoch": 20.62409005628518, "grad_norm": 0.5455818772315979, "learning_rate": 0.002828, "loss": 1.3052, "step": 274816 }, { "epoch": 20.62889305816135, "grad_norm": 0.6497557759284973, "learning_rate": 0.002828, "loss": 1.3011, "step": 274880 }, { "epoch": 20.633696060037522, "grad_norm": 0.623468816280365, "learning_rate": 0.002828, "loss": 1.2998, "step": 274944 }, { "epoch": 20.638499061913695, "grad_norm": 0.5101485252380371, "learning_rate": 0.002828, "loss": 1.2985, "step": 275008 }, { "epoch": 20.643302063789868, "grad_norm": 0.6895449757575989, "learning_rate": 0.002828, "loss": 1.2969, "step": 275072 }, { "epoch": 20.64810506566604, "grad_norm": 0.6263419985771179, "learning_rate": 0.002828, "loss": 1.302, "step": 275136 }, { "epoch": 20.652908067542214, "grad_norm": 0.46888402104377747, "learning_rate": 0.002828, "loss": 1.2973, "step": 275200 }, { "epoch": 20.657711069418387, "grad_norm": 0.5644204616546631, "learning_rate": 0.002828, "loss": 1.3054, "step": 275264 }, { "epoch": 20.66251407129456, "grad_norm": 0.4956626892089844, "learning_rate": 0.002828, "loss": 1.3011, "step": 275328 }, { "epoch": 20.667317073170732, "grad_norm": 0.5327547192573547, "learning_rate": 0.002828, "loss": 1.3006, "step": 275392 }, { "epoch": 20.672120075046905, "grad_norm": 0.5446391105651855, "learning_rate": 0.002828, "loss": 1.3011, "step": 275456 }, { "epoch": 20.676923076923078, "grad_norm": 0.5319408178329468, "learning_rate": 0.002828, "loss": 1.3028, "step": 275520 }, { "epoch": 20.68172607879925, "grad_norm": 0.6537469625473022, "learning_rate": 0.002828, "loss": 1.2999, "step": 275584 }, { "epoch": 20.686529080675424, "grad_norm": 0.5841550827026367, "learning_rate": 0.002828, "loss": 1.3002, "step": 275648 }, { "epoch": 20.691332082551593, "grad_norm": 0.6798041462898254, "learning_rate": 0.002828, "loss": 1.3048, "step": 275712 }, { "epoch": 20.696135084427766, "grad_norm": 0.604843020439148, "learning_rate": 0.002828, "loss": 1.3019, "step": 275776 }, { "epoch": 20.70093808630394, "grad_norm": 0.5991361737251282, "learning_rate": 0.002828, "loss": 1.2993, "step": 275840 }, { "epoch": 20.705741088180112, "grad_norm": 0.5777614116668701, "learning_rate": 0.002828, "loss": 1.2961, "step": 275904 }, { "epoch": 20.710544090056285, "grad_norm": 0.5120457410812378, "learning_rate": 0.002828, "loss": 1.3075, "step": 275968 }, { "epoch": 20.715347091932458, "grad_norm": 0.5318966507911682, "learning_rate": 0.002828, "loss": 1.3067, "step": 276032 }, { "epoch": 20.72015009380863, "grad_norm": 0.5237390995025635, "learning_rate": 0.002828, "loss": 1.2978, "step": 276096 }, { "epoch": 20.724953095684803, "grad_norm": 0.4869547486305237, "learning_rate": 0.002828, "loss": 1.3004, "step": 276160 }, { "epoch": 20.729756097560976, "grad_norm": 0.51389080286026, "learning_rate": 0.002828, "loss": 1.3011, "step": 276224 }, { "epoch": 20.73455909943715, "grad_norm": 0.5911182761192322, "learning_rate": 0.002828, "loss": 1.2999, "step": 276288 }, { "epoch": 20.739362101313322, "grad_norm": 0.641406774520874, "learning_rate": 0.002828, "loss": 1.3015, "step": 276352 }, { "epoch": 20.744165103189495, "grad_norm": 0.5013548731803894, "learning_rate": 0.002828, "loss": 1.3015, "step": 276416 }, { "epoch": 20.748968105065664, "grad_norm": 0.6204653978347778, "learning_rate": 0.002828, "loss": 1.3002, "step": 276480 }, { "epoch": 20.753771106941837, "grad_norm": 0.49453988671302795, "learning_rate": 0.002828, "loss": 1.3045, "step": 276544 }, { "epoch": 20.75857410881801, "grad_norm": 0.5399415493011475, "learning_rate": 0.002828, "loss": 1.3039, "step": 276608 }, { "epoch": 20.763377110694183, "grad_norm": 0.5224660634994507, "learning_rate": 0.002828, "loss": 1.3077, "step": 276672 }, { "epoch": 20.768180112570356, "grad_norm": 0.5413986444473267, "learning_rate": 0.002828, "loss": 1.3068, "step": 276736 }, { "epoch": 20.77298311444653, "grad_norm": 0.6324653029441833, "learning_rate": 0.002828, "loss": 1.3022, "step": 276800 }, { "epoch": 20.7777861163227, "grad_norm": 0.6608572602272034, "learning_rate": 0.002828, "loss": 1.3042, "step": 276864 }, { "epoch": 20.782589118198874, "grad_norm": 0.5002397298812866, "learning_rate": 0.002828, "loss": 1.3065, "step": 276928 }, { "epoch": 20.787392120075047, "grad_norm": 0.5986420512199402, "learning_rate": 0.002828, "loss": 1.3046, "step": 276992 }, { "epoch": 20.79219512195122, "grad_norm": 0.5150978565216064, "learning_rate": 0.002828, "loss": 1.2971, "step": 277056 }, { "epoch": 20.796998123827393, "grad_norm": 0.7404240369796753, "learning_rate": 0.002828, "loss": 1.309, "step": 277120 }, { "epoch": 20.801801125703566, "grad_norm": 0.5593386888504028, "learning_rate": 0.002828, "loss": 1.3049, "step": 277184 }, { "epoch": 20.80660412757974, "grad_norm": 0.595770001411438, "learning_rate": 0.002828, "loss": 1.3039, "step": 277248 }, { "epoch": 20.811407129455908, "grad_norm": 0.7926905751228333, "learning_rate": 0.002828, "loss": 1.3033, "step": 277312 }, { "epoch": 20.81621013133208, "grad_norm": 0.5238022804260254, "learning_rate": 0.002828, "loss": 1.299, "step": 277376 }, { "epoch": 20.821013133208254, "grad_norm": 0.5705653429031372, "learning_rate": 0.002828, "loss": 1.3016, "step": 277440 }, { "epoch": 20.825816135084427, "grad_norm": 0.6585155725479126, "learning_rate": 0.002828, "loss": 1.3044, "step": 277504 }, { "epoch": 20.8306191369606, "grad_norm": 0.48894691467285156, "learning_rate": 0.002828, "loss": 1.3049, "step": 277568 }, { "epoch": 20.835422138836773, "grad_norm": 0.5562286972999573, "learning_rate": 0.002828, "loss": 1.2995, "step": 277632 }, { "epoch": 20.840225140712946, "grad_norm": 0.5397721529006958, "learning_rate": 0.002828, "loss": 1.3069, "step": 277696 }, { "epoch": 20.84502814258912, "grad_norm": 0.7238554954528809, "learning_rate": 0.002828, "loss": 1.3091, "step": 277760 }, { "epoch": 20.84983114446529, "grad_norm": 0.5862271785736084, "learning_rate": 0.002828, "loss": 1.3019, "step": 277824 }, { "epoch": 20.854634146341464, "grad_norm": 0.5831578969955444, "learning_rate": 0.002828, "loss": 1.3017, "step": 277888 }, { "epoch": 20.859437148217637, "grad_norm": 0.46079304814338684, "learning_rate": 0.002828, "loss": 1.3035, "step": 277952 }, { "epoch": 20.86424015009381, "grad_norm": 0.555037796497345, "learning_rate": 0.002828, "loss": 1.3041, "step": 278016 }, { "epoch": 20.869043151969983, "grad_norm": 0.5918899178504944, "learning_rate": 0.002828, "loss": 1.3068, "step": 278080 }, { "epoch": 20.873846153846152, "grad_norm": 0.4550643265247345, "learning_rate": 0.002828, "loss": 1.3038, "step": 278144 }, { "epoch": 20.878649155722325, "grad_norm": 0.7102483510971069, "learning_rate": 0.002828, "loss": 1.2991, "step": 278208 }, { "epoch": 20.883452157598498, "grad_norm": 0.543178141117096, "learning_rate": 0.002828, "loss": 1.3017, "step": 278272 }, { "epoch": 20.88825515947467, "grad_norm": 0.6087631583213806, "learning_rate": 0.002828, "loss": 1.3039, "step": 278336 }, { "epoch": 20.893058161350844, "grad_norm": 0.5347623229026794, "learning_rate": 0.002828, "loss": 1.3048, "step": 278400 }, { "epoch": 20.897861163227017, "grad_norm": 0.496459037065506, "learning_rate": 0.002828, "loss": 1.306, "step": 278464 }, { "epoch": 20.90266416510319, "grad_norm": 0.5459811687469482, "learning_rate": 0.002828, "loss": 1.3012, "step": 278528 }, { "epoch": 20.907467166979362, "grad_norm": 0.5910803079605103, "learning_rate": 0.002828, "loss": 1.2998, "step": 278592 }, { "epoch": 20.912270168855535, "grad_norm": 0.575896143913269, "learning_rate": 0.002828, "loss": 1.3012, "step": 278656 }, { "epoch": 20.917073170731708, "grad_norm": 0.5138533115386963, "learning_rate": 0.002828, "loss": 1.2963, "step": 278720 }, { "epoch": 20.92187617260788, "grad_norm": 0.6679477095603943, "learning_rate": 0.002828, "loss": 1.3056, "step": 278784 }, { "epoch": 20.926679174484054, "grad_norm": 0.5711171627044678, "learning_rate": 0.002828, "loss": 1.3042, "step": 278848 }, { "epoch": 20.931482176360227, "grad_norm": 0.5481835603713989, "learning_rate": 0.002828, "loss": 1.3011, "step": 278912 }, { "epoch": 20.936285178236396, "grad_norm": 0.6149061918258667, "learning_rate": 0.002828, "loss": 1.3, "step": 278976 }, { "epoch": 20.94108818011257, "grad_norm": 0.5665428042411804, "learning_rate": 0.002828, "loss": 1.3016, "step": 279040 }, { "epoch": 20.945891181988742, "grad_norm": 0.5922060608863831, "learning_rate": 0.002828, "loss": 1.3035, "step": 279104 }, { "epoch": 20.950694183864915, "grad_norm": 0.5712735056877136, "learning_rate": 0.002828, "loss": 1.3049, "step": 279168 }, { "epoch": 20.955497185741088, "grad_norm": 0.49438712000846863, "learning_rate": 0.002828, "loss": 1.2999, "step": 279232 }, { "epoch": 20.96030018761726, "grad_norm": 0.5500780344009399, "learning_rate": 0.002828, "loss": 1.3024, "step": 279296 }, { "epoch": 20.965103189493433, "grad_norm": 0.41028276085853577, "learning_rate": 0.002828, "loss": 1.3077, "step": 279360 }, { "epoch": 20.969906191369606, "grad_norm": 0.5960568785667419, "learning_rate": 0.002828, "loss": 1.3016, "step": 279424 }, { "epoch": 20.97470919324578, "grad_norm": 0.5779933929443359, "learning_rate": 0.002828, "loss": 1.3052, "step": 279488 }, { "epoch": 20.979512195121952, "grad_norm": 0.5713076591491699, "learning_rate": 0.002828, "loss": 1.3042, "step": 279552 }, { "epoch": 20.984315196998125, "grad_norm": 0.657752513885498, "learning_rate": 0.002828, "loss": 1.3039, "step": 279616 }, { "epoch": 20.989118198874298, "grad_norm": 0.6048377156257629, "learning_rate": 0.002828, "loss": 1.3052, "step": 279680 }, { "epoch": 20.99392120075047, "grad_norm": 0.5630027651786804, "learning_rate": 0.002828, "loss": 1.3045, "step": 279744 }, { "epoch": 20.99872420262664, "grad_norm": 0.5261644721031189, "learning_rate": 0.002828, "loss": 1.3037, "step": 279808 }, { "epoch": 21.003527204502813, "grad_norm": 0.7338108420372009, "learning_rate": 0.002828, "loss": 1.2755, "step": 279872 }, { "epoch": 21.008330206378986, "grad_norm": 0.5403333902359009, "learning_rate": 0.002828, "loss": 1.267, "step": 279936 }, { "epoch": 21.01313320825516, "grad_norm": 0.568750262260437, "learning_rate": 0.002828, "loss": 1.2625, "step": 280000 }, { "epoch": 21.01793621013133, "grad_norm": 0.5088416934013367, "learning_rate": 0.002828, "loss": 1.2696, "step": 280064 }, { "epoch": 21.022739212007505, "grad_norm": 0.5926600098609924, "learning_rate": 0.002828, "loss": 1.2638, "step": 280128 }, { "epoch": 21.027542213883677, "grad_norm": 0.5535401105880737, "learning_rate": 0.002828, "loss": 1.2676, "step": 280192 }, { "epoch": 21.03234521575985, "grad_norm": 0.5679246187210083, "learning_rate": 0.002828, "loss": 1.2622, "step": 280256 }, { "epoch": 21.037148217636023, "grad_norm": 0.6422357559204102, "learning_rate": 0.002828, "loss": 1.267, "step": 280320 }, { "epoch": 21.041951219512196, "grad_norm": 0.5129832625389099, "learning_rate": 0.002828, "loss": 1.268, "step": 280384 }, { "epoch": 21.04675422138837, "grad_norm": 0.6040861010551453, "learning_rate": 0.002828, "loss": 1.2665, "step": 280448 }, { "epoch": 21.051557223264542, "grad_norm": 0.6140467524528503, "learning_rate": 0.002828, "loss": 1.2644, "step": 280512 }, { "epoch": 21.056360225140715, "grad_norm": 0.5909779667854309, "learning_rate": 0.002828, "loss": 1.2658, "step": 280576 }, { "epoch": 21.061163227016884, "grad_norm": 0.5795265436172485, "learning_rate": 0.002828, "loss": 1.27, "step": 280640 }, { "epoch": 21.065966228893057, "grad_norm": 0.6152122616767883, "learning_rate": 0.002828, "loss": 1.2637, "step": 280704 }, { "epoch": 21.07076923076923, "grad_norm": 0.5055559277534485, "learning_rate": 0.002828, "loss": 1.2677, "step": 280768 }, { "epoch": 21.075572232645403, "grad_norm": 0.5432459115982056, "learning_rate": 0.002828, "loss": 1.2681, "step": 280832 }, { "epoch": 21.080375234521576, "grad_norm": 0.6607730388641357, "learning_rate": 0.002828, "loss": 1.2727, "step": 280896 }, { "epoch": 21.08517823639775, "grad_norm": 0.5829780697822571, "learning_rate": 0.002828, "loss": 1.2725, "step": 280960 }, { "epoch": 21.08998123827392, "grad_norm": 0.6266777515411377, "learning_rate": 0.002828, "loss": 1.2665, "step": 281024 }, { "epoch": 21.094784240150094, "grad_norm": 0.5578786730766296, "learning_rate": 0.002828, "loss": 1.2754, "step": 281088 }, { "epoch": 21.099587242026267, "grad_norm": 0.5670081973075867, "learning_rate": 0.002828, "loss": 1.2744, "step": 281152 }, { "epoch": 21.10439024390244, "grad_norm": 0.717337429523468, "learning_rate": 0.002828, "loss": 1.2719, "step": 281216 }, { "epoch": 21.109193245778613, "grad_norm": 0.7886514067649841, "learning_rate": 0.002828, "loss": 1.272, "step": 281280 }, { "epoch": 21.113996247654786, "grad_norm": 0.6627486944198608, "learning_rate": 0.002828, "loss": 1.2662, "step": 281344 }, { "epoch": 21.118799249530955, "grad_norm": 0.5494076609611511, "learning_rate": 0.002828, "loss": 1.2688, "step": 281408 }, { "epoch": 21.123602251407128, "grad_norm": 0.48781818151474, "learning_rate": 0.002828, "loss": 1.2748, "step": 281472 }, { "epoch": 21.1284052532833, "grad_norm": 0.6375535726547241, "learning_rate": 0.002828, "loss": 1.265, "step": 281536 }, { "epoch": 21.133208255159474, "grad_norm": 0.5140436887741089, "learning_rate": 0.002828, "loss": 1.2674, "step": 281600 }, { "epoch": 21.138011257035647, "grad_norm": 0.7258280515670776, "learning_rate": 0.002828, "loss": 1.2759, "step": 281664 }, { "epoch": 21.14281425891182, "grad_norm": 0.567965567111969, "learning_rate": 0.002828, "loss": 1.2768, "step": 281728 }, { "epoch": 21.147617260787992, "grad_norm": 0.5151071548461914, "learning_rate": 0.002828, "loss": 1.2714, "step": 281792 }, { "epoch": 21.152420262664165, "grad_norm": 0.5506834983825684, "learning_rate": 0.002828, "loss": 1.273, "step": 281856 }, { "epoch": 21.157223264540338, "grad_norm": 0.5140776038169861, "learning_rate": 0.002828, "loss": 1.2743, "step": 281920 }, { "epoch": 21.16202626641651, "grad_norm": 0.5231831669807434, "learning_rate": 0.002828, "loss": 1.2739, "step": 281984 }, { "epoch": 21.166829268292684, "grad_norm": 0.5400099754333496, "learning_rate": 0.002828, "loss": 1.2755, "step": 282048 }, { "epoch": 21.171632270168857, "grad_norm": 0.6175333261489868, "learning_rate": 0.002828, "loss": 1.2722, "step": 282112 }, { "epoch": 21.17643527204503, "grad_norm": 0.4814399182796478, "learning_rate": 0.002828, "loss": 1.2777, "step": 282176 }, { "epoch": 21.1812382739212, "grad_norm": 0.5004304051399231, "learning_rate": 0.002828, "loss": 1.2718, "step": 282240 }, { "epoch": 21.186041275797372, "grad_norm": 0.6475741267204285, "learning_rate": 0.002828, "loss": 1.2723, "step": 282304 }, { "epoch": 21.190844277673545, "grad_norm": 0.5444566607475281, "learning_rate": 0.002828, "loss": 1.2799, "step": 282368 }, { "epoch": 21.195647279549718, "grad_norm": 0.6097396016120911, "learning_rate": 0.002828, "loss": 1.2755, "step": 282432 }, { "epoch": 21.20045028142589, "grad_norm": 0.6205595135688782, "learning_rate": 0.002828, "loss": 1.2723, "step": 282496 }, { "epoch": 21.205253283302063, "grad_norm": 0.5144677758216858, "learning_rate": 0.002828, "loss": 1.276, "step": 282560 }, { "epoch": 21.210056285178236, "grad_norm": 0.5892837047576904, "learning_rate": 0.002828, "loss": 1.2726, "step": 282624 }, { "epoch": 21.21485928705441, "grad_norm": 0.576680064201355, "learning_rate": 0.002828, "loss": 1.274, "step": 282688 }, { "epoch": 21.219662288930582, "grad_norm": 0.503209114074707, "learning_rate": 0.002828, "loss": 1.2746, "step": 282752 }, { "epoch": 21.224465290806755, "grad_norm": 0.5236272215843201, "learning_rate": 0.002828, "loss": 1.2779, "step": 282816 }, { "epoch": 21.229268292682928, "grad_norm": 0.6208475828170776, "learning_rate": 0.002828, "loss": 1.2789, "step": 282880 }, { "epoch": 21.2340712945591, "grad_norm": 0.5424129962921143, "learning_rate": 0.002828, "loss": 1.2768, "step": 282944 }, { "epoch": 21.238874296435274, "grad_norm": 0.6895538568496704, "learning_rate": 0.002828, "loss": 1.2798, "step": 283008 }, { "epoch": 21.243677298311443, "grad_norm": 0.5749019384384155, "learning_rate": 0.002828, "loss": 1.2832, "step": 283072 }, { "epoch": 21.248480300187616, "grad_norm": 0.6259738206863403, "learning_rate": 0.002828, "loss": 1.272, "step": 283136 }, { "epoch": 21.25328330206379, "grad_norm": 0.4793575406074524, "learning_rate": 0.002828, "loss": 1.2798, "step": 283200 }, { "epoch": 21.25808630393996, "grad_norm": 0.5138023495674133, "learning_rate": 0.002828, "loss": 1.2779, "step": 283264 }, { "epoch": 21.262889305816135, "grad_norm": 0.49802491068840027, "learning_rate": 0.002828, "loss": 1.2779, "step": 283328 }, { "epoch": 21.267692307692307, "grad_norm": 0.6311704516410828, "learning_rate": 0.002828, "loss": 1.2791, "step": 283392 }, { "epoch": 21.27249530956848, "grad_norm": 0.5918208956718445, "learning_rate": 0.002828, "loss": 1.2738, "step": 283456 }, { "epoch": 21.277298311444653, "grad_norm": 0.5432458519935608, "learning_rate": 0.002828, "loss": 1.2774, "step": 283520 }, { "epoch": 21.282101313320826, "grad_norm": 0.5453625917434692, "learning_rate": 0.002828, "loss": 1.2743, "step": 283584 }, { "epoch": 21.286904315197, "grad_norm": 0.577239990234375, "learning_rate": 0.002828, "loss": 1.2859, "step": 283648 }, { "epoch": 21.291707317073172, "grad_norm": 0.5318759679794312, "learning_rate": 0.002828, "loss": 1.2773, "step": 283712 }, { "epoch": 21.296510318949345, "grad_norm": 0.5675327181816101, "learning_rate": 0.002828, "loss": 1.2796, "step": 283776 }, { "epoch": 21.301313320825518, "grad_norm": 0.5135879516601562, "learning_rate": 0.002828, "loss": 1.2753, "step": 283840 }, { "epoch": 21.306116322701687, "grad_norm": 0.6239807605743408, "learning_rate": 0.002828, "loss": 1.2774, "step": 283904 }, { "epoch": 21.31091932457786, "grad_norm": 0.5842599272727966, "learning_rate": 0.002828, "loss": 1.2775, "step": 283968 }, { "epoch": 21.315722326454033, "grad_norm": 0.6358805894851685, "learning_rate": 0.002828, "loss": 1.2707, "step": 284032 }, { "epoch": 21.320525328330206, "grad_norm": 0.6516125202178955, "learning_rate": 0.002828, "loss": 1.2791, "step": 284096 }, { "epoch": 21.32532833020638, "grad_norm": 0.5607619285583496, "learning_rate": 0.002828, "loss": 1.2773, "step": 284160 }, { "epoch": 21.33013133208255, "grad_norm": 0.596806526184082, "learning_rate": 0.002828, "loss": 1.2762, "step": 284224 }, { "epoch": 21.334934333958724, "grad_norm": 0.6293962001800537, "learning_rate": 0.002828, "loss": 1.277, "step": 284288 }, { "epoch": 21.339737335834897, "grad_norm": 0.5905919075012207, "learning_rate": 0.002828, "loss": 1.2821, "step": 284352 }, { "epoch": 21.34454033771107, "grad_norm": 0.6652618646621704, "learning_rate": 0.002828, "loss": 1.2787, "step": 284416 }, { "epoch": 21.349343339587243, "grad_norm": 0.648979663848877, "learning_rate": 0.002828, "loss": 1.2781, "step": 284480 }, { "epoch": 21.354146341463416, "grad_norm": 0.5890728831291199, "learning_rate": 0.002828, "loss": 1.2785, "step": 284544 }, { "epoch": 21.35894934333959, "grad_norm": 0.7403590083122253, "learning_rate": 0.002828, "loss": 1.285, "step": 284608 }, { "epoch": 21.363752345215758, "grad_norm": 0.5386162996292114, "learning_rate": 0.002828, "loss": 1.2775, "step": 284672 }, { "epoch": 21.36855534709193, "grad_norm": 0.5761041641235352, "learning_rate": 0.002828, "loss": 1.2713, "step": 284736 }, { "epoch": 21.373358348968104, "grad_norm": 0.6262449622154236, "learning_rate": 0.002828, "loss": 1.2774, "step": 284800 }, { "epoch": 21.378161350844277, "grad_norm": 0.5526410937309265, "learning_rate": 0.002828, "loss": 1.2798, "step": 284864 }, { "epoch": 21.38296435272045, "grad_norm": 0.4797821044921875, "learning_rate": 0.002828, "loss": 1.2822, "step": 284928 }, { "epoch": 21.387767354596622, "grad_norm": 0.5310882925987244, "learning_rate": 0.002828, "loss": 1.2833, "step": 284992 }, { "epoch": 21.392570356472795, "grad_norm": 0.5559850931167603, "learning_rate": 0.002828, "loss": 1.2823, "step": 285056 }, { "epoch": 21.39737335834897, "grad_norm": 0.6088487505912781, "learning_rate": 0.002828, "loss": 1.2767, "step": 285120 }, { "epoch": 21.40217636022514, "grad_norm": 0.6352992057800293, "learning_rate": 0.002828, "loss": 1.2831, "step": 285184 }, { "epoch": 21.406979362101314, "grad_norm": 0.5523979067802429, "learning_rate": 0.002828, "loss": 1.286, "step": 285248 }, { "epoch": 21.411782363977487, "grad_norm": 0.5660821795463562, "learning_rate": 0.002828, "loss": 1.2825, "step": 285312 }, { "epoch": 21.41658536585366, "grad_norm": 0.5910194516181946, "learning_rate": 0.002828, "loss": 1.2803, "step": 285376 }, { "epoch": 21.421388367729833, "grad_norm": 0.6662206649780273, "learning_rate": 0.002828, "loss": 1.2854, "step": 285440 }, { "epoch": 21.426191369606002, "grad_norm": 0.6278867125511169, "learning_rate": 0.002828, "loss": 1.2768, "step": 285504 }, { "epoch": 21.430994371482175, "grad_norm": 0.4952567219734192, "learning_rate": 0.002828, "loss": 1.2753, "step": 285568 }, { "epoch": 21.435797373358348, "grad_norm": 0.5748569369316101, "learning_rate": 0.002828, "loss": 1.2827, "step": 285632 }, { "epoch": 21.44060037523452, "grad_norm": 0.6461130976676941, "learning_rate": 0.002828, "loss": 1.2861, "step": 285696 }, { "epoch": 21.445403377110694, "grad_norm": 0.5584768652915955, "learning_rate": 0.002828, "loss": 1.2838, "step": 285760 }, { "epoch": 21.450206378986866, "grad_norm": 0.5767645239830017, "learning_rate": 0.002828, "loss": 1.2748, "step": 285824 }, { "epoch": 21.45500938086304, "grad_norm": 0.5557846426963806, "learning_rate": 0.002828, "loss": 1.2789, "step": 285888 }, { "epoch": 21.459812382739212, "grad_norm": 0.6409792304039001, "learning_rate": 0.002828, "loss": 1.2827, "step": 285952 }, { "epoch": 21.464615384615385, "grad_norm": 0.5421139597892761, "learning_rate": 0.002828, "loss": 1.2761, "step": 286016 }, { "epoch": 21.469418386491558, "grad_norm": 0.5195848941802979, "learning_rate": 0.002828, "loss": 1.2768, "step": 286080 }, { "epoch": 21.47422138836773, "grad_norm": 0.7797949314117432, "learning_rate": 0.002828, "loss": 1.2837, "step": 286144 }, { "epoch": 21.479024390243904, "grad_norm": 0.544447124004364, "learning_rate": 0.002828, "loss": 1.2879, "step": 286208 }, { "epoch": 21.483827392120077, "grad_norm": 0.6963238716125488, "learning_rate": 0.002828, "loss": 1.2911, "step": 286272 }, { "epoch": 21.488630393996246, "grad_norm": 0.6099432110786438, "learning_rate": 0.002828, "loss": 1.2825, "step": 286336 }, { "epoch": 21.49343339587242, "grad_norm": 0.5661420822143555, "learning_rate": 0.002828, "loss": 1.2758, "step": 286400 }, { "epoch": 21.49823639774859, "grad_norm": 0.5302500128746033, "learning_rate": 0.002828, "loss": 1.2813, "step": 286464 }, { "epoch": 21.503039399624765, "grad_norm": 0.5726025104522705, "learning_rate": 0.002828, "loss": 1.2836, "step": 286528 }, { "epoch": 21.507842401500938, "grad_norm": 0.6002224683761597, "learning_rate": 0.002828, "loss": 1.2858, "step": 286592 }, { "epoch": 21.51264540337711, "grad_norm": 0.5370309948921204, "learning_rate": 0.002828, "loss": 1.282, "step": 286656 }, { "epoch": 21.517448405253283, "grad_norm": 0.5378057360649109, "learning_rate": 0.002828, "loss": 1.2857, "step": 286720 }, { "epoch": 21.522251407129456, "grad_norm": 0.5748156309127808, "learning_rate": 0.002828, "loss": 1.2805, "step": 286784 }, { "epoch": 21.52705440900563, "grad_norm": 0.6204039454460144, "learning_rate": 0.002828, "loss": 1.2848, "step": 286848 }, { "epoch": 21.531857410881802, "grad_norm": 0.5553511381149292, "learning_rate": 0.002828, "loss": 1.285, "step": 286912 }, { "epoch": 21.536660412757975, "grad_norm": 0.5893772840499878, "learning_rate": 0.002828, "loss": 1.287, "step": 286976 }, { "epoch": 21.541463414634148, "grad_norm": 0.6876904964447021, "learning_rate": 0.002828, "loss": 1.2822, "step": 287040 }, { "epoch": 21.54626641651032, "grad_norm": 0.5917910933494568, "learning_rate": 0.002828, "loss": 1.2772, "step": 287104 }, { "epoch": 21.55106941838649, "grad_norm": 0.6226091980934143, "learning_rate": 0.002828, "loss": 1.2824, "step": 287168 }, { "epoch": 21.555872420262663, "grad_norm": 0.5301666259765625, "learning_rate": 0.002828, "loss": 1.2803, "step": 287232 }, { "epoch": 21.560675422138836, "grad_norm": 0.6062954664230347, "learning_rate": 0.002828, "loss": 1.2887, "step": 287296 }, { "epoch": 21.56547842401501, "grad_norm": 0.514739990234375, "learning_rate": 0.002828, "loss": 1.2813, "step": 287360 }, { "epoch": 21.57028142589118, "grad_norm": 0.5676846504211426, "learning_rate": 0.002828, "loss": 1.2752, "step": 287424 }, { "epoch": 21.575084427767354, "grad_norm": 0.5088371634483337, "learning_rate": 0.002828, "loss": 1.2806, "step": 287488 }, { "epoch": 21.579887429643527, "grad_norm": 0.5662075281143188, "learning_rate": 0.002828, "loss": 1.2808, "step": 287552 }, { "epoch": 21.5846904315197, "grad_norm": 0.5890974402427673, "learning_rate": 0.002828, "loss": 1.2865, "step": 287616 }, { "epoch": 21.589493433395873, "grad_norm": 0.7173524498939514, "learning_rate": 0.002828, "loss": 1.2864, "step": 287680 }, { "epoch": 21.594296435272046, "grad_norm": 0.5397960543632507, "learning_rate": 0.002828, "loss": 1.2835, "step": 287744 }, { "epoch": 21.59909943714822, "grad_norm": 0.6206925511360168, "learning_rate": 0.002828, "loss": 1.2801, "step": 287808 }, { "epoch": 21.60390243902439, "grad_norm": 0.5514209270477295, "learning_rate": 0.002828, "loss": 1.2858, "step": 287872 }, { "epoch": 21.608705440900565, "grad_norm": 0.4978121817111969, "learning_rate": 0.002828, "loss": 1.281, "step": 287936 }, { "epoch": 21.613508442776734, "grad_norm": 0.555472195148468, "learning_rate": 0.002828, "loss": 1.2894, "step": 288000 }, { "epoch": 21.618311444652907, "grad_norm": 0.6257559061050415, "learning_rate": 0.002828, "loss": 1.2929, "step": 288064 }, { "epoch": 21.62311444652908, "grad_norm": 0.5221234560012817, "learning_rate": 0.002828, "loss": 1.2959, "step": 288128 }, { "epoch": 21.627917448405253, "grad_norm": 0.6040545105934143, "learning_rate": 0.002828, "loss": 1.2855, "step": 288192 }, { "epoch": 21.632720450281425, "grad_norm": 0.6095830202102661, "learning_rate": 0.002828, "loss": 1.2895, "step": 288256 }, { "epoch": 21.6375234521576, "grad_norm": 0.6214110851287842, "learning_rate": 0.002828, "loss": 1.2862, "step": 288320 }, { "epoch": 21.64232645403377, "grad_norm": 0.6199238300323486, "learning_rate": 0.002828, "loss": 1.2877, "step": 288384 }, { "epoch": 21.647129455909944, "grad_norm": 0.637213945388794, "learning_rate": 0.002828, "loss": 1.2838, "step": 288448 }, { "epoch": 21.651932457786117, "grad_norm": 0.6265941262245178, "learning_rate": 0.002828, "loss": 1.2866, "step": 288512 }, { "epoch": 21.65673545966229, "grad_norm": 0.6128517985343933, "learning_rate": 0.002828, "loss": 1.2806, "step": 288576 }, { "epoch": 21.661538461538463, "grad_norm": 0.6448500752449036, "learning_rate": 0.002828, "loss": 1.2811, "step": 288640 }, { "epoch": 21.666341463414636, "grad_norm": 0.5811263918876648, "learning_rate": 0.002828, "loss": 1.2828, "step": 288704 }, { "epoch": 21.67114446529081, "grad_norm": 0.607732892036438, "learning_rate": 0.002828, "loss": 1.2848, "step": 288768 }, { "epoch": 21.675947467166978, "grad_norm": 0.50178462266922, "learning_rate": 0.002828, "loss": 1.2832, "step": 288832 }, { "epoch": 21.68075046904315, "grad_norm": 0.4868558347225189, "learning_rate": 0.002828, "loss": 1.28, "step": 288896 }, { "epoch": 21.685553470919324, "grad_norm": 0.5406298041343689, "learning_rate": 0.002828, "loss": 1.2916, "step": 288960 }, { "epoch": 21.690356472795497, "grad_norm": 0.5491721034049988, "learning_rate": 0.002828, "loss": 1.2882, "step": 289024 }, { "epoch": 21.69515947467167, "grad_norm": 0.5529964566230774, "learning_rate": 0.002828, "loss": 1.2872, "step": 289088 }, { "epoch": 21.699962476547842, "grad_norm": 0.5563608407974243, "learning_rate": 0.002828, "loss": 1.2884, "step": 289152 }, { "epoch": 21.704765478424015, "grad_norm": 0.5522491931915283, "learning_rate": 0.002828, "loss": 1.279, "step": 289216 }, { "epoch": 21.709568480300188, "grad_norm": 0.5772581696510315, "learning_rate": 0.002828, "loss": 1.2892, "step": 289280 }, { "epoch": 21.71437148217636, "grad_norm": 0.5620320439338684, "learning_rate": 0.002828, "loss": 1.2852, "step": 289344 }, { "epoch": 21.719174484052534, "grad_norm": 0.5383132100105286, "learning_rate": 0.002828, "loss": 1.2871, "step": 289408 }, { "epoch": 21.723977485928707, "grad_norm": 0.6416934132575989, "learning_rate": 0.002828, "loss": 1.2857, "step": 289472 }, { "epoch": 21.72878048780488, "grad_norm": 0.6494410037994385, "learning_rate": 0.002828, "loss": 1.2897, "step": 289536 }, { "epoch": 21.733583489681052, "grad_norm": 0.5881087183952332, "learning_rate": 0.002828, "loss": 1.2848, "step": 289600 }, { "epoch": 21.738386491557222, "grad_norm": 0.4864734411239624, "learning_rate": 0.002828, "loss": 1.2854, "step": 289664 }, { "epoch": 21.743189493433395, "grad_norm": 0.4938899874687195, "learning_rate": 0.002828, "loss": 1.2883, "step": 289728 }, { "epoch": 21.747992495309568, "grad_norm": 0.5875544548034668, "learning_rate": 0.002828, "loss": 1.2829, "step": 289792 }, { "epoch": 21.75279549718574, "grad_norm": 0.48364803194999695, "learning_rate": 0.002828, "loss": 1.2861, "step": 289856 }, { "epoch": 21.757598499061913, "grad_norm": 0.5670167207717896, "learning_rate": 0.002828, "loss": 1.2867, "step": 289920 }, { "epoch": 21.762401500938086, "grad_norm": 0.5402014851570129, "learning_rate": 0.002828, "loss": 1.2809, "step": 289984 }, { "epoch": 21.76720450281426, "grad_norm": 0.6188728213310242, "learning_rate": 0.002828, "loss": 1.2861, "step": 290048 }, { "epoch": 21.772007504690432, "grad_norm": 0.4771275222301483, "learning_rate": 0.002828, "loss": 1.2866, "step": 290112 }, { "epoch": 21.776810506566605, "grad_norm": 0.5720518827438354, "learning_rate": 0.002828, "loss": 1.2884, "step": 290176 }, { "epoch": 21.781613508442778, "grad_norm": 0.6083287596702576, "learning_rate": 0.002828, "loss": 1.2871, "step": 290240 }, { "epoch": 21.78641651031895, "grad_norm": 0.7133959531784058, "learning_rate": 0.002828, "loss": 1.2878, "step": 290304 }, { "epoch": 21.791219512195124, "grad_norm": 0.6493473052978516, "learning_rate": 0.002828, "loss": 1.2842, "step": 290368 }, { "epoch": 21.796022514071293, "grad_norm": 0.6049429774284363, "learning_rate": 0.002828, "loss": 1.2845, "step": 290432 }, { "epoch": 21.800825515947466, "grad_norm": 0.559673011302948, "learning_rate": 0.002828, "loss": 1.2919, "step": 290496 }, { "epoch": 21.80562851782364, "grad_norm": 0.5735464692115784, "learning_rate": 0.002828, "loss": 1.2887, "step": 290560 }, { "epoch": 21.81043151969981, "grad_norm": 0.633755087852478, "learning_rate": 0.002828, "loss": 1.2823, "step": 290624 }, { "epoch": 21.815234521575984, "grad_norm": 0.5874369740486145, "learning_rate": 0.002828, "loss": 1.2963, "step": 290688 }, { "epoch": 21.820037523452157, "grad_norm": 0.890895426273346, "learning_rate": 0.002828, "loss": 1.2936, "step": 290752 }, { "epoch": 21.82484052532833, "grad_norm": 0.7190248370170593, "learning_rate": 0.002828, "loss": 1.2889, "step": 290816 }, { "epoch": 21.829643527204503, "grad_norm": 0.5849973559379578, "learning_rate": 0.002828, "loss": 1.2882, "step": 290880 }, { "epoch": 21.834446529080676, "grad_norm": 0.5190068483352661, "learning_rate": 0.002828, "loss": 1.2789, "step": 290944 }, { "epoch": 21.83924953095685, "grad_norm": 0.5481669306755066, "learning_rate": 0.002828, "loss": 1.2915, "step": 291008 }, { "epoch": 21.84405253283302, "grad_norm": 0.5439419746398926, "learning_rate": 0.002828, "loss": 1.2856, "step": 291072 }, { "epoch": 21.848855534709195, "grad_norm": 0.6238653063774109, "learning_rate": 0.002828, "loss": 1.2911, "step": 291136 }, { "epoch": 21.853658536585368, "grad_norm": 0.4976770877838135, "learning_rate": 0.002828, "loss": 1.2947, "step": 291200 }, { "epoch": 21.858461538461537, "grad_norm": 0.578702449798584, "learning_rate": 0.002828, "loss": 1.2854, "step": 291264 }, { "epoch": 21.86326454033771, "grad_norm": 0.6257563829421997, "learning_rate": 0.002828, "loss": 1.2936, "step": 291328 }, { "epoch": 21.868067542213883, "grad_norm": 0.5133591294288635, "learning_rate": 0.002828, "loss": 1.2864, "step": 291392 }, { "epoch": 21.872870544090055, "grad_norm": 0.500873327255249, "learning_rate": 0.002828, "loss": 1.2851, "step": 291456 }, { "epoch": 21.87767354596623, "grad_norm": 0.7154791951179504, "learning_rate": 0.002828, "loss": 1.2836, "step": 291520 }, { "epoch": 21.8824765478424, "grad_norm": 0.5378066897392273, "learning_rate": 0.002828, "loss": 1.283, "step": 291584 }, { "epoch": 21.887279549718574, "grad_norm": 0.5295055508613586, "learning_rate": 0.002828, "loss": 1.2891, "step": 291648 }, { "epoch": 21.892082551594747, "grad_norm": 0.49576306343078613, "learning_rate": 0.002828, "loss": 1.2863, "step": 291712 }, { "epoch": 21.89688555347092, "grad_norm": 0.5278056859970093, "learning_rate": 0.002828, "loss": 1.29, "step": 291776 }, { "epoch": 21.901688555347093, "grad_norm": 0.639045000076294, "learning_rate": 0.002828, "loss": 1.287, "step": 291840 }, { "epoch": 21.906491557223266, "grad_norm": 0.5763165950775146, "learning_rate": 0.002828, "loss": 1.2885, "step": 291904 }, { "epoch": 21.91129455909944, "grad_norm": 0.7740374207496643, "learning_rate": 0.002828, "loss": 1.2925, "step": 291968 }, { "epoch": 21.91609756097561, "grad_norm": 0.4997633993625641, "learning_rate": 0.002828, "loss": 1.2869, "step": 292032 }, { "epoch": 21.92090056285178, "grad_norm": 0.5590546131134033, "learning_rate": 0.002828, "loss": 1.2802, "step": 292096 }, { "epoch": 21.925703564727954, "grad_norm": 0.4461499750614166, "learning_rate": 0.002828, "loss": 1.2903, "step": 292160 }, { "epoch": 21.930506566604127, "grad_norm": 0.550574541091919, "learning_rate": 0.002828, "loss": 1.2864, "step": 292224 }, { "epoch": 21.9353095684803, "grad_norm": 0.6598172783851624, "learning_rate": 0.002828, "loss": 1.2873, "step": 292288 }, { "epoch": 21.940112570356472, "grad_norm": 0.6326315999031067, "learning_rate": 0.002828, "loss": 1.2884, "step": 292352 }, { "epoch": 21.944915572232645, "grad_norm": 0.5537628531455994, "learning_rate": 0.002828, "loss": 1.2879, "step": 292416 }, { "epoch": 21.949718574108818, "grad_norm": 0.4792967736721039, "learning_rate": 0.002828, "loss": 1.2877, "step": 292480 }, { "epoch": 21.95452157598499, "grad_norm": 0.529313325881958, "learning_rate": 0.002828, "loss": 1.2802, "step": 292544 }, { "epoch": 21.959324577861164, "grad_norm": 0.48615750670433044, "learning_rate": 0.002828, "loss": 1.2925, "step": 292608 }, { "epoch": 21.964127579737337, "grad_norm": 0.5281648635864258, "learning_rate": 0.002828, "loss": 1.289, "step": 292672 }, { "epoch": 21.96893058161351, "grad_norm": 0.5400027632713318, "learning_rate": 0.002828, "loss": 1.2874, "step": 292736 }, { "epoch": 21.973733583489683, "grad_norm": 0.5279366374015808, "learning_rate": 0.002828, "loss": 1.2917, "step": 292800 }, { "epoch": 21.978536585365852, "grad_norm": 0.5460038185119629, "learning_rate": 0.002828, "loss": 1.2986, "step": 292864 }, { "epoch": 21.983339587242025, "grad_norm": 0.6252233982086182, "learning_rate": 0.002828, "loss": 1.2908, "step": 292928 }, { "epoch": 21.988142589118198, "grad_norm": 0.6752356290817261, "learning_rate": 0.002828, "loss": 1.2835, "step": 292992 }, { "epoch": 21.99294559099437, "grad_norm": 0.6158551573753357, "learning_rate": 0.002828, "loss": 1.2886, "step": 293056 }, { "epoch": 21.997748592870543, "grad_norm": 0.6006995439529419, "learning_rate": 0.002828, "loss": 1.2917, "step": 293120 }, { "epoch": 22.002551594746716, "grad_norm": 0.5210825204849243, "learning_rate": 0.002828, "loss": 1.2682, "step": 293184 }, { "epoch": 22.00735459662289, "grad_norm": 0.655004620552063, "learning_rate": 0.002828, "loss": 1.2534, "step": 293248 }, { "epoch": 22.012157598499062, "grad_norm": 0.46713322401046753, "learning_rate": 0.002828, "loss": 1.2493, "step": 293312 }, { "epoch": 22.016960600375235, "grad_norm": 0.6327230334281921, "learning_rate": 0.002828, "loss": 1.2514, "step": 293376 }, { "epoch": 22.021763602251408, "grad_norm": 0.5137706995010376, "learning_rate": 0.002828, "loss": 1.2518, "step": 293440 }, { "epoch": 22.02656660412758, "grad_norm": 0.5789818167686462, "learning_rate": 0.002828, "loss": 1.2493, "step": 293504 }, { "epoch": 22.031369606003754, "grad_norm": 0.5683451294898987, "learning_rate": 0.002828, "loss": 1.2462, "step": 293568 }, { "epoch": 22.036172607879926, "grad_norm": 0.6083554029464722, "learning_rate": 0.002828, "loss": 1.2404, "step": 293632 }, { "epoch": 22.040975609756096, "grad_norm": 0.5888787508010864, "learning_rate": 0.002828, "loss": 1.2517, "step": 293696 }, { "epoch": 22.04577861163227, "grad_norm": 0.6548123955726624, "learning_rate": 0.002828, "loss": 1.2501, "step": 293760 }, { "epoch": 22.05058161350844, "grad_norm": 0.6625087857246399, "learning_rate": 0.002828, "loss": 1.2545, "step": 293824 }, { "epoch": 22.055384615384614, "grad_norm": 0.6816575527191162, "learning_rate": 0.002828, "loss": 1.2508, "step": 293888 }, { "epoch": 22.060187617260787, "grad_norm": 0.6531043648719788, "learning_rate": 0.002828, "loss": 1.2507, "step": 293952 }, { "epoch": 22.06499061913696, "grad_norm": 0.5314988493919373, "learning_rate": 0.002828, "loss": 1.2502, "step": 294016 }, { "epoch": 22.069793621013133, "grad_norm": 0.5735332369804382, "learning_rate": 0.002828, "loss": 1.257, "step": 294080 }, { "epoch": 22.074596622889306, "grad_norm": 0.6446841955184937, "learning_rate": 0.002828, "loss": 1.2588, "step": 294144 }, { "epoch": 22.07939962476548, "grad_norm": 0.5817360281944275, "learning_rate": 0.002828, "loss": 1.255, "step": 294208 }, { "epoch": 22.084202626641652, "grad_norm": 0.5101385712623596, "learning_rate": 0.002828, "loss": 1.2557, "step": 294272 }, { "epoch": 22.089005628517825, "grad_norm": 0.5546039342880249, "learning_rate": 0.002828, "loss": 1.254, "step": 294336 }, { "epoch": 22.093808630393998, "grad_norm": 0.5416100025177002, "learning_rate": 0.002828, "loss": 1.256, "step": 294400 }, { "epoch": 22.09861163227017, "grad_norm": 0.5552825331687927, "learning_rate": 0.002828, "loss": 1.2531, "step": 294464 }, { "epoch": 22.10341463414634, "grad_norm": 0.5232527256011963, "learning_rate": 0.002828, "loss": 1.2585, "step": 294528 }, { "epoch": 22.108217636022513, "grad_norm": 0.5346928834915161, "learning_rate": 0.002828, "loss": 1.2496, "step": 294592 }, { "epoch": 22.113020637898686, "grad_norm": 0.5848618149757385, "learning_rate": 0.002828, "loss": 1.2561, "step": 294656 }, { "epoch": 22.11782363977486, "grad_norm": 0.5828258991241455, "learning_rate": 0.002828, "loss": 1.2582, "step": 294720 }, { "epoch": 22.12262664165103, "grad_norm": 0.5497180223464966, "learning_rate": 0.002828, "loss": 1.2537, "step": 294784 }, { "epoch": 22.127429643527204, "grad_norm": 0.5274826884269714, "learning_rate": 0.002828, "loss": 1.2553, "step": 294848 }, { "epoch": 22.132232645403377, "grad_norm": 0.5913788676261902, "learning_rate": 0.002828, "loss": 1.2567, "step": 294912 }, { "epoch": 22.13703564727955, "grad_norm": 0.5481519103050232, "learning_rate": 0.002828, "loss": 1.2555, "step": 294976 }, { "epoch": 22.141838649155723, "grad_norm": 0.625358521938324, "learning_rate": 0.002828, "loss": 1.2606, "step": 295040 }, { "epoch": 22.146641651031896, "grad_norm": 0.5830749869346619, "learning_rate": 0.002828, "loss": 1.2586, "step": 295104 }, { "epoch": 22.15144465290807, "grad_norm": 0.5845195651054382, "learning_rate": 0.002828, "loss": 1.2582, "step": 295168 }, { "epoch": 22.15624765478424, "grad_norm": 0.5070827007293701, "learning_rate": 0.002828, "loss": 1.2565, "step": 295232 }, { "epoch": 22.161050656660414, "grad_norm": 0.5942825675010681, "learning_rate": 0.002828, "loss": 1.2552, "step": 295296 }, { "epoch": 22.165853658536584, "grad_norm": 0.4849054515361786, "learning_rate": 0.002828, "loss": 1.2574, "step": 295360 }, { "epoch": 22.170656660412757, "grad_norm": 0.630711019039154, "learning_rate": 0.002828, "loss": 1.2635, "step": 295424 }, { "epoch": 22.17545966228893, "grad_norm": 0.6968967914581299, "learning_rate": 0.002828, "loss": 1.2608, "step": 295488 }, { "epoch": 22.180262664165102, "grad_norm": 0.5041672587394714, "learning_rate": 0.002828, "loss": 1.2606, "step": 295552 }, { "epoch": 22.185065666041275, "grad_norm": 0.49958130717277527, "learning_rate": 0.002828, "loss": 1.2538, "step": 295616 }, { "epoch": 22.189868667917448, "grad_norm": 0.6439744234085083, "learning_rate": 0.002828, "loss": 1.262, "step": 295680 }, { "epoch": 22.19467166979362, "grad_norm": 0.6664990186691284, "learning_rate": 0.002828, "loss": 1.2547, "step": 295744 }, { "epoch": 22.199474671669794, "grad_norm": 0.6299195885658264, "learning_rate": 0.002828, "loss": 1.2589, "step": 295808 }, { "epoch": 22.204277673545967, "grad_norm": 0.581447422504425, "learning_rate": 0.002828, "loss": 1.2592, "step": 295872 }, { "epoch": 22.20908067542214, "grad_norm": 0.5462125539779663, "learning_rate": 0.002828, "loss": 1.2623, "step": 295936 }, { "epoch": 22.213883677298313, "grad_norm": 0.5986690521240234, "learning_rate": 0.002828, "loss": 1.2597, "step": 296000 }, { "epoch": 22.218686679174485, "grad_norm": 0.5245385766029358, "learning_rate": 0.002828, "loss": 1.2536, "step": 296064 }, { "epoch": 22.22348968105066, "grad_norm": 0.7947522401809692, "learning_rate": 0.002828, "loss": 1.2589, "step": 296128 }, { "epoch": 22.228292682926828, "grad_norm": 0.5781105756759644, "learning_rate": 0.002828, "loss": 1.2614, "step": 296192 }, { "epoch": 22.233095684803, "grad_norm": 0.5033446550369263, "learning_rate": 0.002828, "loss": 1.2594, "step": 296256 }, { "epoch": 22.237898686679173, "grad_norm": 0.5990842580795288, "learning_rate": 0.002828, "loss": 1.2634, "step": 296320 }, { "epoch": 22.242701688555346, "grad_norm": 0.549052357673645, "learning_rate": 0.002828, "loss": 1.2574, "step": 296384 }, { "epoch": 22.24750469043152, "grad_norm": 0.4947829842567444, "learning_rate": 0.002828, "loss": 1.2612, "step": 296448 }, { "epoch": 22.252307692307692, "grad_norm": 0.5878117084503174, "learning_rate": 0.002828, "loss": 1.2578, "step": 296512 }, { "epoch": 22.257110694183865, "grad_norm": 0.5507844686508179, "learning_rate": 0.002828, "loss": 1.261, "step": 296576 }, { "epoch": 22.261913696060038, "grad_norm": 0.5388092398643494, "learning_rate": 0.002828, "loss": 1.2651, "step": 296640 }, { "epoch": 22.26671669793621, "grad_norm": 0.46271833777427673, "learning_rate": 0.002828, "loss": 1.2599, "step": 296704 }, { "epoch": 22.271519699812384, "grad_norm": 0.7457178235054016, "learning_rate": 0.002828, "loss": 1.2606, "step": 296768 }, { "epoch": 22.276322701688557, "grad_norm": 0.6202179789543152, "learning_rate": 0.002828, "loss": 1.2644, "step": 296832 }, { "epoch": 22.28112570356473, "grad_norm": 0.5775248408317566, "learning_rate": 0.002828, "loss": 1.2643, "step": 296896 }, { "epoch": 22.285928705440902, "grad_norm": 0.7320228219032288, "learning_rate": 0.002828, "loss": 1.2631, "step": 296960 }, { "epoch": 22.29073170731707, "grad_norm": 0.6378146409988403, "learning_rate": 0.002828, "loss": 1.2634, "step": 297024 }, { "epoch": 22.295534709193245, "grad_norm": 0.5487433075904846, "learning_rate": 0.002828, "loss": 1.2545, "step": 297088 }, { "epoch": 22.300337711069417, "grad_norm": 0.5953262448310852, "learning_rate": 0.002828, "loss": 1.265, "step": 297152 }, { "epoch": 22.30514071294559, "grad_norm": 0.6441254615783691, "learning_rate": 0.002828, "loss": 1.2678, "step": 297216 }, { "epoch": 22.309943714821763, "grad_norm": 0.6376492381095886, "learning_rate": 0.002828, "loss": 1.2668, "step": 297280 }, { "epoch": 22.314746716697936, "grad_norm": 0.535435676574707, "learning_rate": 0.002828, "loss": 1.2593, "step": 297344 }, { "epoch": 22.31954971857411, "grad_norm": 0.6632246971130371, "learning_rate": 0.002828, "loss": 1.2677, "step": 297408 }, { "epoch": 22.324352720450282, "grad_norm": 0.46085768938064575, "learning_rate": 0.002828, "loss": 1.2647, "step": 297472 }, { "epoch": 22.329155722326455, "grad_norm": 0.5245596170425415, "learning_rate": 0.002828, "loss": 1.2665, "step": 297536 }, { "epoch": 22.333958724202628, "grad_norm": 0.6162623167037964, "learning_rate": 0.002828, "loss": 1.2651, "step": 297600 }, { "epoch": 22.3387617260788, "grad_norm": 0.5637834072113037, "learning_rate": 0.002828, "loss": 1.2705, "step": 297664 }, { "epoch": 22.343564727954973, "grad_norm": 0.575354814529419, "learning_rate": 0.002828, "loss": 1.262, "step": 297728 }, { "epoch": 22.348367729831143, "grad_norm": 0.4700267016887665, "learning_rate": 0.002828, "loss": 1.2641, "step": 297792 }, { "epoch": 22.353170731707316, "grad_norm": 0.6297546029090881, "learning_rate": 0.002828, "loss": 1.2629, "step": 297856 }, { "epoch": 22.35797373358349, "grad_norm": 0.5787842273712158, "learning_rate": 0.002828, "loss": 1.2688, "step": 297920 }, { "epoch": 22.36277673545966, "grad_norm": 0.5737005472183228, "learning_rate": 0.002828, "loss": 1.2678, "step": 297984 }, { "epoch": 22.367579737335834, "grad_norm": 0.5520718693733215, "learning_rate": 0.002828, "loss": 1.2641, "step": 298048 }, { "epoch": 22.372382739212007, "grad_norm": 0.5324095487594604, "learning_rate": 0.002828, "loss": 1.2664, "step": 298112 }, { "epoch": 22.37718574108818, "grad_norm": 0.6231042742729187, "learning_rate": 0.002828, "loss": 1.2626, "step": 298176 }, { "epoch": 22.381988742964353, "grad_norm": 0.5680738091468811, "learning_rate": 0.002828, "loss": 1.2663, "step": 298240 }, { "epoch": 22.386791744840526, "grad_norm": 0.5304076671600342, "learning_rate": 0.002828, "loss": 1.2431, "step": 298304 }, { "epoch": 22.3915947467167, "grad_norm": 0.4264357089996338, "learning_rate": 0.002828, "loss": 1.2415, "step": 298368 }, { "epoch": 22.39639774859287, "grad_norm": 0.5016809105873108, "learning_rate": 0.002828, "loss": 1.248, "step": 298432 }, { "epoch": 22.401200750469044, "grad_norm": 0.5329827666282654, "learning_rate": 0.002828, "loss": 1.2457, "step": 298496 }, { "epoch": 22.406003752345217, "grad_norm": 0.5466082692146301, "learning_rate": 0.002828, "loss": 1.2464, "step": 298560 }, { "epoch": 22.410806754221387, "grad_norm": 0.5093466639518738, "learning_rate": 0.002828, "loss": 1.2508, "step": 298624 }, { "epoch": 22.41560975609756, "grad_norm": 0.5469045639038086, "learning_rate": 0.002828, "loss": 1.247, "step": 298688 }, { "epoch": 22.420412757973732, "grad_norm": 0.5645148158073425, "learning_rate": 0.002828, "loss": 1.2553, "step": 298752 }, { "epoch": 22.425215759849905, "grad_norm": 0.6576793193817139, "learning_rate": 0.002828, "loss": 1.2499, "step": 298816 }, { "epoch": 22.43001876172608, "grad_norm": 0.726359486579895, "learning_rate": 0.002828, "loss": 1.2541, "step": 298880 }, { "epoch": 22.43482176360225, "grad_norm": 0.6088193655014038, "learning_rate": 0.002828, "loss": 1.2569, "step": 298944 }, { "epoch": 22.439624765478424, "grad_norm": 0.6478526592254639, "learning_rate": 0.002828, "loss": 1.2528, "step": 299008 }, { "epoch": 22.444427767354597, "grad_norm": 0.6103188991546631, "learning_rate": 0.002828, "loss": 1.2513, "step": 299072 }, { "epoch": 22.44923076923077, "grad_norm": 0.5749058127403259, "learning_rate": 0.002828, "loss": 1.2569, "step": 299136 }, { "epoch": 22.454033771106943, "grad_norm": 0.5394167900085449, "learning_rate": 0.002828, "loss": 1.25, "step": 299200 }, { "epoch": 22.458836772983116, "grad_norm": 0.650351881980896, "learning_rate": 0.002828, "loss": 1.2553, "step": 299264 }, { "epoch": 22.46363977485929, "grad_norm": 0.5926783680915833, "learning_rate": 0.002828, "loss": 1.2544, "step": 299328 }, { "epoch": 22.46844277673546, "grad_norm": 0.6527881622314453, "learning_rate": 0.002828, "loss": 1.2531, "step": 299392 }, { "epoch": 22.47324577861163, "grad_norm": 0.6134124994277954, "learning_rate": 0.002828, "loss": 1.2544, "step": 299456 }, { "epoch": 22.478048780487804, "grad_norm": 0.5028894543647766, "learning_rate": 0.002828, "loss": 1.253, "step": 299520 }, { "epoch": 22.482851782363976, "grad_norm": 0.6587144136428833, "learning_rate": 0.002828, "loss": 1.2562, "step": 299584 }, { "epoch": 22.48765478424015, "grad_norm": 0.6202356219291687, "learning_rate": 0.002828, "loss": 1.2548, "step": 299648 }, { "epoch": 22.492457786116322, "grad_norm": 0.5491941571235657, "learning_rate": 0.002828, "loss": 1.2548, "step": 299712 }, { "epoch": 22.497260787992495, "grad_norm": 0.5050047039985657, "learning_rate": 0.002828, "loss": 1.2542, "step": 299776 }, { "epoch": 22.502063789868668, "grad_norm": 0.5200276970863342, "learning_rate": 0.002828, "loss": 1.2547, "step": 299840 }, { "epoch": 22.50686679174484, "grad_norm": 0.5613254308700562, "learning_rate": 0.002828, "loss": 1.2535, "step": 299904 }, { "epoch": 22.511669793621014, "grad_norm": 0.574245274066925, "learning_rate": 0.002828, "loss": 1.2503, "step": 299968 }, { "epoch": 22.516472795497187, "grad_norm": 0.6551904678344727, "learning_rate": 0.002828, "loss": 1.259, "step": 300032 }, { "epoch": 22.52127579737336, "grad_norm": 0.7811374664306641, "learning_rate": 0.002828, "loss": 1.2488, "step": 300096 }, { "epoch": 22.526078799249532, "grad_norm": 0.5673550367355347, "learning_rate": 0.002828, "loss": 1.2572, "step": 300160 }, { "epoch": 22.530881801125705, "grad_norm": 0.6635274887084961, "learning_rate": 0.002828, "loss": 1.2563, "step": 300224 }, { "epoch": 22.535684803001875, "grad_norm": 0.702856719493866, "learning_rate": 0.002828, "loss": 1.2637, "step": 300288 }, { "epoch": 22.540487804878047, "grad_norm": 0.571111798286438, "learning_rate": 0.002828, "loss": 1.2551, "step": 300352 }, { "epoch": 22.54529080675422, "grad_norm": 0.5393403172492981, "learning_rate": 0.002828, "loss": 1.2558, "step": 300416 }, { "epoch": 22.550093808630393, "grad_norm": 0.5712943077087402, "learning_rate": 0.002828, "loss": 1.2518, "step": 300480 }, { "epoch": 22.554896810506566, "grad_norm": 0.5832908749580383, "learning_rate": 0.002828, "loss": 1.2515, "step": 300544 }, { "epoch": 22.55969981238274, "grad_norm": 0.7355929613113403, "learning_rate": 0.002828, "loss": 1.2559, "step": 300608 }, { "epoch": 22.564502814258912, "grad_norm": 0.5435024499893188, "learning_rate": 0.002828, "loss": 1.2595, "step": 300672 }, { "epoch": 22.569305816135085, "grad_norm": 0.49227020144462585, "learning_rate": 0.002828, "loss": 1.2578, "step": 300736 }, { "epoch": 22.574108818011258, "grad_norm": 0.5402520895004272, "learning_rate": 0.002828, "loss": 1.2539, "step": 300800 }, { "epoch": 22.57891181988743, "grad_norm": 0.6322745084762573, "learning_rate": 0.002828, "loss": 1.2583, "step": 300864 }, { "epoch": 22.583714821763603, "grad_norm": 0.5023521184921265, "learning_rate": 0.002828, "loss": 1.2526, "step": 300928 }, { "epoch": 22.588517823639776, "grad_norm": 0.5673877000808716, "learning_rate": 0.002828, "loss": 1.2597, "step": 300992 }, { "epoch": 22.593320825515946, "grad_norm": 0.6613234281539917, "learning_rate": 0.002828, "loss": 1.2573, "step": 301056 }, { "epoch": 22.59812382739212, "grad_norm": 0.7267638444900513, "learning_rate": 0.002828, "loss": 1.2582, "step": 301120 }, { "epoch": 22.60292682926829, "grad_norm": 0.5590457916259766, "learning_rate": 0.002828, "loss": 1.2539, "step": 301184 }, { "epoch": 22.607729831144464, "grad_norm": 0.7561535239219666, "learning_rate": 0.002828, "loss": 1.2589, "step": 301248 }, { "epoch": 22.612532833020637, "grad_norm": 0.7155293226242065, "learning_rate": 0.002828, "loss": 1.2557, "step": 301312 }, { "epoch": 22.61733583489681, "grad_norm": 0.5476259589195251, "learning_rate": 0.002828, "loss": 1.2555, "step": 301376 }, { "epoch": 22.622138836772983, "grad_norm": 0.6327958703041077, "learning_rate": 0.002828, "loss": 1.2592, "step": 301440 }, { "epoch": 22.626941838649156, "grad_norm": 0.6359103322029114, "learning_rate": 0.002828, "loss": 1.2686, "step": 301504 }, { "epoch": 22.63174484052533, "grad_norm": 0.5994049906730652, "learning_rate": 0.002828, "loss": 1.2623, "step": 301568 }, { "epoch": 22.6365478424015, "grad_norm": 0.5330276489257812, "learning_rate": 0.002828, "loss": 1.2582, "step": 301632 }, { "epoch": 22.641350844277675, "grad_norm": 0.5444290637969971, "learning_rate": 0.002828, "loss": 1.2632, "step": 301696 }, { "epoch": 22.646153846153847, "grad_norm": 0.5603839159011841, "learning_rate": 0.002828, "loss": 1.2537, "step": 301760 }, { "epoch": 22.65095684803002, "grad_norm": 0.5644785165786743, "learning_rate": 0.002828, "loss": 1.2584, "step": 301824 }, { "epoch": 22.65575984990619, "grad_norm": 0.5198463201522827, "learning_rate": 0.002828, "loss": 1.266, "step": 301888 }, { "epoch": 22.660562851782363, "grad_norm": 0.4774089455604553, "learning_rate": 0.002828, "loss": 1.2642, "step": 301952 }, { "epoch": 22.665365853658535, "grad_norm": 0.5863246321678162, "learning_rate": 0.002828, "loss": 1.2603, "step": 302016 }, { "epoch": 22.67016885553471, "grad_norm": 0.6068688631057739, "learning_rate": 0.002828, "loss": 1.2679, "step": 302080 }, { "epoch": 22.67497185741088, "grad_norm": 0.6282370686531067, "learning_rate": 0.002828, "loss": 1.2609, "step": 302144 }, { "epoch": 22.679774859287054, "grad_norm": 0.6089537739753723, "learning_rate": 0.002828, "loss": 1.2618, "step": 302208 }, { "epoch": 22.684577861163227, "grad_norm": 0.4584980607032776, "learning_rate": 0.002828, "loss": 1.2587, "step": 302272 }, { "epoch": 22.6893808630394, "grad_norm": 0.6628486514091492, "learning_rate": 0.002828, "loss": 1.2617, "step": 302336 }, { "epoch": 22.694183864915573, "grad_norm": 0.5792520642280579, "learning_rate": 0.002828, "loss": 1.2596, "step": 302400 }, { "epoch": 22.698986866791746, "grad_norm": 0.6892536878585815, "learning_rate": 0.002828, "loss": 1.2658, "step": 302464 }, { "epoch": 22.70378986866792, "grad_norm": 0.6139593720436096, "learning_rate": 0.002828, "loss": 1.258, "step": 302528 }, { "epoch": 22.70859287054409, "grad_norm": 0.7230052947998047, "learning_rate": 0.002828, "loss": 1.2583, "step": 302592 }, { "epoch": 22.713395872420264, "grad_norm": 0.65574049949646, "learning_rate": 0.002828, "loss": 1.2598, "step": 302656 }, { "epoch": 22.718198874296434, "grad_norm": 0.5445919036865234, "learning_rate": 0.002828, "loss": 1.2613, "step": 302720 }, { "epoch": 22.723001876172606, "grad_norm": 0.5585668087005615, "learning_rate": 0.002828, "loss": 1.2664, "step": 302784 }, { "epoch": 22.72780487804878, "grad_norm": 0.46071845293045044, "learning_rate": 0.002828, "loss": 1.2616, "step": 302848 }, { "epoch": 22.732607879924952, "grad_norm": 0.5296932458877563, "learning_rate": 0.002828, "loss": 1.2626, "step": 302912 }, { "epoch": 22.737410881801125, "grad_norm": 0.592583954334259, "learning_rate": 0.002828, "loss": 1.2579, "step": 302976 }, { "epoch": 22.742213883677298, "grad_norm": 0.4957088828086853, "learning_rate": 0.002828, "loss": 1.2663, "step": 303040 }, { "epoch": 22.74701688555347, "grad_norm": 0.6246522068977356, "learning_rate": 0.002828, "loss": 1.2614, "step": 303104 }, { "epoch": 22.751819887429644, "grad_norm": 0.6576674580574036, "learning_rate": 0.002828, "loss": 1.2598, "step": 303168 }, { "epoch": 22.756622889305817, "grad_norm": 0.48002710938453674, "learning_rate": 0.002828, "loss": 1.2548, "step": 303232 }, { "epoch": 22.76142589118199, "grad_norm": 0.6484505534172058, "learning_rate": 0.002828, "loss": 1.2603, "step": 303296 }, { "epoch": 22.766228893058162, "grad_norm": 0.5288729071617126, "learning_rate": 0.002828, "loss": 1.2497, "step": 303360 }, { "epoch": 22.771031894934335, "grad_norm": 0.595810055732727, "learning_rate": 0.002828, "loss": 1.2533, "step": 303424 }, { "epoch": 22.775834896810508, "grad_norm": 0.5582146048545837, "learning_rate": 0.002828, "loss": 1.2586, "step": 303488 }, { "epoch": 22.780637898686678, "grad_norm": 0.6199321746826172, "learning_rate": 0.002828, "loss": 1.252, "step": 303552 }, { "epoch": 22.78544090056285, "grad_norm": 0.5382596254348755, "learning_rate": 0.002828, "loss": 1.2543, "step": 303616 }, { "epoch": 22.790243902439023, "grad_norm": 0.5364497303962708, "learning_rate": 0.002828, "loss": 1.253, "step": 303680 }, { "epoch": 22.795046904315196, "grad_norm": 0.6804397106170654, "learning_rate": 0.002828, "loss": 1.2511, "step": 303744 }, { "epoch": 22.79984990619137, "grad_norm": 0.6165252923965454, "learning_rate": 0.002828, "loss": 1.2578, "step": 303808 }, { "epoch": 22.804652908067542, "grad_norm": 0.6188555359840393, "learning_rate": 0.002828, "loss": 1.2545, "step": 303872 }, { "epoch": 22.809455909943715, "grad_norm": 0.5402896404266357, "learning_rate": 0.002828, "loss": 1.257, "step": 303936 }, { "epoch": 22.814258911819888, "grad_norm": 0.5374215245246887, "learning_rate": 0.002828, "loss": 1.2541, "step": 304000 }, { "epoch": 22.81906191369606, "grad_norm": 0.6006323099136353, "learning_rate": 0.002828, "loss": 1.2603, "step": 304064 }, { "epoch": 22.823864915572234, "grad_norm": 0.570731520652771, "learning_rate": 0.002828, "loss": 1.2523, "step": 304128 }, { "epoch": 22.828667917448406, "grad_norm": 0.7125750184059143, "learning_rate": 0.002828, "loss": 1.2607, "step": 304192 }, { "epoch": 22.83347091932458, "grad_norm": 0.4166480004787445, "learning_rate": 0.002828, "loss": 1.254, "step": 304256 }, { "epoch": 22.838273921200752, "grad_norm": 0.6014498472213745, "learning_rate": 0.002828, "loss": 1.2516, "step": 304320 }, { "epoch": 22.84307692307692, "grad_norm": 0.6451423764228821, "learning_rate": 0.002828, "loss": 1.2588, "step": 304384 }, { "epoch": 22.847879924953094, "grad_norm": 0.5513681769371033, "learning_rate": 0.002828, "loss": 1.2577, "step": 304448 }, { "epoch": 22.852682926829267, "grad_norm": 0.5224577188491821, "learning_rate": 0.002828, "loss": 1.257, "step": 304512 }, { "epoch": 22.85748592870544, "grad_norm": 0.7520785331726074, "learning_rate": 0.002828, "loss": 1.2573, "step": 304576 }, { "epoch": 22.862288930581613, "grad_norm": 0.7311927080154419, "learning_rate": 0.002828, "loss": 1.2556, "step": 304640 }, { "epoch": 22.867091932457786, "grad_norm": 0.6609572768211365, "learning_rate": 0.002828, "loss": 1.2586, "step": 304704 }, { "epoch": 22.87189493433396, "grad_norm": 0.5938644409179688, "learning_rate": 0.002828, "loss": 1.2518, "step": 304768 }, { "epoch": 22.87669793621013, "grad_norm": 0.7628321647644043, "learning_rate": 0.002828, "loss": 1.2622, "step": 304832 }, { "epoch": 22.881500938086305, "grad_norm": 0.5340333580970764, "learning_rate": 0.002828, "loss": 1.2495, "step": 304896 }, { "epoch": 22.886303939962477, "grad_norm": 0.6140053868293762, "learning_rate": 0.002828, "loss": 1.2565, "step": 304960 }, { "epoch": 22.89110694183865, "grad_norm": 0.53498774766922, "learning_rate": 0.002828, "loss": 1.2614, "step": 305024 }, { "epoch": 22.895909943714823, "grad_norm": 0.5342109203338623, "learning_rate": 0.002828, "loss": 1.2576, "step": 305088 }, { "epoch": 22.900712945590996, "grad_norm": 0.4914964735507965, "learning_rate": 0.002828, "loss": 1.2587, "step": 305152 }, { "epoch": 22.905515947467165, "grad_norm": 0.6862971186637878, "learning_rate": 0.002828, "loss": 1.2657, "step": 305216 }, { "epoch": 22.91031894934334, "grad_norm": 0.5172854065895081, "learning_rate": 0.002828, "loss": 1.2579, "step": 305280 }, { "epoch": 22.91512195121951, "grad_norm": 0.6534024477005005, "learning_rate": 0.002828, "loss": 1.2566, "step": 305344 }, { "epoch": 22.919924953095684, "grad_norm": 0.5077025890350342, "learning_rate": 0.002828, "loss": 1.2571, "step": 305408 }, { "epoch": 22.924727954971857, "grad_norm": 0.5739777684211731, "learning_rate": 0.002828, "loss": 1.2576, "step": 305472 }, { "epoch": 22.92953095684803, "grad_norm": 0.5454936623573303, "learning_rate": 0.002828, "loss": 1.2545, "step": 305536 }, { "epoch": 22.934333958724203, "grad_norm": 0.5912908315658569, "learning_rate": 0.002828, "loss": 1.2565, "step": 305600 }, { "epoch": 22.939136960600376, "grad_norm": 0.7086852788925171, "learning_rate": 0.002828, "loss": 1.2585, "step": 305664 }, { "epoch": 22.94393996247655, "grad_norm": 0.6017676591873169, "learning_rate": 0.002828, "loss": 1.2558, "step": 305728 }, { "epoch": 22.94874296435272, "grad_norm": 0.4917507767677307, "learning_rate": 0.002828, "loss": 1.2609, "step": 305792 }, { "epoch": 22.953545966228894, "grad_norm": 0.6148297786712646, "learning_rate": 0.002828, "loss": 1.2641, "step": 305856 }, { "epoch": 22.958348968105067, "grad_norm": 0.4830070436000824, "learning_rate": 0.002828, "loss": 1.2533, "step": 305920 }, { "epoch": 22.96315196998124, "grad_norm": 0.6182305216789246, "learning_rate": 0.002828, "loss": 1.2576, "step": 305984 }, { "epoch": 22.96795497185741, "grad_norm": 0.628701388835907, "learning_rate": 0.002828, "loss": 1.2589, "step": 306048 }, { "epoch": 22.972757973733582, "grad_norm": 0.586304247379303, "learning_rate": 0.002828, "loss": 1.2603, "step": 306112 }, { "epoch": 22.977560975609755, "grad_norm": 0.5976091027259827, "learning_rate": 0.002828, "loss": 1.2561, "step": 306176 }, { "epoch": 22.982363977485928, "grad_norm": 0.6377904415130615, "learning_rate": 0.002828, "loss": 1.2572, "step": 306240 }, { "epoch": 22.9871669793621, "grad_norm": 0.6798734664916992, "learning_rate": 0.002828, "loss": 1.2614, "step": 306304 }, { "epoch": 22.991969981238274, "grad_norm": 0.6504051685333252, "learning_rate": 0.002828, "loss": 1.2579, "step": 306368 }, { "epoch": 22.996772983114447, "grad_norm": 0.6908726096153259, "learning_rate": 0.002828, "loss": 1.2567, "step": 306432 }, { "epoch": 23.00157598499062, "grad_norm": 0.54974764585495, "learning_rate": 0.002828, "loss": 1.2522, "step": 306496 }, { "epoch": 23.006378986866793, "grad_norm": 0.5098195672035217, "learning_rate": 0.002828, "loss": 1.24, "step": 306560 }, { "epoch": 23.011181988742965, "grad_norm": 0.5870168805122375, "learning_rate": 0.002828, "loss": 1.2337, "step": 306624 }, { "epoch": 23.01598499061914, "grad_norm": 0.6402837634086609, "learning_rate": 0.002828, "loss": 1.2408, "step": 306688 }, { "epoch": 23.02078799249531, "grad_norm": 0.680777907371521, "learning_rate": 0.002828, "loss": 1.2426, "step": 306752 }, { "epoch": 23.02559099437148, "grad_norm": 0.69080650806427, "learning_rate": 0.002828, "loss": 1.2435, "step": 306816 }, { "epoch": 23.030393996247653, "grad_norm": 0.5605624914169312, "learning_rate": 0.002828, "loss": 1.2378, "step": 306880 }, { "epoch": 23.035196998123826, "grad_norm": 0.5387503504753113, "learning_rate": 0.002828, "loss": 1.2391, "step": 306944 }, { "epoch": 23.04, "grad_norm": 0.6666533946990967, "learning_rate": 0.002828, "loss": 1.2399, "step": 307008 }, { "epoch": 23.044803001876172, "grad_norm": 0.6080389022827148, "learning_rate": 0.002828, "loss": 1.2466, "step": 307072 }, { "epoch": 23.049606003752345, "grad_norm": 0.562734842300415, "learning_rate": 0.002828, "loss": 1.2389, "step": 307136 }, { "epoch": 23.054409005628518, "grad_norm": 0.6044827103614807, "learning_rate": 0.002828, "loss": 1.2388, "step": 307200 }, { "epoch": 23.05921200750469, "grad_norm": 0.6313736438751221, "learning_rate": 0.002828, "loss": 1.2342, "step": 307264 }, { "epoch": 23.064015009380864, "grad_norm": 0.5507990717887878, "learning_rate": 0.002828, "loss": 1.2448, "step": 307328 }, { "epoch": 23.068818011257036, "grad_norm": 0.49053919315338135, "learning_rate": 0.002828, "loss": 1.2453, "step": 307392 }, { "epoch": 23.07362101313321, "grad_norm": 0.5089336633682251, "learning_rate": 0.002828, "loss": 1.237, "step": 307456 }, { "epoch": 23.078424015009382, "grad_norm": 0.5594099164009094, "learning_rate": 0.002828, "loss": 1.2404, "step": 307520 }, { "epoch": 23.083227016885555, "grad_norm": 0.6270295977592468, "learning_rate": 0.002828, "loss": 1.2442, "step": 307584 }, { "epoch": 23.088030018761724, "grad_norm": 0.5999992489814758, "learning_rate": 0.002828, "loss": 1.2422, "step": 307648 }, { "epoch": 23.092833020637897, "grad_norm": 0.6353987455368042, "learning_rate": 0.002828, "loss": 1.241, "step": 307712 }, { "epoch": 23.09763602251407, "grad_norm": 0.6352692246437073, "learning_rate": 0.002828, "loss": 1.2475, "step": 307776 }, { "epoch": 23.102439024390243, "grad_norm": 0.4999005198478699, "learning_rate": 0.002828, "loss": 1.2432, "step": 307840 }, { "epoch": 23.107242026266416, "grad_norm": 0.5238738656044006, "learning_rate": 0.002828, "loss": 1.2434, "step": 307904 }, { "epoch": 23.11204502814259, "grad_norm": 0.46657606959342957, "learning_rate": 0.002828, "loss": 1.2468, "step": 307968 }, { "epoch": 23.11684803001876, "grad_norm": 0.7083847522735596, "learning_rate": 0.002828, "loss": 1.2546, "step": 308032 }, { "epoch": 23.121651031894935, "grad_norm": 0.7005468606948853, "learning_rate": 0.002828, "loss": 1.2384, "step": 308096 }, { "epoch": 23.126454033771108, "grad_norm": 0.5426920652389526, "learning_rate": 0.002828, "loss": 1.2453, "step": 308160 }, { "epoch": 23.13125703564728, "grad_norm": 0.5448518395423889, "learning_rate": 0.002828, "loss": 1.2519, "step": 308224 }, { "epoch": 23.136060037523453, "grad_norm": 0.6012153625488281, "learning_rate": 0.002828, "loss": 1.2515, "step": 308288 }, { "epoch": 23.140863039399626, "grad_norm": 0.5151079893112183, "learning_rate": 0.002828, "loss": 1.2365, "step": 308352 }, { "epoch": 23.1456660412758, "grad_norm": 0.704038143157959, "learning_rate": 0.002828, "loss": 1.2418, "step": 308416 }, { "epoch": 23.15046904315197, "grad_norm": 0.6568431854248047, "learning_rate": 0.002828, "loss": 1.2478, "step": 308480 }, { "epoch": 23.15527204502814, "grad_norm": 0.6518442034721375, "learning_rate": 0.002828, "loss": 1.2481, "step": 308544 }, { "epoch": 23.160075046904314, "grad_norm": 0.5150387287139893, "learning_rate": 0.002828, "loss": 1.2459, "step": 308608 }, { "epoch": 23.164878048780487, "grad_norm": 0.5261139273643494, "learning_rate": 0.002828, "loss": 1.2471, "step": 308672 }, { "epoch": 23.16968105065666, "grad_norm": 0.6531349420547485, "learning_rate": 0.002828, "loss": 1.2472, "step": 308736 }, { "epoch": 23.174484052532833, "grad_norm": 0.5209482312202454, "learning_rate": 0.002828, "loss": 1.2441, "step": 308800 }, { "epoch": 23.179287054409006, "grad_norm": 0.5978618860244751, "learning_rate": 0.002828, "loss": 1.252, "step": 308864 }, { "epoch": 23.18409005628518, "grad_norm": 0.5009728670120239, "learning_rate": 0.002828, "loss": 1.2424, "step": 308928 }, { "epoch": 23.18889305816135, "grad_norm": 0.5005267262458801, "learning_rate": 0.002828, "loss": 1.2517, "step": 308992 }, { "epoch": 23.193696060037524, "grad_norm": 0.5890987515449524, "learning_rate": 0.002828, "loss": 1.2473, "step": 309056 }, { "epoch": 23.198499061913697, "grad_norm": 0.5844420194625854, "learning_rate": 0.002828, "loss": 1.2487, "step": 309120 }, { "epoch": 23.20330206378987, "grad_norm": 0.6215989589691162, "learning_rate": 0.002828, "loss": 1.248, "step": 309184 }, { "epoch": 23.208105065666043, "grad_norm": 0.6483440399169922, "learning_rate": 0.002828, "loss": 1.2436, "step": 309248 }, { "epoch": 23.212908067542212, "grad_norm": 0.773412823677063, "learning_rate": 0.002828, "loss": 1.2488, "step": 309312 }, { "epoch": 23.217711069418385, "grad_norm": 0.6054885387420654, "learning_rate": 0.002828, "loss": 1.2511, "step": 309376 }, { "epoch": 23.222514071294558, "grad_norm": 0.6024729013442993, "learning_rate": 0.002828, "loss": 1.2474, "step": 309440 }, { "epoch": 23.22731707317073, "grad_norm": 0.6172342896461487, "learning_rate": 0.002828, "loss": 1.2559, "step": 309504 }, { "epoch": 23.232120075046904, "grad_norm": 0.5660899877548218, "learning_rate": 0.002828, "loss": 1.2453, "step": 309568 }, { "epoch": 23.236923076923077, "grad_norm": 0.5336431860923767, "learning_rate": 0.002828, "loss": 1.2432, "step": 309632 }, { "epoch": 23.24172607879925, "grad_norm": 0.5058355927467346, "learning_rate": 0.002828, "loss": 1.2503, "step": 309696 }, { "epoch": 23.246529080675423, "grad_norm": 0.6016878485679626, "learning_rate": 0.002828, "loss": 1.2588, "step": 309760 }, { "epoch": 23.251332082551595, "grad_norm": 0.5316280722618103, "learning_rate": 0.002828, "loss": 1.2511, "step": 309824 }, { "epoch": 23.25613508442777, "grad_norm": 0.6197585463523865, "learning_rate": 0.002828, "loss": 1.2472, "step": 309888 }, { "epoch": 23.26093808630394, "grad_norm": 0.5783255100250244, "learning_rate": 0.002828, "loss": 1.2519, "step": 309952 }, { "epoch": 23.265741088180114, "grad_norm": 0.6217781901359558, "learning_rate": 0.002828, "loss": 1.2505, "step": 310016 }, { "epoch": 23.270544090056283, "grad_norm": 0.5191296339035034, "learning_rate": 0.002828, "loss": 1.2459, "step": 310080 }, { "epoch": 23.275347091932456, "grad_norm": 0.6412822008132935, "learning_rate": 0.002828, "loss": 1.2503, "step": 310144 }, { "epoch": 23.28015009380863, "grad_norm": 0.5884774327278137, "learning_rate": 0.002828, "loss": 1.2491, "step": 310208 }, { "epoch": 23.284953095684802, "grad_norm": 0.621462345123291, "learning_rate": 0.002828, "loss": 1.2527, "step": 310272 }, { "epoch": 23.289756097560975, "grad_norm": 0.5567516684532166, "learning_rate": 0.002828, "loss": 1.2462, "step": 310336 }, { "epoch": 23.294559099437148, "grad_norm": 0.5859234929084778, "learning_rate": 0.002828, "loss": 1.2466, "step": 310400 }, { "epoch": 23.29936210131332, "grad_norm": 0.5633601546287537, "learning_rate": 0.002828, "loss": 1.2518, "step": 310464 }, { "epoch": 23.304165103189494, "grad_norm": 0.6056027412414551, "learning_rate": 0.002828, "loss": 1.249, "step": 310528 }, { "epoch": 23.308968105065667, "grad_norm": 0.6314893960952759, "learning_rate": 0.002828, "loss": 1.2521, "step": 310592 }, { "epoch": 23.31377110694184, "grad_norm": 0.47842320799827576, "learning_rate": 0.002828, "loss": 1.2545, "step": 310656 }, { "epoch": 23.318574108818012, "grad_norm": 0.5698492527008057, "learning_rate": 0.002828, "loss": 1.2455, "step": 310720 }, { "epoch": 23.323377110694185, "grad_norm": 0.6335123777389526, "learning_rate": 0.002828, "loss": 1.2493, "step": 310784 }, { "epoch": 23.328180112570358, "grad_norm": 0.7336736917495728, "learning_rate": 0.002828, "loss": 1.244, "step": 310848 }, { "epoch": 23.332983114446527, "grad_norm": 0.5209872126579285, "learning_rate": 0.002828, "loss": 1.2477, "step": 310912 }, { "epoch": 23.3377861163227, "grad_norm": 0.5684576630592346, "learning_rate": 0.002828, "loss": 1.2531, "step": 310976 }, { "epoch": 23.342589118198873, "grad_norm": 0.5723300576210022, "learning_rate": 0.002828, "loss": 1.2519, "step": 311040 }, { "epoch": 23.347392120075046, "grad_norm": 0.48642081022262573, "learning_rate": 0.002828, "loss": 1.2462, "step": 311104 }, { "epoch": 23.35219512195122, "grad_norm": 0.5746228098869324, "learning_rate": 0.002828, "loss": 1.2518, "step": 311168 }, { "epoch": 23.356998123827392, "grad_norm": 0.6249984502792358, "learning_rate": 0.002828, "loss": 1.249, "step": 311232 }, { "epoch": 23.361801125703565, "grad_norm": 0.4942440092563629, "learning_rate": 0.002828, "loss": 1.252, "step": 311296 }, { "epoch": 23.366604127579738, "grad_norm": 0.610084056854248, "learning_rate": 0.002828, "loss": 1.2458, "step": 311360 }, { "epoch": 23.37140712945591, "grad_norm": 0.5461624264717102, "learning_rate": 0.002828, "loss": 1.2532, "step": 311424 }, { "epoch": 23.376210131332083, "grad_norm": 0.5444890856742859, "learning_rate": 0.002828, "loss": 1.2524, "step": 311488 }, { "epoch": 23.381013133208256, "grad_norm": 0.5665737986564636, "learning_rate": 0.002828, "loss": 1.2473, "step": 311552 }, { "epoch": 23.38581613508443, "grad_norm": 0.6717258095741272, "learning_rate": 0.002828, "loss": 1.2475, "step": 311616 }, { "epoch": 23.390619136960602, "grad_norm": 0.6941819787025452, "learning_rate": 0.002828, "loss": 1.2523, "step": 311680 }, { "epoch": 23.39542213883677, "grad_norm": 0.6986182332038879, "learning_rate": 0.002828, "loss": 1.2545, "step": 311744 }, { "epoch": 23.400225140712944, "grad_norm": 0.5212686657905579, "learning_rate": 0.002828, "loss": 1.2437, "step": 311808 }, { "epoch": 23.405028142589117, "grad_norm": 0.5907437205314636, "learning_rate": 0.002828, "loss": 1.251, "step": 311872 }, { "epoch": 23.40983114446529, "grad_norm": 0.5176777243614197, "learning_rate": 0.002828, "loss": 1.2488, "step": 311936 }, { "epoch": 23.414634146341463, "grad_norm": 0.5791288614273071, "learning_rate": 0.002828, "loss": 1.2541, "step": 312000 }, { "epoch": 23.419437148217636, "grad_norm": 0.6610795855522156, "learning_rate": 0.002828, "loss": 1.2544, "step": 312064 }, { "epoch": 23.42424015009381, "grad_norm": 0.5516941547393799, "learning_rate": 0.002828, "loss": 1.2451, "step": 312128 }, { "epoch": 23.42904315196998, "grad_norm": 0.5505845546722412, "learning_rate": 0.002828, "loss": 1.2558, "step": 312192 }, { "epoch": 23.433846153846154, "grad_norm": 0.6624215841293335, "learning_rate": 0.002828, "loss": 1.2516, "step": 312256 }, { "epoch": 23.438649155722327, "grad_norm": 0.560444176197052, "learning_rate": 0.002828, "loss": 1.2552, "step": 312320 }, { "epoch": 23.4434521575985, "grad_norm": 0.5208444595336914, "learning_rate": 0.002828, "loss": 1.2489, "step": 312384 }, { "epoch": 23.448255159474673, "grad_norm": 0.6262831091880798, "learning_rate": 0.002828, "loss": 1.2564, "step": 312448 }, { "epoch": 23.453058161350846, "grad_norm": 0.5905798673629761, "learning_rate": 0.002828, "loss": 1.2508, "step": 312512 }, { "epoch": 23.457861163227015, "grad_norm": 0.6314270496368408, "learning_rate": 0.002828, "loss": 1.2546, "step": 312576 }, { "epoch": 23.462664165103188, "grad_norm": 0.5014112591743469, "learning_rate": 0.002828, "loss": 1.2592, "step": 312640 }, { "epoch": 23.46746716697936, "grad_norm": 0.5422669053077698, "learning_rate": 0.002828, "loss": 1.2542, "step": 312704 }, { "epoch": 23.472270168855534, "grad_norm": 0.6367536187171936, "learning_rate": 0.002828, "loss": 1.2526, "step": 312768 }, { "epoch": 23.477073170731707, "grad_norm": 0.4678763151168823, "learning_rate": 0.002828, "loss": 1.2562, "step": 312832 }, { "epoch": 23.48187617260788, "grad_norm": 0.5110854506492615, "learning_rate": 0.002828, "loss": 1.2489, "step": 312896 }, { "epoch": 23.486679174484053, "grad_norm": 0.6120699644088745, "learning_rate": 0.002828, "loss": 1.2528, "step": 312960 }, { "epoch": 23.491482176360226, "grad_norm": 0.5177861452102661, "learning_rate": 0.002828, "loss": 1.2532, "step": 313024 }, { "epoch": 23.4962851782364, "grad_norm": 0.5200073719024658, "learning_rate": 0.002828, "loss": 1.2581, "step": 313088 }, { "epoch": 23.50108818011257, "grad_norm": 0.5814197063446045, "learning_rate": 0.002828, "loss": 1.2497, "step": 313152 }, { "epoch": 23.505891181988744, "grad_norm": 0.5872126221656799, "learning_rate": 0.002828, "loss": 1.2538, "step": 313216 }, { "epoch": 23.510694183864917, "grad_norm": 0.7856559157371521, "learning_rate": 0.002828, "loss": 1.2546, "step": 313280 }, { "epoch": 23.51549718574109, "grad_norm": 0.4535023868083954, "learning_rate": 0.002828, "loss": 1.2582, "step": 313344 }, { "epoch": 23.52030018761726, "grad_norm": 0.5431272387504578, "learning_rate": 0.002828, "loss": 1.2562, "step": 313408 }, { "epoch": 23.525103189493432, "grad_norm": 0.6125060319900513, "learning_rate": 0.002828, "loss": 1.2514, "step": 313472 }, { "epoch": 23.529906191369605, "grad_norm": 0.6155609488487244, "learning_rate": 0.002828, "loss": 1.2592, "step": 313536 }, { "epoch": 23.534709193245778, "grad_norm": 0.7311649322509766, "learning_rate": 0.002828, "loss": 1.2579, "step": 313600 }, { "epoch": 23.53951219512195, "grad_norm": 0.5599619746208191, "learning_rate": 0.002828, "loss": 1.2623, "step": 313664 }, { "epoch": 23.544315196998124, "grad_norm": 0.5777077078819275, "learning_rate": 0.002828, "loss": 1.2585, "step": 313728 }, { "epoch": 23.549118198874297, "grad_norm": 0.5461658239364624, "learning_rate": 0.002828, "loss": 1.2617, "step": 313792 }, { "epoch": 23.55392120075047, "grad_norm": 0.5293754935264587, "learning_rate": 0.002828, "loss": 1.2525, "step": 313856 }, { "epoch": 23.558724202626642, "grad_norm": 0.5593359470367432, "learning_rate": 0.002828, "loss": 1.2644, "step": 313920 }, { "epoch": 23.563527204502815, "grad_norm": 0.6732170581817627, "learning_rate": 0.002828, "loss": 1.2615, "step": 313984 }, { "epoch": 23.568330206378988, "grad_norm": 0.6268848776817322, "learning_rate": 0.002828, "loss": 1.257, "step": 314048 }, { "epoch": 23.57313320825516, "grad_norm": 0.5705792307853699, "learning_rate": 0.002828, "loss": 1.2537, "step": 314112 }, { "epoch": 23.577936210131334, "grad_norm": 0.5307909846305847, "learning_rate": 0.002828, "loss": 1.2529, "step": 314176 }, { "epoch": 23.582739212007503, "grad_norm": 0.7117211222648621, "learning_rate": 0.002828, "loss": 1.2586, "step": 314240 }, { "epoch": 23.587542213883676, "grad_norm": 0.5914830565452576, "learning_rate": 0.002828, "loss": 1.2587, "step": 314304 }, { "epoch": 23.59234521575985, "grad_norm": 0.5162680149078369, "learning_rate": 0.002828, "loss": 1.2541, "step": 314368 }, { "epoch": 23.597148217636022, "grad_norm": 0.5690885186195374, "learning_rate": 0.002828, "loss": 1.2557, "step": 314432 }, { "epoch": 23.601951219512195, "grad_norm": 0.45376938581466675, "learning_rate": 0.002828, "loss": 1.2629, "step": 314496 }, { "epoch": 23.606754221388368, "grad_norm": 0.6359778642654419, "learning_rate": 0.002828, "loss": 1.2536, "step": 314560 }, { "epoch": 23.61155722326454, "grad_norm": 0.6286948919296265, "learning_rate": 0.002828, "loss": 1.2577, "step": 314624 }, { "epoch": 23.616360225140713, "grad_norm": 0.6775930523872375, "learning_rate": 0.002828, "loss": 1.2615, "step": 314688 }, { "epoch": 23.621163227016886, "grad_norm": 0.6079651713371277, "learning_rate": 0.002828, "loss": 1.2559, "step": 314752 }, { "epoch": 23.62596622889306, "grad_norm": 0.6116461157798767, "learning_rate": 0.002828, "loss": 1.261, "step": 314816 }, { "epoch": 23.630769230769232, "grad_norm": 0.6348520517349243, "learning_rate": 0.002828, "loss": 1.2597, "step": 314880 }, { "epoch": 23.635572232645405, "grad_norm": 0.6520925164222717, "learning_rate": 0.002828, "loss": 1.256, "step": 314944 }, { "epoch": 23.640375234521574, "grad_norm": 0.5682945847511292, "learning_rate": 0.002828, "loss": 1.2589, "step": 315008 }, { "epoch": 23.645178236397747, "grad_norm": 0.6923990249633789, "learning_rate": 0.002828, "loss": 1.2578, "step": 315072 }, { "epoch": 23.64998123827392, "grad_norm": 0.5347400903701782, "learning_rate": 0.002828, "loss": 1.25, "step": 315136 }, { "epoch": 23.654784240150093, "grad_norm": 0.6759274005889893, "learning_rate": 0.002828, "loss": 1.2533, "step": 315200 }, { "epoch": 23.659587242026266, "grad_norm": 0.587490439414978, "learning_rate": 0.002828, "loss": 1.2617, "step": 315264 }, { "epoch": 23.66439024390244, "grad_norm": 0.5441737174987793, "learning_rate": 0.002828, "loss": 1.2507, "step": 315328 }, { "epoch": 23.66919324577861, "grad_norm": 0.5269403457641602, "learning_rate": 0.002828, "loss": 1.2596, "step": 315392 }, { "epoch": 23.673996247654784, "grad_norm": 0.5322028398513794, "learning_rate": 0.002828, "loss": 1.256, "step": 315456 }, { "epoch": 23.678799249530957, "grad_norm": 0.493210107088089, "learning_rate": 0.002828, "loss": 1.2605, "step": 315520 }, { "epoch": 23.68360225140713, "grad_norm": 0.5588217377662659, "learning_rate": 0.002828, "loss": 1.2501, "step": 315584 }, { "epoch": 23.688405253283303, "grad_norm": 0.49217018485069275, "learning_rate": 0.002828, "loss": 1.2596, "step": 315648 }, { "epoch": 23.693208255159476, "grad_norm": 0.7225490808486938, "learning_rate": 0.002828, "loss": 1.256, "step": 315712 }, { "epoch": 23.69801125703565, "grad_norm": 0.5031096935272217, "learning_rate": 0.002828, "loss": 1.2539, "step": 315776 }, { "epoch": 23.70281425891182, "grad_norm": 0.502692461013794, "learning_rate": 0.002828, "loss": 1.2592, "step": 315840 }, { "epoch": 23.70761726078799, "grad_norm": 0.6043107509613037, "learning_rate": 0.002828, "loss": 1.2516, "step": 315904 }, { "epoch": 23.712420262664164, "grad_norm": 0.5847479104995728, "learning_rate": 0.002828, "loss": 1.2596, "step": 315968 }, { "epoch": 23.717223264540337, "grad_norm": 0.5990855693817139, "learning_rate": 0.002828, "loss": 1.2573, "step": 316032 }, { "epoch": 23.72202626641651, "grad_norm": 0.5547415614128113, "learning_rate": 0.002828, "loss": 1.2683, "step": 316096 }, { "epoch": 23.726829268292683, "grad_norm": 0.5649647116661072, "learning_rate": 0.002828, "loss": 1.2652, "step": 316160 }, { "epoch": 23.731632270168856, "grad_norm": 0.5840405225753784, "learning_rate": 0.002828, "loss": 1.2621, "step": 316224 }, { "epoch": 23.73643527204503, "grad_norm": 0.5046823620796204, "learning_rate": 0.002828, "loss": 1.2634, "step": 316288 }, { "epoch": 23.7412382739212, "grad_norm": 0.6047646403312683, "learning_rate": 0.002828, "loss": 1.2597, "step": 316352 }, { "epoch": 23.746041275797374, "grad_norm": 0.7113666534423828, "learning_rate": 0.002828, "loss": 1.2622, "step": 316416 }, { "epoch": 23.750844277673547, "grad_norm": 0.6494774222373962, "learning_rate": 0.002828, "loss": 1.2525, "step": 316480 }, { "epoch": 23.75564727954972, "grad_norm": 0.5977435111999512, "learning_rate": 0.002828, "loss": 1.2635, "step": 316544 }, { "epoch": 23.760450281425893, "grad_norm": 0.5716019868850708, "learning_rate": 0.002828, "loss": 1.2613, "step": 316608 }, { "epoch": 23.765253283302062, "grad_norm": 0.5523091554641724, "learning_rate": 0.002828, "loss": 1.2553, "step": 316672 }, { "epoch": 23.770056285178235, "grad_norm": 0.5492827296257019, "learning_rate": 0.002828, "loss": 1.2604, "step": 316736 }, { "epoch": 23.774859287054408, "grad_norm": 0.5364034175872803, "learning_rate": 0.002828, "loss": 1.2621, "step": 316800 }, { "epoch": 23.77966228893058, "grad_norm": 0.5898091197013855, "learning_rate": 0.002828, "loss": 1.2528, "step": 316864 }, { "epoch": 23.784465290806754, "grad_norm": 0.5669413208961487, "learning_rate": 0.002828, "loss": 1.2593, "step": 316928 }, { "epoch": 23.789268292682927, "grad_norm": 0.600856602191925, "learning_rate": 0.002828, "loss": 1.2639, "step": 316992 }, { "epoch": 23.7940712945591, "grad_norm": 0.47627902030944824, "learning_rate": 0.002828, "loss": 1.2625, "step": 317056 }, { "epoch": 23.798874296435272, "grad_norm": 0.5335204005241394, "learning_rate": 0.002828, "loss": 1.2665, "step": 317120 }, { "epoch": 23.803677298311445, "grad_norm": 0.622696042060852, "learning_rate": 0.002828, "loss": 1.2573, "step": 317184 }, { "epoch": 23.808480300187618, "grad_norm": 0.6902386546134949, "learning_rate": 0.002828, "loss": 1.2627, "step": 317248 }, { "epoch": 23.81328330206379, "grad_norm": 0.579950213432312, "learning_rate": 0.002828, "loss": 1.2636, "step": 317312 }, { "epoch": 23.818086303939964, "grad_norm": 0.5069259405136108, "learning_rate": 0.002828, "loss": 1.2591, "step": 317376 }, { "epoch": 23.822889305816133, "grad_norm": 0.8760371804237366, "learning_rate": 0.002828, "loss": 1.2586, "step": 317440 }, { "epoch": 23.827692307692306, "grad_norm": 0.6794724464416504, "learning_rate": 0.002828, "loss": 1.2548, "step": 317504 }, { "epoch": 23.83249530956848, "grad_norm": 0.5844535231590271, "learning_rate": 0.002828, "loss": 1.2602, "step": 317568 }, { "epoch": 23.837298311444652, "grad_norm": 0.5497626662254333, "learning_rate": 0.002828, "loss": 1.2613, "step": 317632 }, { "epoch": 23.842101313320825, "grad_norm": 0.5619154572486877, "learning_rate": 0.002828, "loss": 1.2647, "step": 317696 }, { "epoch": 23.846904315196998, "grad_norm": 0.49801504611968994, "learning_rate": 0.002828, "loss": 1.2678, "step": 317760 }, { "epoch": 23.85170731707317, "grad_norm": 0.5336838364601135, "learning_rate": 0.002828, "loss": 1.264, "step": 317824 }, { "epoch": 23.856510318949343, "grad_norm": 0.5173972845077515, "learning_rate": 0.002828, "loss": 1.2602, "step": 317888 }, { "epoch": 23.861313320825516, "grad_norm": 0.5220203399658203, "learning_rate": 0.002828, "loss": 1.2601, "step": 317952 }, { "epoch": 23.86611632270169, "grad_norm": 0.5974367260932922, "learning_rate": 0.002828, "loss": 1.2599, "step": 318016 }, { "epoch": 23.870919324577862, "grad_norm": 0.6067824363708496, "learning_rate": 0.002828, "loss": 1.2618, "step": 318080 }, { "epoch": 23.875722326454035, "grad_norm": 0.6112476587295532, "learning_rate": 0.002828, "loss": 1.261, "step": 318144 }, { "epoch": 23.880525328330208, "grad_norm": 0.5728985071182251, "learning_rate": 0.002828, "loss": 1.2524, "step": 318208 }, { "epoch": 23.885328330206377, "grad_norm": 0.5144209265708923, "learning_rate": 0.002828, "loss": 1.2581, "step": 318272 }, { "epoch": 23.89013133208255, "grad_norm": 0.5003145337104797, "learning_rate": 0.002828, "loss": 1.2571, "step": 318336 }, { "epoch": 23.894934333958723, "grad_norm": 0.5286693572998047, "learning_rate": 0.002828, "loss": 1.2614, "step": 318400 }, { "epoch": 23.899737335834896, "grad_norm": 0.6406574249267578, "learning_rate": 0.002828, "loss": 1.2615, "step": 318464 }, { "epoch": 23.90454033771107, "grad_norm": 0.6060843467712402, "learning_rate": 0.002828, "loss": 1.2617, "step": 318528 }, { "epoch": 23.90934333958724, "grad_norm": 0.5950831174850464, "learning_rate": 0.002828, "loss": 1.2646, "step": 318592 }, { "epoch": 23.914146341463415, "grad_norm": 0.6237718462944031, "learning_rate": 0.002828, "loss": 1.2632, "step": 318656 }, { "epoch": 23.918949343339587, "grad_norm": 0.6165586709976196, "learning_rate": 0.002828, "loss": 1.2601, "step": 318720 }, { "epoch": 23.92375234521576, "grad_norm": 0.6163604259490967, "learning_rate": 0.002828, "loss": 1.2625, "step": 318784 }, { "epoch": 23.928555347091933, "grad_norm": 0.4493919014930725, "learning_rate": 0.002828, "loss": 1.2594, "step": 318848 }, { "epoch": 23.933358348968106, "grad_norm": 0.5826929211616516, "learning_rate": 0.002828, "loss": 1.2648, "step": 318912 }, { "epoch": 23.93816135084428, "grad_norm": 0.5376659631729126, "learning_rate": 0.002828, "loss": 1.2665, "step": 318976 }, { "epoch": 23.942964352720452, "grad_norm": 0.6579872369766235, "learning_rate": 0.002828, "loss": 1.2586, "step": 319040 }, { "epoch": 23.94776735459662, "grad_norm": 0.5282186269760132, "learning_rate": 0.002828, "loss": 1.2665, "step": 319104 }, { "epoch": 23.952570356472794, "grad_norm": 0.666273295879364, "learning_rate": 0.002828, "loss": 1.2608, "step": 319168 }, { "epoch": 23.957373358348967, "grad_norm": 0.5335906147956848, "learning_rate": 0.002828, "loss": 1.2585, "step": 319232 }, { "epoch": 23.96217636022514, "grad_norm": 0.576663613319397, "learning_rate": 0.002828, "loss": 1.26, "step": 319296 }, { "epoch": 23.966979362101313, "grad_norm": 0.565240204334259, "learning_rate": 0.002828, "loss": 1.2635, "step": 319360 }, { "epoch": 23.971782363977486, "grad_norm": 0.5416858196258545, "learning_rate": 0.002828, "loss": 1.262, "step": 319424 }, { "epoch": 23.97658536585366, "grad_norm": 0.6308193206787109, "learning_rate": 0.002828, "loss": 1.259, "step": 319488 }, { "epoch": 23.98138836772983, "grad_norm": 0.6142536401748657, "learning_rate": 0.002828, "loss": 1.2629, "step": 319552 }, { "epoch": 23.986191369606004, "grad_norm": 0.5130268335342407, "learning_rate": 0.002828, "loss": 1.2638, "step": 319616 }, { "epoch": 23.990994371482177, "grad_norm": 0.5770696401596069, "learning_rate": 0.002828, "loss": 1.2651, "step": 319680 }, { "epoch": 23.99579737335835, "grad_norm": 0.5990517139434814, "learning_rate": 0.002828, "loss": 1.2555, "step": 319744 }, { "epoch": 24.000600375234523, "grad_norm": 0.6668261885643005, "learning_rate": 0.002828, "loss": 1.2589, "step": 319808 }, { "epoch": 24.005403377110696, "grad_norm": 0.511762797832489, "learning_rate": 0.002828, "loss": 1.2231, "step": 319872 }, { "epoch": 24.010206378986865, "grad_norm": 0.5417555570602417, "learning_rate": 0.002828, "loss": 1.2193, "step": 319936 }, { "epoch": 24.015009380863038, "grad_norm": 0.5588164925575256, "learning_rate": 0.002828, "loss": 1.2193, "step": 320000 }, { "epoch": 24.01981238273921, "grad_norm": 0.6115003824234009, "learning_rate": 0.002828, "loss": 1.2227, "step": 320064 }, { "epoch": 24.024615384615384, "grad_norm": 0.5852314829826355, "learning_rate": 0.002828, "loss": 1.2229, "step": 320128 }, { "epoch": 24.029418386491557, "grad_norm": 0.5571886301040649, "learning_rate": 0.002828, "loss": 1.2246, "step": 320192 }, { "epoch": 24.03422138836773, "grad_norm": 0.5183240175247192, "learning_rate": 0.002828, "loss": 1.2263, "step": 320256 }, { "epoch": 24.039024390243902, "grad_norm": 0.5464686751365662, "learning_rate": 0.002828, "loss": 1.224, "step": 320320 }, { "epoch": 24.043827392120075, "grad_norm": 0.6346117258071899, "learning_rate": 0.002828, "loss": 1.2244, "step": 320384 }, { "epoch": 24.04863039399625, "grad_norm": 0.5489941835403442, "learning_rate": 0.002828, "loss": 1.2294, "step": 320448 }, { "epoch": 24.05343339587242, "grad_norm": 0.5453773140907288, "learning_rate": 0.002828, "loss": 1.224, "step": 320512 }, { "epoch": 24.058236397748594, "grad_norm": 0.5428609251976013, "learning_rate": 0.002828, "loss": 1.226, "step": 320576 }, { "epoch": 24.063039399624767, "grad_norm": 0.5705650448799133, "learning_rate": 0.002828, "loss": 1.2266, "step": 320640 }, { "epoch": 24.06784240150094, "grad_norm": 0.6298850774765015, "learning_rate": 0.002828, "loss": 1.2264, "step": 320704 }, { "epoch": 24.07264540337711, "grad_norm": 0.6152244210243225, "learning_rate": 0.002828, "loss": 1.2287, "step": 320768 }, { "epoch": 24.077448405253282, "grad_norm": 0.47920313477516174, "learning_rate": 0.002828, "loss": 1.2313, "step": 320832 }, { "epoch": 24.082251407129455, "grad_norm": 0.6054471731185913, "learning_rate": 0.002828, "loss": 1.2287, "step": 320896 }, { "epoch": 24.087054409005628, "grad_norm": 0.6222940683364868, "learning_rate": 0.002828, "loss": 1.2284, "step": 320960 }, { "epoch": 24.0918574108818, "grad_norm": 0.6720530390739441, "learning_rate": 0.002828, "loss": 1.2318, "step": 321024 }, { "epoch": 24.096660412757974, "grad_norm": 0.5883446335792542, "learning_rate": 0.002828, "loss": 1.2312, "step": 321088 }, { "epoch": 24.101463414634146, "grad_norm": 0.5007783770561218, "learning_rate": 0.002828, "loss": 1.2261, "step": 321152 }, { "epoch": 24.10626641651032, "grad_norm": 0.56673264503479, "learning_rate": 0.002828, "loss": 1.2274, "step": 321216 }, { "epoch": 24.111069418386492, "grad_norm": 0.5843421816825867, "learning_rate": 0.002828, "loss": 1.2324, "step": 321280 }, { "epoch": 24.115872420262665, "grad_norm": 0.5719348192214966, "learning_rate": 0.002828, "loss": 1.2279, "step": 321344 }, { "epoch": 24.120675422138838, "grad_norm": 0.5110663771629333, "learning_rate": 0.002828, "loss": 1.2298, "step": 321408 }, { "epoch": 24.12547842401501, "grad_norm": 0.5339451432228088, "learning_rate": 0.002828, "loss": 1.2315, "step": 321472 }, { "epoch": 24.130281425891184, "grad_norm": 0.574852705001831, "learning_rate": 0.002828, "loss": 1.2359, "step": 321536 }, { "epoch": 24.135084427767353, "grad_norm": 0.5415715575218201, "learning_rate": 0.002828, "loss": 1.2232, "step": 321600 }, { "epoch": 24.139887429643526, "grad_norm": 0.5436941385269165, "learning_rate": 0.002828, "loss": 1.2306, "step": 321664 }, { "epoch": 24.1446904315197, "grad_norm": 0.6057150959968567, "learning_rate": 0.002828, "loss": 1.2322, "step": 321728 }, { "epoch": 24.14949343339587, "grad_norm": 0.5295004844665527, "learning_rate": 0.002828, "loss": 1.2261, "step": 321792 }, { "epoch": 24.154296435272045, "grad_norm": 0.573469340801239, "learning_rate": 0.002828, "loss": 1.23, "step": 321856 }, { "epoch": 24.159099437148218, "grad_norm": 0.5898007750511169, "learning_rate": 0.002828, "loss": 1.2311, "step": 321920 }, { "epoch": 24.16390243902439, "grad_norm": 0.6019816994667053, "learning_rate": 0.002828, "loss": 1.233, "step": 321984 }, { "epoch": 24.168705440900563, "grad_norm": 0.7136012315750122, "learning_rate": 0.002828, "loss": 1.2301, "step": 322048 }, { "epoch": 24.173508442776736, "grad_norm": 0.7115374803543091, "learning_rate": 0.002828, "loss": 1.2339, "step": 322112 }, { "epoch": 24.17831144465291, "grad_norm": 0.6407468914985657, "learning_rate": 0.002828, "loss": 1.2349, "step": 322176 }, { "epoch": 24.183114446529082, "grad_norm": 0.718950629234314, "learning_rate": 0.002828, "loss": 1.227, "step": 322240 }, { "epoch": 24.187917448405255, "grad_norm": 0.4775286912918091, "learning_rate": 0.002828, "loss": 1.2332, "step": 322304 }, { "epoch": 24.192720450281424, "grad_norm": 0.6076687574386597, "learning_rate": 0.002828, "loss": 1.2331, "step": 322368 }, { "epoch": 24.197523452157597, "grad_norm": 0.5814255475997925, "learning_rate": 0.002828, "loss": 1.2329, "step": 322432 }, { "epoch": 24.20232645403377, "grad_norm": 0.5358974933624268, "learning_rate": 0.002828, "loss": 1.2366, "step": 322496 }, { "epoch": 24.207129455909943, "grad_norm": 0.5103739500045776, "learning_rate": 0.002828, "loss": 1.23, "step": 322560 }, { "epoch": 24.211932457786116, "grad_norm": 0.570276141166687, "learning_rate": 0.002828, "loss": 1.2315, "step": 322624 }, { "epoch": 24.21673545966229, "grad_norm": 0.6150188446044922, "learning_rate": 0.002828, "loss": 1.2246, "step": 322688 }, { "epoch": 24.22153846153846, "grad_norm": 0.4611063301563263, "learning_rate": 0.002828, "loss": 1.2341, "step": 322752 }, { "epoch": 24.226341463414634, "grad_norm": 0.6687694191932678, "learning_rate": 0.002828, "loss": 1.227, "step": 322816 }, { "epoch": 24.231144465290807, "grad_norm": 0.522638201713562, "learning_rate": 0.002828, "loss": 1.235, "step": 322880 }, { "epoch": 24.23594746716698, "grad_norm": 0.6681767106056213, "learning_rate": 0.002828, "loss": 1.2383, "step": 322944 }, { "epoch": 24.240750469043153, "grad_norm": 0.6990424394607544, "learning_rate": 0.002828, "loss": 1.232, "step": 323008 }, { "epoch": 24.245553470919326, "grad_norm": 0.635747492313385, "learning_rate": 0.002828, "loss": 1.2327, "step": 323072 }, { "epoch": 24.2503564727955, "grad_norm": 0.603436291217804, "learning_rate": 0.002828, "loss": 1.238, "step": 323136 }, { "epoch": 24.255159474671668, "grad_norm": 0.603233277797699, "learning_rate": 0.002828, "loss": 1.23, "step": 323200 }, { "epoch": 24.25996247654784, "grad_norm": 0.5681001543998718, "learning_rate": 0.002828, "loss": 1.2315, "step": 323264 }, { "epoch": 24.264765478424014, "grad_norm": 0.5758222937583923, "learning_rate": 0.002828, "loss": 1.2319, "step": 323328 }, { "epoch": 24.269568480300187, "grad_norm": 0.6049182415008545, "learning_rate": 0.002828, "loss": 1.2345, "step": 323392 }, { "epoch": 24.27437148217636, "grad_norm": 0.5548405051231384, "learning_rate": 0.002828, "loss": 1.2415, "step": 323456 }, { "epoch": 24.279174484052533, "grad_norm": 0.48350638151168823, "learning_rate": 0.002828, "loss": 1.2378, "step": 323520 }, { "epoch": 24.283977485928705, "grad_norm": 0.5970533490180969, "learning_rate": 0.002828, "loss": 1.2375, "step": 323584 }, { "epoch": 24.28878048780488, "grad_norm": 0.5122383236885071, "learning_rate": 0.002828, "loss": 1.2393, "step": 323648 }, { "epoch": 24.29358348968105, "grad_norm": 0.5411232113838196, "learning_rate": 0.002828, "loss": 1.2321, "step": 323712 }, { "epoch": 24.298386491557224, "grad_norm": 0.6688981056213379, "learning_rate": 0.002828, "loss": 1.2389, "step": 323776 }, { "epoch": 24.303189493433397, "grad_norm": 0.594946563243866, "learning_rate": 0.002828, "loss": 1.2364, "step": 323840 }, { "epoch": 24.30799249530957, "grad_norm": 0.5973834991455078, "learning_rate": 0.002828, "loss": 1.2354, "step": 323904 }, { "epoch": 24.312795497185743, "grad_norm": 0.6772333383560181, "learning_rate": 0.002828, "loss": 1.2408, "step": 323968 }, { "epoch": 24.317598499061912, "grad_norm": 0.5157610774040222, "learning_rate": 0.002828, "loss": 1.2302, "step": 324032 }, { "epoch": 24.322401500938085, "grad_norm": 0.6570464968681335, "learning_rate": 0.002828, "loss": 1.235, "step": 324096 }, { "epoch": 24.327204502814258, "grad_norm": 0.6352835297584534, "learning_rate": 0.002828, "loss": 1.2415, "step": 324160 }, { "epoch": 24.33200750469043, "grad_norm": 0.504051148891449, "learning_rate": 0.002828, "loss": 1.2316, "step": 324224 }, { "epoch": 24.336810506566604, "grad_norm": 0.5722164511680603, "learning_rate": 0.002828, "loss": 1.2435, "step": 324288 }, { "epoch": 24.341613508442776, "grad_norm": 0.6468031406402588, "learning_rate": 0.002828, "loss": 1.2351, "step": 324352 }, { "epoch": 24.34641651031895, "grad_norm": 0.4418530762195587, "learning_rate": 0.002828, "loss": 1.2363, "step": 324416 }, { "epoch": 24.351219512195122, "grad_norm": 0.49656835198402405, "learning_rate": 0.002828, "loss": 1.2377, "step": 324480 }, { "epoch": 24.356022514071295, "grad_norm": 0.6155210733413696, "learning_rate": 0.002828, "loss": 1.2396, "step": 324544 }, { "epoch": 24.360825515947468, "grad_norm": 0.49608752131462097, "learning_rate": 0.002828, "loss": 1.24, "step": 324608 }, { "epoch": 24.36562851782364, "grad_norm": 0.46983015537261963, "learning_rate": 0.002828, "loss": 1.2425, "step": 324672 }, { "epoch": 24.370431519699814, "grad_norm": 0.6393495798110962, "learning_rate": 0.002828, "loss": 1.2359, "step": 324736 }, { "epoch": 24.375234521575987, "grad_norm": 0.6129648089408875, "learning_rate": 0.002828, "loss": 1.238, "step": 324800 }, { "epoch": 24.380037523452156, "grad_norm": 0.7626722455024719, "learning_rate": 0.002828, "loss": 1.2364, "step": 324864 }, { "epoch": 24.38484052532833, "grad_norm": 0.4975043535232544, "learning_rate": 0.002828, "loss": 1.2371, "step": 324928 }, { "epoch": 24.389643527204502, "grad_norm": 0.6254850625991821, "learning_rate": 0.002828, "loss": 1.2428, "step": 324992 }, { "epoch": 24.394446529080675, "grad_norm": 0.5704091787338257, "learning_rate": 0.002828, "loss": 1.2426, "step": 325056 }, { "epoch": 24.399249530956848, "grad_norm": 0.5449159145355225, "learning_rate": 0.002828, "loss": 1.2353, "step": 325120 }, { "epoch": 24.40405253283302, "grad_norm": 0.5547662377357483, "learning_rate": 0.002828, "loss": 1.2367, "step": 325184 }, { "epoch": 24.408855534709193, "grad_norm": 0.7620545029640198, "learning_rate": 0.002828, "loss": 1.2379, "step": 325248 }, { "epoch": 24.413658536585366, "grad_norm": 0.5453926920890808, "learning_rate": 0.002828, "loss": 1.2407, "step": 325312 }, { "epoch": 24.41846153846154, "grad_norm": 0.6322466135025024, "learning_rate": 0.002828, "loss": 1.2394, "step": 325376 }, { "epoch": 24.423264540337712, "grad_norm": 0.47954753041267395, "learning_rate": 0.002828, "loss": 1.2482, "step": 325440 }, { "epoch": 24.428067542213885, "grad_norm": 0.5336704850196838, "learning_rate": 0.002828, "loss": 1.2374, "step": 325504 }, { "epoch": 24.432870544090058, "grad_norm": 0.6320717334747314, "learning_rate": 0.002828, "loss": 1.2398, "step": 325568 }, { "epoch": 24.437673545966227, "grad_norm": 0.540543258190155, "learning_rate": 0.002828, "loss": 1.2385, "step": 325632 }, { "epoch": 24.4424765478424, "grad_norm": 0.5124441981315613, "learning_rate": 0.002828, "loss": 1.2398, "step": 325696 }, { "epoch": 24.447279549718573, "grad_norm": 0.6076022386550903, "learning_rate": 0.002828, "loss": 1.2363, "step": 325760 }, { "epoch": 24.452082551594746, "grad_norm": 0.4827095568180084, "learning_rate": 0.002828, "loss": 1.2444, "step": 325824 }, { "epoch": 24.45688555347092, "grad_norm": 0.6274040937423706, "learning_rate": 0.002828, "loss": 1.2423, "step": 325888 }, { "epoch": 24.46168855534709, "grad_norm": 0.49718886613845825, "learning_rate": 0.002828, "loss": 1.2441, "step": 325952 }, { "epoch": 24.466491557223264, "grad_norm": 0.5871378779411316, "learning_rate": 0.002828, "loss": 1.2401, "step": 326016 }, { "epoch": 24.471294559099437, "grad_norm": 0.6377558708190918, "learning_rate": 0.002828, "loss": 1.2387, "step": 326080 }, { "epoch": 24.47609756097561, "grad_norm": 0.6675707697868347, "learning_rate": 0.002828, "loss": 1.2369, "step": 326144 }, { "epoch": 24.480900562851783, "grad_norm": 0.5000851154327393, "learning_rate": 0.002828, "loss": 1.2403, "step": 326208 }, { "epoch": 24.485703564727956, "grad_norm": 0.6159429550170898, "learning_rate": 0.002828, "loss": 1.2411, "step": 326272 }, { "epoch": 24.49050656660413, "grad_norm": 0.5147876143455505, "learning_rate": 0.002828, "loss": 1.2425, "step": 326336 }, { "epoch": 24.4953095684803, "grad_norm": 0.625784695148468, "learning_rate": 0.002828, "loss": 1.2449, "step": 326400 }, { "epoch": 24.50011257035647, "grad_norm": 0.5830493569374084, "learning_rate": 0.002828, "loss": 1.2395, "step": 326464 }, { "epoch": 24.504915572232644, "grad_norm": 0.5036903023719788, "learning_rate": 0.002828, "loss": 1.237, "step": 326528 }, { "epoch": 24.509718574108817, "grad_norm": 0.5821319222450256, "learning_rate": 0.002828, "loss": 1.2448, "step": 326592 }, { "epoch": 24.51452157598499, "grad_norm": 0.4889194667339325, "learning_rate": 0.002828, "loss": 1.2449, "step": 326656 }, { "epoch": 24.519324577861163, "grad_norm": 0.5718082189559937, "learning_rate": 0.002828, "loss": 1.2452, "step": 326720 }, { "epoch": 24.524127579737335, "grad_norm": 0.5081624984741211, "learning_rate": 0.002828, "loss": 1.24, "step": 326784 }, { "epoch": 24.52893058161351, "grad_norm": 0.5082750916481018, "learning_rate": 0.002828, "loss": 1.2424, "step": 326848 }, { "epoch": 24.53373358348968, "grad_norm": 0.592952311038971, "learning_rate": 0.002828, "loss": 1.2394, "step": 326912 }, { "epoch": 24.538536585365854, "grad_norm": 0.6645016670227051, "learning_rate": 0.002828, "loss": 1.2419, "step": 326976 }, { "epoch": 24.543339587242027, "grad_norm": 0.5564192533493042, "learning_rate": 0.002828, "loss": 1.2457, "step": 327040 }, { "epoch": 24.5481425891182, "grad_norm": 0.6197208166122437, "learning_rate": 0.002828, "loss": 1.2441, "step": 327104 }, { "epoch": 24.552945590994373, "grad_norm": 0.6238077878952026, "learning_rate": 0.002828, "loss": 1.2384, "step": 327168 }, { "epoch": 24.557748592870546, "grad_norm": 0.5843613743782043, "learning_rate": 0.002828, "loss": 1.2416, "step": 327232 }, { "epoch": 24.562551594746715, "grad_norm": 0.6086361408233643, "learning_rate": 0.002828, "loss": 1.2429, "step": 327296 }, { "epoch": 24.567354596622888, "grad_norm": 0.5316898822784424, "learning_rate": 0.002828, "loss": 1.2424, "step": 327360 }, { "epoch": 24.57215759849906, "grad_norm": 0.6011339426040649, "learning_rate": 0.002828, "loss": 1.2401, "step": 327424 }, { "epoch": 24.576960600375234, "grad_norm": 0.6036356687545776, "learning_rate": 0.002828, "loss": 1.2383, "step": 327488 }, { "epoch": 24.581763602251407, "grad_norm": 0.5892400145530701, "learning_rate": 0.002828, "loss": 1.2411, "step": 327552 }, { "epoch": 24.58656660412758, "grad_norm": 0.5821129083633423, "learning_rate": 0.002828, "loss": 1.2485, "step": 327616 }, { "epoch": 24.591369606003752, "grad_norm": 0.570139467716217, "learning_rate": 0.002828, "loss": 1.2372, "step": 327680 }, { "epoch": 24.596172607879925, "grad_norm": 0.54395991563797, "learning_rate": 0.002828, "loss": 1.2416, "step": 327744 }, { "epoch": 24.600975609756098, "grad_norm": 0.5703491568565369, "learning_rate": 0.002828, "loss": 1.2434, "step": 327808 }, { "epoch": 24.60577861163227, "grad_norm": 0.5862521529197693, "learning_rate": 0.002828, "loss": 1.2428, "step": 327872 }, { "epoch": 24.610581613508444, "grad_norm": 0.5634772777557373, "learning_rate": 0.002828, "loss": 1.2417, "step": 327936 }, { "epoch": 24.615384615384617, "grad_norm": 0.5159964561462402, "learning_rate": 0.002828, "loss": 1.2453, "step": 328000 }, { "epoch": 24.62018761726079, "grad_norm": 0.5875333547592163, "learning_rate": 0.002828, "loss": 1.247, "step": 328064 }, { "epoch": 24.62499061913696, "grad_norm": 0.6296870708465576, "learning_rate": 0.002828, "loss": 1.2458, "step": 328128 }, { "epoch": 24.629793621013132, "grad_norm": 0.5884017944335938, "learning_rate": 0.002828, "loss": 1.253, "step": 328192 }, { "epoch": 24.634596622889305, "grad_norm": 0.749247670173645, "learning_rate": 0.002828, "loss": 1.2331, "step": 328256 }, { "epoch": 24.639399624765478, "grad_norm": 0.5577207803726196, "learning_rate": 0.002828, "loss": 1.2441, "step": 328320 }, { "epoch": 24.64420262664165, "grad_norm": 0.6233530044555664, "learning_rate": 0.002828, "loss": 1.2486, "step": 328384 }, { "epoch": 24.649005628517823, "grad_norm": 0.5876480340957642, "learning_rate": 0.002828, "loss": 1.2415, "step": 328448 }, { "epoch": 24.653808630393996, "grad_norm": 0.6091268062591553, "learning_rate": 0.002828, "loss": 1.2382, "step": 328512 }, { "epoch": 24.65861163227017, "grad_norm": 0.5153234004974365, "learning_rate": 0.002828, "loss": 1.246, "step": 328576 }, { "epoch": 24.663414634146342, "grad_norm": 0.6070051193237305, "learning_rate": 0.002828, "loss": 1.249, "step": 328640 }, { "epoch": 24.668217636022515, "grad_norm": 0.6957892179489136, "learning_rate": 0.002828, "loss": 1.2457, "step": 328704 }, { "epoch": 24.673020637898688, "grad_norm": 0.6002076268196106, "learning_rate": 0.002828, "loss": 1.2389, "step": 328768 }, { "epoch": 24.67782363977486, "grad_norm": 0.5522453188896179, "learning_rate": 0.002828, "loss": 1.2462, "step": 328832 }, { "epoch": 24.682626641651034, "grad_norm": 0.6673900485038757, "learning_rate": 0.002828, "loss": 1.2483, "step": 328896 }, { "epoch": 24.687429643527203, "grad_norm": 0.5456265807151794, "learning_rate": 0.002828, "loss": 1.2353, "step": 328960 }, { "epoch": 24.692232645403376, "grad_norm": 0.6234123110771179, "learning_rate": 0.002828, "loss": 1.2536, "step": 329024 }, { "epoch": 24.69703564727955, "grad_norm": 0.5306026339530945, "learning_rate": 0.002828, "loss": 1.2464, "step": 329088 }, { "epoch": 24.70183864915572, "grad_norm": 0.5644907355308533, "learning_rate": 0.002828, "loss": 1.2485, "step": 329152 }, { "epoch": 24.706641651031894, "grad_norm": 0.5596889853477478, "learning_rate": 0.002828, "loss": 1.2375, "step": 329216 }, { "epoch": 24.711444652908067, "grad_norm": 0.5232831835746765, "learning_rate": 0.002828, "loss": 1.2446, "step": 329280 }, { "epoch": 24.71624765478424, "grad_norm": 0.557348906993866, "learning_rate": 0.002828, "loss": 1.25, "step": 329344 }, { "epoch": 24.721050656660413, "grad_norm": 0.5692952871322632, "learning_rate": 0.002828, "loss": 1.2447, "step": 329408 }, { "epoch": 24.725853658536586, "grad_norm": 0.5626512169837952, "learning_rate": 0.002828, "loss": 1.2452, "step": 329472 }, { "epoch": 24.73065666041276, "grad_norm": 0.5778016448020935, "learning_rate": 0.002828, "loss": 1.2429, "step": 329536 }, { "epoch": 24.735459662288932, "grad_norm": 0.6249626278877258, "learning_rate": 0.002828, "loss": 1.2449, "step": 329600 }, { "epoch": 24.740262664165105, "grad_norm": 0.6157994270324707, "learning_rate": 0.002828, "loss": 1.2446, "step": 329664 }, { "epoch": 24.745065666041278, "grad_norm": 0.6171292662620544, "learning_rate": 0.002828, "loss": 1.2384, "step": 329728 }, { "epoch": 24.749868667917447, "grad_norm": 0.5539014935493469, "learning_rate": 0.002828, "loss": 1.2455, "step": 329792 }, { "epoch": 24.75467166979362, "grad_norm": 0.5088152885437012, "learning_rate": 0.002828, "loss": 1.251, "step": 329856 }, { "epoch": 24.759474671669793, "grad_norm": 0.6428780555725098, "learning_rate": 0.002828, "loss": 1.2421, "step": 329920 }, { "epoch": 24.764277673545966, "grad_norm": 0.6033629179000854, "learning_rate": 0.002828, "loss": 1.2461, "step": 329984 }, { "epoch": 24.76908067542214, "grad_norm": 0.6996398568153381, "learning_rate": 0.002828, "loss": 1.2451, "step": 330048 }, { "epoch": 24.77388367729831, "grad_norm": 0.5007488131523132, "learning_rate": 0.002828, "loss": 1.247, "step": 330112 }, { "epoch": 24.778686679174484, "grad_norm": 0.6352538466453552, "learning_rate": 0.002828, "loss": 1.2454, "step": 330176 }, { "epoch": 24.783489681050657, "grad_norm": 0.6288419961929321, "learning_rate": 0.002828, "loss": 1.2469, "step": 330240 }, { "epoch": 24.78829268292683, "grad_norm": 0.5228093266487122, "learning_rate": 0.002828, "loss": 1.2371, "step": 330304 }, { "epoch": 24.793095684803003, "grad_norm": 0.5313441753387451, "learning_rate": 0.002828, "loss": 1.2481, "step": 330368 }, { "epoch": 24.797898686679176, "grad_norm": 0.5033171772956848, "learning_rate": 0.002828, "loss": 1.2441, "step": 330432 }, { "epoch": 24.80270168855535, "grad_norm": 0.6732704043388367, "learning_rate": 0.002828, "loss": 1.2424, "step": 330496 }, { "epoch": 24.80750469043152, "grad_norm": 0.6865556240081787, "learning_rate": 0.002828, "loss": 1.2479, "step": 330560 }, { "epoch": 24.81230769230769, "grad_norm": 0.6815218925476074, "learning_rate": 0.002828, "loss": 1.2452, "step": 330624 }, { "epoch": 24.817110694183864, "grad_norm": 0.5610247254371643, "learning_rate": 0.002828, "loss": 1.2476, "step": 330688 }, { "epoch": 24.821913696060037, "grad_norm": 0.5337264537811279, "learning_rate": 0.002828, "loss": 1.2421, "step": 330752 }, { "epoch": 24.82671669793621, "grad_norm": 0.6259786486625671, "learning_rate": 0.002828, "loss": 1.2493, "step": 330816 }, { "epoch": 24.831519699812382, "grad_norm": 0.5326517820358276, "learning_rate": 0.002828, "loss": 1.2528, "step": 330880 }, { "epoch": 24.836322701688555, "grad_norm": 0.6553822159767151, "learning_rate": 0.002828, "loss": 1.2551, "step": 330944 }, { "epoch": 24.841125703564728, "grad_norm": 0.47039973735809326, "learning_rate": 0.002828, "loss": 1.2456, "step": 331008 }, { "epoch": 24.8459287054409, "grad_norm": 0.5827360153198242, "learning_rate": 0.002828, "loss": 1.2433, "step": 331072 }, { "epoch": 24.850731707317074, "grad_norm": 0.660505473613739, "learning_rate": 0.002828, "loss": 1.2538, "step": 331136 }, { "epoch": 24.855534709193247, "grad_norm": 0.5613062977790833, "learning_rate": 0.002828, "loss": 1.2443, "step": 331200 }, { "epoch": 24.86033771106942, "grad_norm": 0.6975498795509338, "learning_rate": 0.002828, "loss": 1.2495, "step": 331264 }, { "epoch": 24.865140712945593, "grad_norm": 0.6232232451438904, "learning_rate": 0.002828, "loss": 1.2478, "step": 331328 }, { "epoch": 24.869943714821762, "grad_norm": 0.6001636385917664, "learning_rate": 0.002828, "loss": 1.2479, "step": 331392 }, { "epoch": 24.874746716697935, "grad_norm": 0.596153199672699, "learning_rate": 0.002828, "loss": 1.2536, "step": 331456 }, { "epoch": 24.879549718574108, "grad_norm": 0.6497877836227417, "learning_rate": 0.002828, "loss": 1.2482, "step": 331520 }, { "epoch": 24.88435272045028, "grad_norm": 0.5464568734169006, "learning_rate": 0.002828, "loss": 1.2473, "step": 331584 }, { "epoch": 24.889155722326453, "grad_norm": 0.5703956484794617, "learning_rate": 0.002828, "loss": 1.2567, "step": 331648 }, { "epoch": 24.893958724202626, "grad_norm": 0.5373716354370117, "learning_rate": 0.002828, "loss": 1.2563, "step": 331712 }, { "epoch": 24.8987617260788, "grad_norm": 0.5806722640991211, "learning_rate": 0.002828, "loss": 1.2453, "step": 331776 }, { "epoch": 24.903564727954972, "grad_norm": 0.6477004289627075, "learning_rate": 0.002828, "loss": 1.2567, "step": 331840 }, { "epoch": 24.908367729831145, "grad_norm": 0.6425051689147949, "learning_rate": 0.002828, "loss": 1.2456, "step": 331904 }, { "epoch": 24.913170731707318, "grad_norm": 0.46345874667167664, "learning_rate": 0.002828, "loss": 1.2482, "step": 331968 }, { "epoch": 24.91797373358349, "grad_norm": 0.6676378846168518, "learning_rate": 0.002828, "loss": 1.2454, "step": 332032 }, { "epoch": 24.922776735459664, "grad_norm": 0.5868575572967529, "learning_rate": 0.002828, "loss": 1.2476, "step": 332096 }, { "epoch": 24.927579737335837, "grad_norm": 0.5779089331626892, "learning_rate": 0.002828, "loss": 1.25, "step": 332160 }, { "epoch": 24.932382739212006, "grad_norm": 0.5757585763931274, "learning_rate": 0.002828, "loss": 1.2473, "step": 332224 }, { "epoch": 24.93718574108818, "grad_norm": 0.6109635233879089, "learning_rate": 0.002828, "loss": 1.2469, "step": 332288 }, { "epoch": 24.94198874296435, "grad_norm": 0.5922868847846985, "learning_rate": 0.002828, "loss": 1.253, "step": 332352 }, { "epoch": 24.946791744840525, "grad_norm": 0.5897438526153564, "learning_rate": 0.002828, "loss": 1.2477, "step": 332416 }, { "epoch": 24.951594746716697, "grad_norm": 0.54740971326828, "learning_rate": 0.002828, "loss": 1.2475, "step": 332480 }, { "epoch": 24.95639774859287, "grad_norm": 0.6911497712135315, "learning_rate": 0.002828, "loss": 1.2538, "step": 332544 }, { "epoch": 24.961200750469043, "grad_norm": 0.6406940221786499, "learning_rate": 0.002828, "loss": 1.2447, "step": 332608 }, { "epoch": 24.966003752345216, "grad_norm": 0.6405665874481201, "learning_rate": 0.002828, "loss": 1.2461, "step": 332672 }, { "epoch": 24.97080675422139, "grad_norm": 0.6722344160079956, "learning_rate": 0.002828, "loss": 1.2477, "step": 332736 }, { "epoch": 24.975609756097562, "grad_norm": 0.6267404556274414, "learning_rate": 0.002828, "loss": 1.2528, "step": 332800 }, { "epoch": 24.980412757973735, "grad_norm": 0.6228541135787964, "learning_rate": 0.002828, "loss": 1.2497, "step": 332864 }, { "epoch": 24.985215759849908, "grad_norm": 0.5420615673065186, "learning_rate": 0.002828, "loss": 1.2507, "step": 332928 }, { "epoch": 24.99001876172608, "grad_norm": 0.5710049271583557, "learning_rate": 0.002828, "loss": 1.2493, "step": 332992 }, { "epoch": 24.99482176360225, "grad_norm": 0.6155769228935242, "learning_rate": 0.002828, "loss": 1.25, "step": 333056 }, { "epoch": 24.999624765478423, "grad_norm": 0.6931199431419373, "learning_rate": 0.002828, "loss": 1.2476, "step": 333120 }, { "epoch": 25.004427767354596, "grad_norm": 0.5726258158683777, "learning_rate": 0.002828, "loss": 1.215, "step": 333184 }, { "epoch": 25.00923076923077, "grad_norm": 0.6955495476722717, "learning_rate": 0.002828, "loss": 1.2106, "step": 333248 }, { "epoch": 25.01403377110694, "grad_norm": 0.5848275423049927, "learning_rate": 0.002828, "loss": 1.2055, "step": 333312 }, { "epoch": 25.018836772983114, "grad_norm": 0.5955660939216614, "learning_rate": 0.002828, "loss": 1.2115, "step": 333376 }, { "epoch": 25.023639774859287, "grad_norm": 0.7096522450447083, "learning_rate": 0.002828, "loss": 1.2125, "step": 333440 }, { "epoch": 25.02844277673546, "grad_norm": 0.6311249136924744, "learning_rate": 0.002828, "loss": 1.214, "step": 333504 }, { "epoch": 25.033245778611633, "grad_norm": 0.5731686949729919, "learning_rate": 0.002828, "loss": 1.2164, "step": 333568 }, { "epoch": 25.038048780487806, "grad_norm": 0.5741409063339233, "learning_rate": 0.002828, "loss": 1.2083, "step": 333632 }, { "epoch": 25.04285178236398, "grad_norm": 0.6162007451057434, "learning_rate": 0.002828, "loss": 1.2087, "step": 333696 }, { "epoch": 25.04765478424015, "grad_norm": 0.5544477701187134, "learning_rate": 0.002828, "loss": 1.2106, "step": 333760 }, { "epoch": 25.052457786116324, "grad_norm": 0.5872480273246765, "learning_rate": 0.002828, "loss": 1.2141, "step": 333824 }, { "epoch": 25.057260787992494, "grad_norm": 0.575803279876709, "learning_rate": 0.002828, "loss": 1.2121, "step": 333888 }, { "epoch": 25.062063789868667, "grad_norm": 0.5969261527061462, "learning_rate": 0.002828, "loss": 1.2123, "step": 333952 }, { "epoch": 25.06686679174484, "grad_norm": 0.5465327501296997, "learning_rate": 0.002828, "loss": 1.2156, "step": 334016 }, { "epoch": 25.071669793621012, "grad_norm": 0.619968056678772, "learning_rate": 0.002828, "loss": 1.2128, "step": 334080 }, { "epoch": 25.076472795497185, "grad_norm": 0.5793817639350891, "learning_rate": 0.002828, "loss": 1.2147, "step": 334144 }, { "epoch": 25.081275797373358, "grad_norm": 0.6986529231071472, "learning_rate": 0.002828, "loss": 1.216, "step": 334208 }, { "epoch": 25.08607879924953, "grad_norm": 0.657088041305542, "learning_rate": 0.002828, "loss": 1.2194, "step": 334272 }, { "epoch": 25.090881801125704, "grad_norm": 0.5881126523017883, "learning_rate": 0.002828, "loss": 1.2164, "step": 334336 }, { "epoch": 25.095684803001877, "grad_norm": 0.7006486654281616, "learning_rate": 0.002828, "loss": 1.2176, "step": 334400 }, { "epoch": 25.10048780487805, "grad_norm": 0.659379243850708, "learning_rate": 0.002828, "loss": 1.2179, "step": 334464 }, { "epoch": 25.105290806754223, "grad_norm": 0.6826364994049072, "learning_rate": 0.002828, "loss": 1.2147, "step": 334528 }, { "epoch": 25.110093808630396, "grad_norm": 0.48427775502204895, "learning_rate": 0.002828, "loss": 1.2108, "step": 334592 }, { "epoch": 25.114896810506565, "grad_norm": 0.6580747961997986, "learning_rate": 0.002828, "loss": 1.216, "step": 334656 }, { "epoch": 25.119699812382738, "grad_norm": 0.639634370803833, "learning_rate": 0.002828, "loss": 1.2165, "step": 334720 }, { "epoch": 25.12450281425891, "grad_norm": 0.6104490160942078, "learning_rate": 0.002828, "loss": 1.2168, "step": 334784 }, { "epoch": 25.129305816135084, "grad_norm": 0.4889427423477173, "learning_rate": 0.002828, "loss": 1.2198, "step": 334848 }, { "epoch": 25.134108818011256, "grad_norm": 0.722379207611084, "learning_rate": 0.002828, "loss": 1.2154, "step": 334912 }, { "epoch": 25.13891181988743, "grad_norm": 0.6009499430656433, "learning_rate": 0.002828, "loss": 1.2233, "step": 334976 }, { "epoch": 25.143714821763602, "grad_norm": 0.6094868183135986, "learning_rate": 0.002828, "loss": 1.2134, "step": 335040 }, { "epoch": 25.148517823639775, "grad_norm": 0.5987445712089539, "learning_rate": 0.002828, "loss": 1.2195, "step": 335104 }, { "epoch": 25.153320825515948, "grad_norm": 0.5814436674118042, "learning_rate": 0.002828, "loss": 1.2146, "step": 335168 }, { "epoch": 25.15812382739212, "grad_norm": 0.7082455158233643, "learning_rate": 0.002828, "loss": 1.2162, "step": 335232 }, { "epoch": 25.162926829268294, "grad_norm": 0.6125720143318176, "learning_rate": 0.002828, "loss": 1.2214, "step": 335296 }, { "epoch": 25.167729831144467, "grad_norm": 0.715304970741272, "learning_rate": 0.002828, "loss": 1.2233, "step": 335360 }, { "epoch": 25.17253283302064, "grad_norm": 0.5754349231719971, "learning_rate": 0.002828, "loss": 1.2147, "step": 335424 }, { "epoch": 25.17733583489681, "grad_norm": 0.5818027853965759, "learning_rate": 0.002828, "loss": 1.2178, "step": 335488 }, { "epoch": 25.18213883677298, "grad_norm": 0.5796423554420471, "learning_rate": 0.002828, "loss": 1.2182, "step": 335552 }, { "epoch": 25.186941838649155, "grad_norm": 0.5709074139595032, "learning_rate": 0.002828, "loss": 1.2223, "step": 335616 }, { "epoch": 25.191744840525327, "grad_norm": 0.5902493596076965, "learning_rate": 0.002828, "loss": 1.222, "step": 335680 }, { "epoch": 25.1965478424015, "grad_norm": 0.6953119039535522, "learning_rate": 0.002828, "loss": 1.2207, "step": 335744 }, { "epoch": 25.201350844277673, "grad_norm": 0.6451302766799927, "learning_rate": 0.002828, "loss": 1.22, "step": 335808 }, { "epoch": 25.206153846153846, "grad_norm": 0.6847954392433167, "learning_rate": 0.002828, "loss": 1.2209, "step": 335872 }, { "epoch": 25.21095684803002, "grad_norm": 0.5568794012069702, "learning_rate": 0.002828, "loss": 1.2244, "step": 335936 }, { "epoch": 25.215759849906192, "grad_norm": 0.4961014986038208, "learning_rate": 0.002828, "loss": 1.2113, "step": 336000 }, { "epoch": 25.220562851782365, "grad_norm": 0.5914636850357056, "learning_rate": 0.002828, "loss": 1.2225, "step": 336064 }, { "epoch": 25.225365853658538, "grad_norm": 0.7324196100234985, "learning_rate": 0.002828, "loss": 1.2229, "step": 336128 }, { "epoch": 25.23016885553471, "grad_norm": 0.5581563711166382, "learning_rate": 0.002828, "loss": 1.2198, "step": 336192 }, { "epoch": 25.234971857410883, "grad_norm": 0.5884627103805542, "learning_rate": 0.002828, "loss": 1.2228, "step": 336256 }, { "epoch": 25.239774859287053, "grad_norm": 0.6624916195869446, "learning_rate": 0.002828, "loss": 1.2297, "step": 336320 }, { "epoch": 25.244577861163226, "grad_norm": 0.6953412890434265, "learning_rate": 0.002828, "loss": 1.2244, "step": 336384 }, { "epoch": 25.2493808630394, "grad_norm": 0.5076711773872375, "learning_rate": 0.002828, "loss": 1.2222, "step": 336448 }, { "epoch": 25.25418386491557, "grad_norm": 0.6712566614151001, "learning_rate": 0.002828, "loss": 1.2276, "step": 336512 }, { "epoch": 25.258986866791744, "grad_norm": 0.5484094023704529, "learning_rate": 0.002828, "loss": 1.2217, "step": 336576 }, { "epoch": 25.263789868667917, "grad_norm": 0.5387206673622131, "learning_rate": 0.002828, "loss": 1.2231, "step": 336640 }, { "epoch": 25.26859287054409, "grad_norm": 0.47983431816101074, "learning_rate": 0.002828, "loss": 1.2184, "step": 336704 }, { "epoch": 25.273395872420263, "grad_norm": 0.6589762568473816, "learning_rate": 0.002828, "loss": 1.2182, "step": 336768 }, { "epoch": 25.278198874296436, "grad_norm": 0.7007054686546326, "learning_rate": 0.002828, "loss": 1.2193, "step": 336832 }, { "epoch": 25.28300187617261, "grad_norm": 0.6451464891433716, "learning_rate": 0.002828, "loss": 1.2238, "step": 336896 }, { "epoch": 25.28780487804878, "grad_norm": 0.5933736562728882, "learning_rate": 0.002828, "loss": 1.2259, "step": 336960 }, { "epoch": 25.292607879924955, "grad_norm": 0.5318650603294373, "learning_rate": 0.002828, "loss": 1.2267, "step": 337024 }, { "epoch": 25.297410881801127, "grad_norm": 0.5615575313568115, "learning_rate": 0.002828, "loss": 1.2262, "step": 337088 }, { "epoch": 25.302213883677297, "grad_norm": 0.5231067538261414, "learning_rate": 0.002828, "loss": 1.2302, "step": 337152 }, { "epoch": 25.30701688555347, "grad_norm": 0.5721030235290527, "learning_rate": 0.002828, "loss": 1.2228, "step": 337216 }, { "epoch": 25.311819887429643, "grad_norm": 0.8417955040931702, "learning_rate": 0.002828, "loss": 1.219, "step": 337280 }, { "epoch": 25.316622889305815, "grad_norm": 0.6137963533401489, "learning_rate": 0.002828, "loss": 1.2214, "step": 337344 }, { "epoch": 25.32142589118199, "grad_norm": 0.6264809370040894, "learning_rate": 0.002828, "loss": 1.2256, "step": 337408 }, { "epoch": 25.32622889305816, "grad_norm": 0.561613142490387, "learning_rate": 0.002828, "loss": 1.2259, "step": 337472 }, { "epoch": 25.331031894934334, "grad_norm": 0.5886028409004211, "learning_rate": 0.002828, "loss": 1.2226, "step": 337536 }, { "epoch": 25.335834896810507, "grad_norm": 0.6436465382575989, "learning_rate": 0.002828, "loss": 1.2268, "step": 337600 }, { "epoch": 25.34063789868668, "grad_norm": 0.6189551949501038, "learning_rate": 0.002828, "loss": 1.2292, "step": 337664 }, { "epoch": 25.345440900562853, "grad_norm": 0.5378521680831909, "learning_rate": 0.002828, "loss": 1.2247, "step": 337728 }, { "epoch": 25.350243902439026, "grad_norm": 0.5442854762077332, "learning_rate": 0.002828, "loss": 1.2267, "step": 337792 }, { "epoch": 25.3550469043152, "grad_norm": 0.5752109885215759, "learning_rate": 0.002828, "loss": 1.2325, "step": 337856 }, { "epoch": 25.35984990619137, "grad_norm": 0.5554222464561462, "learning_rate": 0.002828, "loss": 1.2236, "step": 337920 }, { "epoch": 25.36465290806754, "grad_norm": 0.675406813621521, "learning_rate": 0.002828, "loss": 1.2296, "step": 337984 }, { "epoch": 25.369455909943714, "grad_norm": 0.5926669239997864, "learning_rate": 0.002828, "loss": 1.221, "step": 338048 }, { "epoch": 25.374258911819886, "grad_norm": 0.4972551763057709, "learning_rate": 0.002828, "loss": 1.2233, "step": 338112 }, { "epoch": 25.37906191369606, "grad_norm": 0.6431121230125427, "learning_rate": 0.002828, "loss": 1.2251, "step": 338176 }, { "epoch": 25.383864915572232, "grad_norm": 0.5158210396766663, "learning_rate": 0.002828, "loss": 1.2228, "step": 338240 }, { "epoch": 25.388667917448405, "grad_norm": 0.5621346235275269, "learning_rate": 0.002828, "loss": 1.2302, "step": 338304 }, { "epoch": 25.393470919324578, "grad_norm": 0.6656622886657715, "learning_rate": 0.002828, "loss": 1.2243, "step": 338368 }, { "epoch": 25.39827392120075, "grad_norm": 0.5919707417488098, "learning_rate": 0.002828, "loss": 1.2254, "step": 338432 }, { "epoch": 25.403076923076924, "grad_norm": 0.5798816084861755, "learning_rate": 0.002828, "loss": 1.225, "step": 338496 }, { "epoch": 25.407879924953097, "grad_norm": 0.6159927248954773, "learning_rate": 0.002828, "loss": 1.2238, "step": 338560 }, { "epoch": 25.41268292682927, "grad_norm": 0.6071495413780212, "learning_rate": 0.002828, "loss": 1.2212, "step": 338624 }, { "epoch": 25.417485928705442, "grad_norm": 0.4696201682090759, "learning_rate": 0.002828, "loss": 1.2276, "step": 338688 }, { "epoch": 25.42228893058161, "grad_norm": 0.6017554402351379, "learning_rate": 0.002828, "loss": 1.2243, "step": 338752 }, { "epoch": 25.427091932457785, "grad_norm": 0.532129168510437, "learning_rate": 0.002828, "loss": 1.221, "step": 338816 }, { "epoch": 25.431894934333958, "grad_norm": 0.5939502120018005, "learning_rate": 0.002828, "loss": 1.2281, "step": 338880 }, { "epoch": 25.43669793621013, "grad_norm": 0.5088008046150208, "learning_rate": 0.002828, "loss": 1.2344, "step": 338944 }, { "epoch": 25.441500938086303, "grad_norm": 0.7171235680580139, "learning_rate": 0.002828, "loss": 1.2259, "step": 339008 }, { "epoch": 25.446303939962476, "grad_norm": 0.6164196133613586, "learning_rate": 0.002828, "loss": 1.2281, "step": 339072 }, { "epoch": 25.45110694183865, "grad_norm": 0.6111053228378296, "learning_rate": 0.002828, "loss": 1.2263, "step": 339136 }, { "epoch": 25.455909943714822, "grad_norm": 0.6192201375961304, "learning_rate": 0.002828, "loss": 1.23, "step": 339200 }, { "epoch": 25.460712945590995, "grad_norm": 0.7079026699066162, "learning_rate": 0.002828, "loss": 1.2239, "step": 339264 }, { "epoch": 25.465515947467168, "grad_norm": 0.630245566368103, "learning_rate": 0.002828, "loss": 1.2308, "step": 339328 }, { "epoch": 25.47031894934334, "grad_norm": 0.6225885152816772, "learning_rate": 0.002828, "loss": 1.2343, "step": 339392 }, { "epoch": 25.475121951219514, "grad_norm": 0.779857873916626, "learning_rate": 0.002828, "loss": 1.231, "step": 339456 }, { "epoch": 25.479924953095686, "grad_norm": 0.534664511680603, "learning_rate": 0.002828, "loss": 1.2294, "step": 339520 }, { "epoch": 25.484727954971856, "grad_norm": 0.6669053435325623, "learning_rate": 0.002828, "loss": 1.2285, "step": 339584 }, { "epoch": 25.48953095684803, "grad_norm": 0.6443493366241455, "learning_rate": 0.002828, "loss": 1.2205, "step": 339648 }, { "epoch": 25.4943339587242, "grad_norm": 0.5567485690116882, "learning_rate": 0.002828, "loss": 1.224, "step": 339712 }, { "epoch": 25.499136960600374, "grad_norm": 0.6056983470916748, "learning_rate": 0.002828, "loss": 1.2273, "step": 339776 }, { "epoch": 25.503939962476547, "grad_norm": 0.6651111245155334, "learning_rate": 0.002828, "loss": 1.2341, "step": 339840 }, { "epoch": 25.50874296435272, "grad_norm": 0.6043602824211121, "learning_rate": 0.002828, "loss": 1.2263, "step": 339904 }, { "epoch": 25.513545966228893, "grad_norm": 0.49276939034461975, "learning_rate": 0.002828, "loss": 1.2311, "step": 339968 }, { "epoch": 25.518348968105066, "grad_norm": 0.6172980070114136, "learning_rate": 0.002828, "loss": 1.2292, "step": 340032 }, { "epoch": 25.52315196998124, "grad_norm": 0.6005577445030212, "learning_rate": 0.002828, "loss": 1.2288, "step": 340096 }, { "epoch": 25.52795497185741, "grad_norm": 0.6682016253471375, "learning_rate": 0.002828, "loss": 1.2272, "step": 340160 }, { "epoch": 25.532757973733585, "grad_norm": 0.5602551698684692, "learning_rate": 0.002828, "loss": 1.2324, "step": 340224 }, { "epoch": 25.537560975609757, "grad_norm": 0.5643219351768494, "learning_rate": 0.002828, "loss": 1.2286, "step": 340288 }, { "epoch": 25.54236397748593, "grad_norm": 0.5434116721153259, "learning_rate": 0.002828, "loss": 1.2283, "step": 340352 }, { "epoch": 25.5471669793621, "grad_norm": 0.7124276161193848, "learning_rate": 0.002828, "loss": 1.2331, "step": 340416 }, { "epoch": 25.551969981238273, "grad_norm": 0.5400499105453491, "learning_rate": 0.002828, "loss": 1.2309, "step": 340480 }, { "epoch": 25.556772983114445, "grad_norm": 0.5698312520980835, "learning_rate": 0.002828, "loss": 1.2292, "step": 340544 }, { "epoch": 25.56157598499062, "grad_norm": 0.523460865020752, "learning_rate": 0.002828, "loss": 1.2223, "step": 340608 }, { "epoch": 25.56637898686679, "grad_norm": 0.5728746056556702, "learning_rate": 0.002828, "loss": 1.2371, "step": 340672 }, { "epoch": 25.571181988742964, "grad_norm": 0.553048849105835, "learning_rate": 0.002828, "loss": 1.2279, "step": 340736 }, { "epoch": 25.575984990619137, "grad_norm": 0.6705506443977356, "learning_rate": 0.002828, "loss": 1.2263, "step": 340800 }, { "epoch": 25.58078799249531, "grad_norm": 0.6559802293777466, "learning_rate": 0.002828, "loss": 1.2218, "step": 340864 }, { "epoch": 25.585590994371483, "grad_norm": 0.5637137293815613, "learning_rate": 0.002828, "loss": 1.2322, "step": 340928 }, { "epoch": 25.590393996247656, "grad_norm": 0.6659891605377197, "learning_rate": 0.002828, "loss": 1.2308, "step": 340992 }, { "epoch": 25.59519699812383, "grad_norm": 0.622877836227417, "learning_rate": 0.002828, "loss": 1.233, "step": 341056 }, { "epoch": 25.6, "grad_norm": 0.5427113771438599, "learning_rate": 0.002828, "loss": 1.2264, "step": 341120 }, { "epoch": 25.604803001876174, "grad_norm": 0.5061426758766174, "learning_rate": 0.002828, "loss": 1.2317, "step": 341184 }, { "epoch": 25.609606003752344, "grad_norm": 0.5866330862045288, "learning_rate": 0.002828, "loss": 1.2317, "step": 341248 }, { "epoch": 25.614409005628517, "grad_norm": 0.5532949566841125, "learning_rate": 0.002828, "loss": 1.2328, "step": 341312 }, { "epoch": 25.61921200750469, "grad_norm": 0.6040782332420349, "learning_rate": 0.002828, "loss": 1.2357, "step": 341376 }, { "epoch": 25.624015009380862, "grad_norm": 0.6957950592041016, "learning_rate": 0.002828, "loss": 1.2309, "step": 341440 }, { "epoch": 25.628818011257035, "grad_norm": 0.6486824750900269, "learning_rate": 0.002828, "loss": 1.2275, "step": 341504 }, { "epoch": 25.633621013133208, "grad_norm": 0.6482639908790588, "learning_rate": 0.002828, "loss": 1.2311, "step": 341568 }, { "epoch": 25.63842401500938, "grad_norm": 0.5687380433082581, "learning_rate": 0.002828, "loss": 1.2278, "step": 341632 }, { "epoch": 25.643227016885554, "grad_norm": 0.511089026927948, "learning_rate": 0.002828, "loss": 1.2295, "step": 341696 }, { "epoch": 25.648030018761727, "grad_norm": 0.6156114935874939, "learning_rate": 0.002828, "loss": 1.2332, "step": 341760 }, { "epoch": 25.6528330206379, "grad_norm": 0.5956178903579712, "learning_rate": 0.002828, "loss": 1.2286, "step": 341824 }, { "epoch": 25.657636022514072, "grad_norm": 0.5628892183303833, "learning_rate": 0.002828, "loss": 1.2298, "step": 341888 }, { "epoch": 25.662439024390245, "grad_norm": 0.6388689279556274, "learning_rate": 0.002828, "loss": 1.2276, "step": 341952 }, { "epoch": 25.667242026266415, "grad_norm": 0.577284038066864, "learning_rate": 0.002828, "loss": 1.2365, "step": 342016 }, { "epoch": 25.672045028142588, "grad_norm": 0.6047957539558411, "learning_rate": 0.002828, "loss": 1.232, "step": 342080 }, { "epoch": 25.67684803001876, "grad_norm": 0.6236744523048401, "learning_rate": 0.002828, "loss": 1.2375, "step": 342144 }, { "epoch": 25.681651031894933, "grad_norm": 0.6257603764533997, "learning_rate": 0.002828, "loss": 1.2312, "step": 342208 }, { "epoch": 25.686454033771106, "grad_norm": 0.5153629183769226, "learning_rate": 0.002828, "loss": 1.231, "step": 342272 }, { "epoch": 25.69125703564728, "grad_norm": 0.6125085949897766, "learning_rate": 0.002828, "loss": 1.2329, "step": 342336 }, { "epoch": 25.696060037523452, "grad_norm": 0.6297896504402161, "learning_rate": 0.002828, "loss": 1.2385, "step": 342400 }, { "epoch": 25.700863039399625, "grad_norm": 0.6260985732078552, "learning_rate": 0.002828, "loss": 1.2366, "step": 342464 }, { "epoch": 25.705666041275798, "grad_norm": 0.5179916620254517, "learning_rate": 0.002828, "loss": 1.2289, "step": 342528 }, { "epoch": 25.71046904315197, "grad_norm": 0.63902747631073, "learning_rate": 0.002828, "loss": 1.2319, "step": 342592 }, { "epoch": 25.715272045028144, "grad_norm": 0.5296642184257507, "learning_rate": 0.002828, "loss": 1.2257, "step": 342656 }, { "epoch": 25.720075046904316, "grad_norm": 0.5687287449836731, "learning_rate": 0.002828, "loss": 1.2313, "step": 342720 }, { "epoch": 25.72487804878049, "grad_norm": 0.5681202411651611, "learning_rate": 0.002828, "loss": 1.2356, "step": 342784 }, { "epoch": 25.72968105065666, "grad_norm": 0.6682190895080566, "learning_rate": 0.002828, "loss": 1.2351, "step": 342848 }, { "epoch": 25.73448405253283, "grad_norm": 0.6136249303817749, "learning_rate": 0.002828, "loss": 1.2342, "step": 342912 }, { "epoch": 25.739287054409004, "grad_norm": 0.6873012781143188, "learning_rate": 0.002828, "loss": 1.2331, "step": 342976 }, { "epoch": 25.744090056285177, "grad_norm": 0.5340631008148193, "learning_rate": 0.002828, "loss": 1.2297, "step": 343040 }, { "epoch": 25.74889305816135, "grad_norm": 0.698490560054779, "learning_rate": 0.002828, "loss": 1.2368, "step": 343104 }, { "epoch": 25.753696060037523, "grad_norm": 0.6508604884147644, "learning_rate": 0.002828, "loss": 1.2317, "step": 343168 }, { "epoch": 25.758499061913696, "grad_norm": 0.6807830333709717, "learning_rate": 0.002828, "loss": 1.2306, "step": 343232 }, { "epoch": 25.76330206378987, "grad_norm": 0.6143683195114136, "learning_rate": 0.002828, "loss": 1.2398, "step": 343296 }, { "epoch": 25.76810506566604, "grad_norm": 0.7183651924133301, "learning_rate": 0.002828, "loss": 1.2347, "step": 343360 }, { "epoch": 25.772908067542215, "grad_norm": 0.5654355883598328, "learning_rate": 0.002828, "loss": 1.2409, "step": 343424 }, { "epoch": 25.777711069418388, "grad_norm": 0.5893970727920532, "learning_rate": 0.002828, "loss": 1.2288, "step": 343488 }, { "epoch": 25.78251407129456, "grad_norm": 0.688660204410553, "learning_rate": 0.002828, "loss": 1.2375, "step": 343552 }, { "epoch": 25.787317073170733, "grad_norm": 0.5644953846931458, "learning_rate": 0.002828, "loss": 1.2311, "step": 343616 }, { "epoch": 25.792120075046903, "grad_norm": 0.6274914145469666, "learning_rate": 0.002828, "loss": 1.2347, "step": 343680 }, { "epoch": 25.796923076923076, "grad_norm": 0.48792901635169983, "learning_rate": 0.002828, "loss": 1.2397, "step": 343744 }, { "epoch": 25.80172607879925, "grad_norm": 0.6912416219711304, "learning_rate": 0.002828, "loss": 1.2295, "step": 343808 }, { "epoch": 25.80652908067542, "grad_norm": 0.6125044226646423, "learning_rate": 0.002828, "loss": 1.2404, "step": 343872 }, { "epoch": 25.811332082551594, "grad_norm": 0.5817704200744629, "learning_rate": 0.002828, "loss": 1.2366, "step": 343936 }, { "epoch": 25.816135084427767, "grad_norm": 0.653104841709137, "learning_rate": 0.002828, "loss": 1.2352, "step": 344000 }, { "epoch": 25.82093808630394, "grad_norm": 0.646251380443573, "learning_rate": 0.002828, "loss": 1.2314, "step": 344064 }, { "epoch": 25.825741088180113, "grad_norm": 0.5538818836212158, "learning_rate": 0.002828, "loss": 1.2318, "step": 344128 }, { "epoch": 25.830544090056286, "grad_norm": 0.6054178476333618, "learning_rate": 0.002828, "loss": 1.2324, "step": 344192 }, { "epoch": 25.83534709193246, "grad_norm": 0.4697195887565613, "learning_rate": 0.002828, "loss": 1.2404, "step": 344256 }, { "epoch": 25.84015009380863, "grad_norm": 0.6623765230178833, "learning_rate": 0.002828, "loss": 1.2313, "step": 344320 }, { "epoch": 25.844953095684804, "grad_norm": 0.5547646284103394, "learning_rate": 0.002828, "loss": 1.2306, "step": 344384 }, { "epoch": 25.849756097560977, "grad_norm": 0.6155876517295837, "learning_rate": 0.002828, "loss": 1.2322, "step": 344448 }, { "epoch": 25.854559099437147, "grad_norm": 0.6721112132072449, "learning_rate": 0.002828, "loss": 1.2421, "step": 344512 }, { "epoch": 25.85936210131332, "grad_norm": 0.7572811245918274, "learning_rate": 0.002828, "loss": 1.2304, "step": 344576 }, { "epoch": 25.864165103189492, "grad_norm": 0.48257964849472046, "learning_rate": 0.002828, "loss": 1.2321, "step": 344640 }, { "epoch": 25.868968105065665, "grad_norm": 0.5753004550933838, "learning_rate": 0.002828, "loss": 1.2361, "step": 344704 }, { "epoch": 25.873771106941838, "grad_norm": 0.6127859950065613, "learning_rate": 0.002828, "loss": 1.2339, "step": 344768 }, { "epoch": 25.87857410881801, "grad_norm": 0.6566331386566162, "learning_rate": 0.002828, "loss": 1.238, "step": 344832 }, { "epoch": 25.883377110694184, "grad_norm": 0.49689486622810364, "learning_rate": 0.002828, "loss": 1.2299, "step": 344896 }, { "epoch": 25.888180112570357, "grad_norm": 0.9033483266830444, "learning_rate": 0.002828, "loss": 1.2346, "step": 344960 }, { "epoch": 25.89298311444653, "grad_norm": 0.5268735885620117, "learning_rate": 0.002828, "loss": 1.2379, "step": 345024 }, { "epoch": 25.897786116322703, "grad_norm": 0.5659050941467285, "learning_rate": 0.002828, "loss": 1.239, "step": 345088 }, { "epoch": 25.902589118198875, "grad_norm": 0.700505793094635, "learning_rate": 0.002828, "loss": 1.2352, "step": 345152 }, { "epoch": 25.90739212007505, "grad_norm": 0.5935922265052795, "learning_rate": 0.002828, "loss": 1.2401, "step": 345216 }, { "epoch": 25.91219512195122, "grad_norm": 0.6356076002120972, "learning_rate": 0.002828, "loss": 1.2369, "step": 345280 }, { "epoch": 25.91699812382739, "grad_norm": 0.7625199556350708, "learning_rate": 0.002828, "loss": 1.234, "step": 345344 }, { "epoch": 25.921801125703563, "grad_norm": 0.5876018404960632, "learning_rate": 0.002828, "loss": 1.2397, "step": 345408 }, { "epoch": 25.926604127579736, "grad_norm": 0.5337401628494263, "learning_rate": 0.002828, "loss": 1.235, "step": 345472 }, { "epoch": 25.93140712945591, "grad_norm": 0.597531259059906, "learning_rate": 0.002828, "loss": 1.2314, "step": 345536 }, { "epoch": 25.936210131332082, "grad_norm": 0.5823056101799011, "learning_rate": 0.002828, "loss": 1.2322, "step": 345600 }, { "epoch": 25.941013133208255, "grad_norm": 0.5407066345214844, "learning_rate": 0.002828, "loss": 1.2338, "step": 345664 }, { "epoch": 25.945816135084428, "grad_norm": 0.687560498714447, "learning_rate": 0.002828, "loss": 1.2347, "step": 345728 }, { "epoch": 25.9506191369606, "grad_norm": 0.5314828753471375, "learning_rate": 0.002828, "loss": 1.2438, "step": 345792 }, { "epoch": 25.955422138836774, "grad_norm": 0.7035368084907532, "learning_rate": 0.002828, "loss": 1.238, "step": 345856 }, { "epoch": 25.960225140712947, "grad_norm": 0.5470126867294312, "learning_rate": 0.002828, "loss": 1.2313, "step": 345920 }, { "epoch": 25.96502814258912, "grad_norm": 0.5416370630264282, "learning_rate": 0.002828, "loss": 1.2382, "step": 345984 }, { "epoch": 25.969831144465292, "grad_norm": 0.6480735540390015, "learning_rate": 0.002828, "loss": 1.2387, "step": 346048 }, { "epoch": 25.974634146341465, "grad_norm": 0.5238390564918518, "learning_rate": 0.002828, "loss": 1.2367, "step": 346112 }, { "epoch": 25.979437148217635, "grad_norm": 0.5900247693061829, "learning_rate": 0.002828, "loss": 1.2376, "step": 346176 }, { "epoch": 25.984240150093807, "grad_norm": 0.6199198365211487, "learning_rate": 0.002828, "loss": 1.2373, "step": 346240 }, { "epoch": 25.98904315196998, "grad_norm": 0.6323431730270386, "learning_rate": 0.002828, "loss": 1.2355, "step": 346304 }, { "epoch": 25.993846153846153, "grad_norm": 0.5909257531166077, "learning_rate": 0.002828, "loss": 1.2339, "step": 346368 }, { "epoch": 25.998649155722326, "grad_norm": 0.5273489356040955, "learning_rate": 0.002828, "loss": 1.2345, "step": 346432 }, { "epoch": 26.0034521575985, "grad_norm": 0.5202985405921936, "learning_rate": 0.002828, "loss": 1.2114, "step": 346496 }, { "epoch": 26.008255159474672, "grad_norm": 0.5007950663566589, "learning_rate": 0.002828, "loss": 1.1978, "step": 346560 }, { "epoch": 26.013058161350845, "grad_norm": 0.5772574543952942, "learning_rate": 0.002828, "loss": 1.1976, "step": 346624 }, { "epoch": 26.017861163227018, "grad_norm": 0.5691853165626526, "learning_rate": 0.002828, "loss": 1.1911, "step": 346688 }, { "epoch": 26.02266416510319, "grad_norm": 0.5564082264900208, "learning_rate": 0.002828, "loss": 1.1955, "step": 346752 }, { "epoch": 26.027467166979363, "grad_norm": 0.6861823797225952, "learning_rate": 0.002828, "loss": 1.2017, "step": 346816 }, { "epoch": 26.032270168855536, "grad_norm": 0.6327756643295288, "learning_rate": 0.002828, "loss": 1.202, "step": 346880 }, { "epoch": 26.037073170731706, "grad_norm": 0.5592343211174011, "learning_rate": 0.002828, "loss": 1.2033, "step": 346944 }, { "epoch": 26.04187617260788, "grad_norm": 0.6427003145217896, "learning_rate": 0.002828, "loss": 1.1967, "step": 347008 }, { "epoch": 26.04667917448405, "grad_norm": 0.550666093826294, "learning_rate": 0.002828, "loss": 1.2017, "step": 347072 }, { "epoch": 26.051482176360224, "grad_norm": 0.5372848510742188, "learning_rate": 0.002828, "loss": 1.2099, "step": 347136 }, { "epoch": 26.056285178236397, "grad_norm": 0.6634334325790405, "learning_rate": 0.002828, "loss": 1.2021, "step": 347200 }, { "epoch": 26.06108818011257, "grad_norm": 0.6219868659973145, "learning_rate": 0.002828, "loss": 1.2004, "step": 347264 }, { "epoch": 26.065891181988743, "grad_norm": 0.5100303888320923, "learning_rate": 0.002828, "loss": 1.1993, "step": 347328 }, { "epoch": 26.070694183864916, "grad_norm": 0.6207034587860107, "learning_rate": 0.002828, "loss": 1.1937, "step": 347392 }, { "epoch": 26.07549718574109, "grad_norm": 0.6422325968742371, "learning_rate": 0.002828, "loss": 1.2051, "step": 347456 }, { "epoch": 26.08030018761726, "grad_norm": 0.5773105025291443, "learning_rate": 0.002828, "loss": 1.2094, "step": 347520 }, { "epoch": 26.085103189493434, "grad_norm": 0.5325859785079956, "learning_rate": 0.002828, "loss": 1.2028, "step": 347584 }, { "epoch": 26.089906191369607, "grad_norm": 0.6021090149879456, "learning_rate": 0.002828, "loss": 1.2003, "step": 347648 }, { "epoch": 26.09470919324578, "grad_norm": 0.6171033978462219, "learning_rate": 0.002828, "loss": 1.2009, "step": 347712 }, { "epoch": 26.09951219512195, "grad_norm": 0.5657275915145874, "learning_rate": 0.002828, "loss": 1.2007, "step": 347776 }, { "epoch": 26.104315196998122, "grad_norm": 0.6622403860092163, "learning_rate": 0.002828, "loss": 1.2071, "step": 347840 }, { "epoch": 26.109118198874295, "grad_norm": 0.6320151090621948, "learning_rate": 0.002828, "loss": 1.2067, "step": 347904 }, { "epoch": 26.113921200750468, "grad_norm": 0.5358821749687195, "learning_rate": 0.002828, "loss": 1.2033, "step": 347968 }, { "epoch": 26.11872420262664, "grad_norm": 0.6047529578208923, "learning_rate": 0.002828, "loss": 1.2053, "step": 348032 }, { "epoch": 26.123527204502814, "grad_norm": 0.5269867777824402, "learning_rate": 0.002828, "loss": 1.204, "step": 348096 }, { "epoch": 26.128330206378987, "grad_norm": 0.5756949782371521, "learning_rate": 0.002828, "loss": 1.2025, "step": 348160 }, { "epoch": 26.13313320825516, "grad_norm": 0.5575070977210999, "learning_rate": 0.002828, "loss": 1.209, "step": 348224 }, { "epoch": 26.137936210131333, "grad_norm": 0.6042987704277039, "learning_rate": 0.002828, "loss": 1.2052, "step": 348288 }, { "epoch": 26.142739212007506, "grad_norm": 0.6796466708183289, "learning_rate": 0.002828, "loss": 1.2014, "step": 348352 }, { "epoch": 26.14754221388368, "grad_norm": 0.6714834570884705, "learning_rate": 0.002828, "loss": 1.2021, "step": 348416 }, { "epoch": 26.15234521575985, "grad_norm": 0.6910606026649475, "learning_rate": 0.002828, "loss": 1.2045, "step": 348480 }, { "epoch": 26.157148217636024, "grad_norm": 0.515529990196228, "learning_rate": 0.002828, "loss": 1.2079, "step": 348544 }, { "epoch": 26.161951219512193, "grad_norm": 0.6046344041824341, "learning_rate": 0.002828, "loss": 1.2038, "step": 348608 }, { "epoch": 26.166754221388366, "grad_norm": 0.5959147810935974, "learning_rate": 0.002828, "loss": 1.2098, "step": 348672 }, { "epoch": 26.17155722326454, "grad_norm": 0.5565488934516907, "learning_rate": 0.002828, "loss": 1.2106, "step": 348736 }, { "epoch": 26.176360225140712, "grad_norm": 0.5920954346656799, "learning_rate": 0.002828, "loss": 1.2069, "step": 348800 }, { "epoch": 26.181163227016885, "grad_norm": 0.5918201208114624, "learning_rate": 0.002828, "loss": 1.2023, "step": 348864 }, { "epoch": 26.185966228893058, "grad_norm": 0.579826831817627, "learning_rate": 0.002828, "loss": 1.2045, "step": 348928 }, { "epoch": 26.19076923076923, "grad_norm": 0.6079787611961365, "learning_rate": 0.002828, "loss": 1.2042, "step": 348992 }, { "epoch": 26.195572232645404, "grad_norm": 0.5046894550323486, "learning_rate": 0.002828, "loss": 1.2053, "step": 349056 }, { "epoch": 26.200375234521577, "grad_norm": 0.6525089740753174, "learning_rate": 0.002828, "loss": 1.207, "step": 349120 }, { "epoch": 26.20517823639775, "grad_norm": 0.5801729559898376, "learning_rate": 0.002828, "loss": 1.2055, "step": 349184 }, { "epoch": 26.209981238273922, "grad_norm": 0.625745415687561, "learning_rate": 0.002828, "loss": 1.2069, "step": 349248 }, { "epoch": 26.214784240150095, "grad_norm": 0.6147674918174744, "learning_rate": 0.002828, "loss": 1.2063, "step": 349312 }, { "epoch": 26.219587242026268, "grad_norm": 0.5819293856620789, "learning_rate": 0.002828, "loss": 1.211, "step": 349376 }, { "epoch": 26.224390243902437, "grad_norm": 0.5987066626548767, "learning_rate": 0.002828, "loss": 1.2159, "step": 349440 }, { "epoch": 26.22919324577861, "grad_norm": 0.5650080442428589, "learning_rate": 0.002828, "loss": 1.2152, "step": 349504 }, { "epoch": 26.233996247654783, "grad_norm": 0.5981659889221191, "learning_rate": 0.002828, "loss": 1.2103, "step": 349568 }, { "epoch": 26.238799249530956, "grad_norm": 0.5904810428619385, "learning_rate": 0.002828, "loss": 1.2043, "step": 349632 }, { "epoch": 26.24360225140713, "grad_norm": 0.5519508123397827, "learning_rate": 0.002828, "loss": 1.2107, "step": 349696 }, { "epoch": 26.248405253283302, "grad_norm": 0.5517117381095886, "learning_rate": 0.002828, "loss": 1.2064, "step": 349760 }, { "epoch": 26.253208255159475, "grad_norm": 0.5335156321525574, "learning_rate": 0.002828, "loss": 1.2143, "step": 349824 }, { "epoch": 26.258011257035648, "grad_norm": 0.5944948792457581, "learning_rate": 0.002828, "loss": 1.2093, "step": 349888 }, { "epoch": 26.26281425891182, "grad_norm": 0.5173745155334473, "learning_rate": 0.002828, "loss": 1.2091, "step": 349952 }, { "epoch": 26.267617260787993, "grad_norm": 0.5639364123344421, "learning_rate": 0.002828, "loss": 1.2113, "step": 350016 }, { "epoch": 26.272420262664166, "grad_norm": 0.508617103099823, "learning_rate": 0.002828, "loss": 1.2089, "step": 350080 }, { "epoch": 26.27722326454034, "grad_norm": 0.4807698428630829, "learning_rate": 0.002828, "loss": 1.2057, "step": 350144 }, { "epoch": 26.28202626641651, "grad_norm": 0.5463166236877441, "learning_rate": 0.002828, "loss": 1.2077, "step": 350208 }, { "epoch": 26.28682926829268, "grad_norm": 0.6454668641090393, "learning_rate": 0.002828, "loss": 1.2096, "step": 350272 }, { "epoch": 26.291632270168854, "grad_norm": 0.6266613602638245, "learning_rate": 0.002828, "loss": 1.2051, "step": 350336 }, { "epoch": 26.296435272045027, "grad_norm": 0.6026892066001892, "learning_rate": 0.002828, "loss": 1.2124, "step": 350400 }, { "epoch": 26.3012382739212, "grad_norm": 0.6331421732902527, "learning_rate": 0.002828, "loss": 1.2122, "step": 350464 }, { "epoch": 26.306041275797373, "grad_norm": 0.5798826217651367, "learning_rate": 0.002828, "loss": 1.2106, "step": 350528 }, { "epoch": 26.310844277673546, "grad_norm": 0.527355432510376, "learning_rate": 0.002828, "loss": 1.2091, "step": 350592 }, { "epoch": 26.31564727954972, "grad_norm": 0.5960228443145752, "learning_rate": 0.002828, "loss": 1.2136, "step": 350656 }, { "epoch": 26.32045028142589, "grad_norm": 0.667033851146698, "learning_rate": 0.002828, "loss": 1.2088, "step": 350720 }, { "epoch": 26.325253283302064, "grad_norm": 0.6654618978500366, "learning_rate": 0.002828, "loss": 1.2161, "step": 350784 }, { "epoch": 26.330056285178237, "grad_norm": 0.5624558925628662, "learning_rate": 0.002828, "loss": 1.2076, "step": 350848 }, { "epoch": 26.33485928705441, "grad_norm": 0.534230649471283, "learning_rate": 0.002828, "loss": 1.2166, "step": 350912 }, { "epoch": 26.339662288930583, "grad_norm": 0.6490930914878845, "learning_rate": 0.002828, "loss": 1.2119, "step": 350976 }, { "epoch": 26.344465290806752, "grad_norm": 0.70958411693573, "learning_rate": 0.002828, "loss": 1.2099, "step": 351040 }, { "epoch": 26.349268292682925, "grad_norm": 0.6444900035858154, "learning_rate": 0.002828, "loss": 1.2166, "step": 351104 }, { "epoch": 26.3540712945591, "grad_norm": 0.5082390904426575, "learning_rate": 0.002828, "loss": 1.2157, "step": 351168 }, { "epoch": 26.35887429643527, "grad_norm": 0.46947455406188965, "learning_rate": 0.002828, "loss": 1.2133, "step": 351232 }, { "epoch": 26.363677298311444, "grad_norm": 0.58272385597229, "learning_rate": 0.002828, "loss": 1.2162, "step": 351296 }, { "epoch": 26.368480300187617, "grad_norm": 0.6278541684150696, "learning_rate": 0.002828, "loss": 1.2255, "step": 351360 }, { "epoch": 26.37328330206379, "grad_norm": 0.7564519047737122, "learning_rate": 0.002828, "loss": 1.2239, "step": 351424 }, { "epoch": 26.378086303939963, "grad_norm": 0.6092026829719543, "learning_rate": 0.002828, "loss": 1.2554, "step": 351488 }, { "epoch": 26.382889305816136, "grad_norm": 0.5624083280563354, "learning_rate": 0.002828, "loss": 1.2196, "step": 351552 }, { "epoch": 26.38769230769231, "grad_norm": 0.6422922611236572, "learning_rate": 0.002828, "loss": 1.2153, "step": 351616 }, { "epoch": 26.39249530956848, "grad_norm": 0.7231433391571045, "learning_rate": 0.002828, "loss": 1.2128, "step": 351680 }, { "epoch": 26.397298311444654, "grad_norm": 0.6518374681472778, "learning_rate": 0.002828, "loss": 1.213, "step": 351744 }, { "epoch": 26.402101313320827, "grad_norm": 0.5458804965019226, "learning_rate": 0.002828, "loss": 1.2119, "step": 351808 }, { "epoch": 26.406904315196996, "grad_norm": 0.5119909048080444, "learning_rate": 0.002828, "loss": 1.2137, "step": 351872 }, { "epoch": 26.41170731707317, "grad_norm": 0.5168522000312805, "learning_rate": 0.002828, "loss": 1.2135, "step": 351936 }, { "epoch": 26.416510318949342, "grad_norm": 0.6378693580627441, "learning_rate": 0.002828, "loss": 1.2189, "step": 352000 }, { "epoch": 26.421313320825515, "grad_norm": 0.6508474946022034, "learning_rate": 0.002828, "loss": 1.2135, "step": 352064 }, { "epoch": 26.426116322701688, "grad_norm": 0.7472125291824341, "learning_rate": 0.002828, "loss": 1.222, "step": 352128 }, { "epoch": 26.43091932457786, "grad_norm": 0.644286572933197, "learning_rate": 0.002828, "loss": 1.2105, "step": 352192 }, { "epoch": 26.435722326454034, "grad_norm": 0.5401178598403931, "learning_rate": 0.002828, "loss": 1.2194, "step": 352256 }, { "epoch": 26.440525328330207, "grad_norm": 0.6656849384307861, "learning_rate": 0.002828, "loss": 1.2116, "step": 352320 }, { "epoch": 26.44532833020638, "grad_norm": 0.8256285190582275, "learning_rate": 0.002828, "loss": 1.2161, "step": 352384 }, { "epoch": 26.450131332082552, "grad_norm": 0.6020409464836121, "learning_rate": 0.002828, "loss": 1.2163, "step": 352448 }, { "epoch": 26.454934333958725, "grad_norm": 0.550679624080658, "learning_rate": 0.002828, "loss": 1.2161, "step": 352512 }, { "epoch": 26.459737335834898, "grad_norm": 0.6722231507301331, "learning_rate": 0.002828, "loss": 1.2166, "step": 352576 }, { "epoch": 26.46454033771107, "grad_norm": 0.6431364417076111, "learning_rate": 0.002828, "loss": 1.2214, "step": 352640 }, { "epoch": 26.46934333958724, "grad_norm": 0.5253560543060303, "learning_rate": 0.002828, "loss": 1.219, "step": 352704 }, { "epoch": 26.474146341463413, "grad_norm": 0.6368989944458008, "learning_rate": 0.002828, "loss": 1.2195, "step": 352768 }, { "epoch": 26.478949343339586, "grad_norm": 0.5993379950523376, "learning_rate": 0.002828, "loss": 1.2115, "step": 352832 }, { "epoch": 26.48375234521576, "grad_norm": 0.493409126996994, "learning_rate": 0.002828, "loss": 1.2249, "step": 352896 }, { "epoch": 26.488555347091932, "grad_norm": 0.6184572577476501, "learning_rate": 0.002828, "loss": 1.218, "step": 352960 }, { "epoch": 26.493358348968105, "grad_norm": 0.592602014541626, "learning_rate": 0.002828, "loss": 1.2259, "step": 353024 }, { "epoch": 26.498161350844278, "grad_norm": 0.6765268445014954, "learning_rate": 0.002828, "loss": 1.2167, "step": 353088 }, { "epoch": 26.50296435272045, "grad_norm": 0.651823103427887, "learning_rate": 0.002828, "loss": 1.219, "step": 353152 }, { "epoch": 26.507767354596623, "grad_norm": 0.5332434177398682, "learning_rate": 0.002828, "loss": 1.2175, "step": 353216 }, { "epoch": 26.512570356472796, "grad_norm": 0.5774806141853333, "learning_rate": 0.002828, "loss": 1.2194, "step": 353280 }, { "epoch": 26.51737335834897, "grad_norm": 0.5372803807258606, "learning_rate": 0.002828, "loss": 1.2159, "step": 353344 }, { "epoch": 26.522176360225142, "grad_norm": 0.6259738206863403, "learning_rate": 0.002828, "loss": 1.2207, "step": 353408 }, { "epoch": 26.526979362101315, "grad_norm": 0.5487435460090637, "learning_rate": 0.002828, "loss": 1.2166, "step": 353472 }, { "epoch": 26.531782363977484, "grad_norm": 0.6260308027267456, "learning_rate": 0.002828, "loss": 1.2161, "step": 353536 }, { "epoch": 26.536585365853657, "grad_norm": 0.581946611404419, "learning_rate": 0.002828, "loss": 1.2206, "step": 353600 }, { "epoch": 26.54138836772983, "grad_norm": 0.6206014752388, "learning_rate": 0.002828, "loss": 1.2178, "step": 353664 }, { "epoch": 26.546191369606003, "grad_norm": 0.624851644039154, "learning_rate": 0.002828, "loss": 1.2206, "step": 353728 }, { "epoch": 26.550994371482176, "grad_norm": 0.6026180982589722, "learning_rate": 0.002828, "loss": 1.2178, "step": 353792 }, { "epoch": 26.55579737335835, "grad_norm": 0.6708505153656006, "learning_rate": 0.002828, "loss": 1.2139, "step": 353856 }, { "epoch": 26.56060037523452, "grad_norm": 0.6612686514854431, "learning_rate": 0.002828, "loss": 1.2192, "step": 353920 }, { "epoch": 26.565403377110695, "grad_norm": 0.6385770440101624, "learning_rate": 0.002828, "loss": 1.2189, "step": 353984 }, { "epoch": 26.570206378986867, "grad_norm": 0.7035972476005554, "learning_rate": 0.002828, "loss": 1.2212, "step": 354048 }, { "epoch": 26.57500938086304, "grad_norm": 0.4659041166305542, "learning_rate": 0.002828, "loss": 1.2139, "step": 354112 }, { "epoch": 26.579812382739213, "grad_norm": 0.4893842339515686, "learning_rate": 0.002828, "loss": 1.219, "step": 354176 }, { "epoch": 26.584615384615386, "grad_norm": 0.5250750780105591, "learning_rate": 0.002828, "loss": 1.2261, "step": 354240 }, { "epoch": 26.58941838649156, "grad_norm": 0.5908548831939697, "learning_rate": 0.002828, "loss": 1.2216, "step": 354304 }, { "epoch": 26.59422138836773, "grad_norm": 0.5817123651504517, "learning_rate": 0.002828, "loss": 1.2183, "step": 354368 }, { "epoch": 26.5990243902439, "grad_norm": 0.5596290826797485, "learning_rate": 0.002828, "loss": 1.2177, "step": 354432 }, { "epoch": 26.603827392120074, "grad_norm": 0.7440441846847534, "learning_rate": 0.002828, "loss": 1.2171, "step": 354496 }, { "epoch": 26.608630393996247, "grad_norm": 0.6214221119880676, "learning_rate": 0.002828, "loss": 1.2158, "step": 354560 }, { "epoch": 26.61343339587242, "grad_norm": 0.5204899907112122, "learning_rate": 0.002828, "loss": 1.2196, "step": 354624 }, { "epoch": 26.618236397748593, "grad_norm": 0.6049349904060364, "learning_rate": 0.002828, "loss": 1.2217, "step": 354688 }, { "epoch": 26.623039399624766, "grad_norm": 0.5005108118057251, "learning_rate": 0.002828, "loss": 1.2208, "step": 354752 }, { "epoch": 26.62784240150094, "grad_norm": 0.6195684671401978, "learning_rate": 0.002828, "loss": 1.2126, "step": 354816 }, { "epoch": 26.63264540337711, "grad_norm": 0.6185719966888428, "learning_rate": 0.002828, "loss": 1.2211, "step": 354880 }, { "epoch": 26.637448405253284, "grad_norm": 0.6542859673500061, "learning_rate": 0.002828, "loss": 1.2183, "step": 354944 }, { "epoch": 26.642251407129457, "grad_norm": 0.73667973279953, "learning_rate": 0.002828, "loss": 1.2255, "step": 355008 }, { "epoch": 26.64705440900563, "grad_norm": 0.6478698253631592, "learning_rate": 0.002828, "loss": 1.2181, "step": 355072 }, { "epoch": 26.651857410881803, "grad_norm": 0.5741866230964661, "learning_rate": 0.002828, "loss": 1.218, "step": 355136 }, { "epoch": 26.656660412757972, "grad_norm": 0.5622037649154663, "learning_rate": 0.002828, "loss": 1.2163, "step": 355200 }, { "epoch": 26.661463414634145, "grad_norm": 0.5420361161231995, "learning_rate": 0.002828, "loss": 1.2218, "step": 355264 }, { "epoch": 26.666266416510318, "grad_norm": 0.6684010624885559, "learning_rate": 0.002828, "loss": 1.2209, "step": 355328 }, { "epoch": 26.67106941838649, "grad_norm": 0.6673370003700256, "learning_rate": 0.002828, "loss": 1.2175, "step": 355392 }, { "epoch": 26.675872420262664, "grad_norm": 0.5472583770751953, "learning_rate": 0.002828, "loss": 1.2205, "step": 355456 }, { "epoch": 26.680675422138837, "grad_norm": 0.5732004642486572, "learning_rate": 0.002828, "loss": 1.2233, "step": 355520 }, { "epoch": 26.68547842401501, "grad_norm": 0.5972156524658203, "learning_rate": 0.002828, "loss": 1.2203, "step": 355584 }, { "epoch": 26.690281425891182, "grad_norm": 0.6077661514282227, "learning_rate": 0.002828, "loss": 1.2213, "step": 355648 }, { "epoch": 26.695084427767355, "grad_norm": 0.5697489380836487, "learning_rate": 0.002828, "loss": 1.2214, "step": 355712 }, { "epoch": 26.69988742964353, "grad_norm": 0.5756834745407104, "learning_rate": 0.002828, "loss": 1.2201, "step": 355776 }, { "epoch": 26.7046904315197, "grad_norm": 0.5190218687057495, "learning_rate": 0.002828, "loss": 1.2224, "step": 355840 }, { "epoch": 26.709493433395874, "grad_norm": 0.5287311673164368, "learning_rate": 0.002828, "loss": 1.2254, "step": 355904 }, { "epoch": 26.714296435272043, "grad_norm": 0.6516658663749695, "learning_rate": 0.002828, "loss": 1.2202, "step": 355968 }, { "epoch": 26.719099437148216, "grad_norm": 0.5656246542930603, "learning_rate": 0.002828, "loss": 1.2218, "step": 356032 }, { "epoch": 26.72390243902439, "grad_norm": 0.5156927704811096, "learning_rate": 0.002828, "loss": 1.2163, "step": 356096 }, { "epoch": 26.728705440900562, "grad_norm": 0.645133912563324, "learning_rate": 0.002828, "loss": 1.2275, "step": 356160 }, { "epoch": 26.733508442776735, "grad_norm": 0.5428419709205627, "learning_rate": 0.002828, "loss": 1.224, "step": 356224 }, { "epoch": 26.738311444652908, "grad_norm": 0.6389603018760681, "learning_rate": 0.002828, "loss": 1.2184, "step": 356288 }, { "epoch": 26.74311444652908, "grad_norm": 0.5498131513595581, "learning_rate": 0.002828, "loss": 1.2227, "step": 356352 }, { "epoch": 26.747917448405254, "grad_norm": 0.6554180383682251, "learning_rate": 0.002828, "loss": 1.2233, "step": 356416 }, { "epoch": 26.752720450281426, "grad_norm": 0.69988614320755, "learning_rate": 0.002828, "loss": 1.2159, "step": 356480 }, { "epoch": 26.7575234521576, "grad_norm": 0.6135717630386353, "learning_rate": 0.002828, "loss": 1.2236, "step": 356544 }, { "epoch": 26.762326454033772, "grad_norm": 0.5877432227134705, "learning_rate": 0.002828, "loss": 1.2185, "step": 356608 }, { "epoch": 26.767129455909945, "grad_norm": 0.6867513060569763, "learning_rate": 0.002828, "loss": 1.222, "step": 356672 }, { "epoch": 26.771932457786118, "grad_norm": 0.6261159181594849, "learning_rate": 0.002828, "loss": 1.2247, "step": 356736 }, { "epoch": 26.776735459662287, "grad_norm": 0.6596559286117554, "learning_rate": 0.002828, "loss": 1.223, "step": 356800 }, { "epoch": 26.78153846153846, "grad_norm": 0.5095260143280029, "learning_rate": 0.002828, "loss": 1.22, "step": 356864 }, { "epoch": 26.786341463414633, "grad_norm": 0.7851542830467224, "learning_rate": 0.002828, "loss": 1.217, "step": 356928 }, { "epoch": 26.791144465290806, "grad_norm": 0.6265200972557068, "learning_rate": 0.002828, "loss": 1.2242, "step": 356992 }, { "epoch": 26.79594746716698, "grad_norm": 0.5440147519111633, "learning_rate": 0.002828, "loss": 1.2189, "step": 357056 }, { "epoch": 26.80075046904315, "grad_norm": 0.5513255596160889, "learning_rate": 0.002828, "loss": 1.221, "step": 357120 }, { "epoch": 26.805553470919325, "grad_norm": 0.5585968494415283, "learning_rate": 0.002828, "loss": 1.2275, "step": 357184 }, { "epoch": 26.810356472795497, "grad_norm": 0.5784460306167603, "learning_rate": 0.002828, "loss": 1.2221, "step": 357248 }, { "epoch": 26.81515947467167, "grad_norm": 0.6278428435325623, "learning_rate": 0.002828, "loss": 1.2271, "step": 357312 }, { "epoch": 26.819962476547843, "grad_norm": 0.6241024136543274, "learning_rate": 0.002828, "loss": 1.2237, "step": 357376 }, { "epoch": 26.824765478424016, "grad_norm": 0.5838163495063782, "learning_rate": 0.002828, "loss": 1.2276, "step": 357440 }, { "epoch": 26.82956848030019, "grad_norm": 0.49887073040008545, "learning_rate": 0.002828, "loss": 1.2245, "step": 357504 }, { "epoch": 26.834371482176362, "grad_norm": 0.5843515992164612, "learning_rate": 0.002828, "loss": 1.2187, "step": 357568 }, { "epoch": 26.83917448405253, "grad_norm": 0.5945191979408264, "learning_rate": 0.002828, "loss": 1.2238, "step": 357632 }, { "epoch": 26.843977485928704, "grad_norm": 0.5643002986907959, "learning_rate": 0.002828, "loss": 1.2173, "step": 357696 }, { "epoch": 26.848780487804877, "grad_norm": 0.545021116733551, "learning_rate": 0.002828, "loss": 1.2252, "step": 357760 }, { "epoch": 26.85358348968105, "grad_norm": 0.5684725642204285, "learning_rate": 0.002828, "loss": 1.2253, "step": 357824 }, { "epoch": 26.858386491557223, "grad_norm": 0.6127748489379883, "learning_rate": 0.002828, "loss": 1.2191, "step": 357888 }, { "epoch": 26.863189493433396, "grad_norm": 0.6032697558403015, "learning_rate": 0.002828, "loss": 1.217, "step": 357952 }, { "epoch": 26.86799249530957, "grad_norm": 0.6626061797142029, "learning_rate": 0.002828, "loss": 1.2218, "step": 358016 }, { "epoch": 26.87279549718574, "grad_norm": 0.7547987699508667, "learning_rate": 0.002828, "loss": 1.2178, "step": 358080 }, { "epoch": 26.877598499061914, "grad_norm": 0.5466580390930176, "learning_rate": 0.002828, "loss": 1.2196, "step": 358144 }, { "epoch": 26.882401500938087, "grad_norm": 0.7023701667785645, "learning_rate": 0.002828, "loss": 1.2242, "step": 358208 }, { "epoch": 26.88720450281426, "grad_norm": 0.6215884685516357, "learning_rate": 0.002828, "loss": 1.2253, "step": 358272 }, { "epoch": 26.892007504690433, "grad_norm": 0.621918797492981, "learning_rate": 0.002828, "loss": 1.2227, "step": 358336 }, { "epoch": 26.896810506566602, "grad_norm": 0.5851648449897766, "learning_rate": 0.002828, "loss": 1.2261, "step": 358400 }, { "epoch": 26.901613508442775, "grad_norm": 0.6201433539390564, "learning_rate": 0.002828, "loss": 1.2241, "step": 358464 }, { "epoch": 26.906416510318948, "grad_norm": 0.5449443459510803, "learning_rate": 0.002828, "loss": 1.2227, "step": 358528 }, { "epoch": 26.91121951219512, "grad_norm": 0.5317158102989197, "learning_rate": 0.002828, "loss": 1.2207, "step": 358592 }, { "epoch": 26.916022514071294, "grad_norm": 0.6267401576042175, "learning_rate": 0.002828, "loss": 1.2224, "step": 358656 }, { "epoch": 26.920825515947467, "grad_norm": 0.5952174663543701, "learning_rate": 0.002828, "loss": 1.2229, "step": 358720 }, { "epoch": 26.92562851782364, "grad_norm": 0.6656439304351807, "learning_rate": 0.002828, "loss": 1.2215, "step": 358784 }, { "epoch": 26.930431519699813, "grad_norm": 0.5638568997383118, "learning_rate": 0.002828, "loss": 1.2289, "step": 358848 }, { "epoch": 26.935234521575985, "grad_norm": 0.5787448883056641, "learning_rate": 0.002828, "loss": 1.2243, "step": 358912 }, { "epoch": 26.94003752345216, "grad_norm": 0.7306926846504211, "learning_rate": 0.002828, "loss": 1.2243, "step": 358976 }, { "epoch": 26.94484052532833, "grad_norm": 0.5893208980560303, "learning_rate": 0.002828, "loss": 1.2222, "step": 359040 }, { "epoch": 26.949643527204504, "grad_norm": 0.5581895709037781, "learning_rate": 0.002828, "loss": 1.2264, "step": 359104 }, { "epoch": 26.954446529080677, "grad_norm": 0.548004686832428, "learning_rate": 0.002828, "loss": 1.2236, "step": 359168 }, { "epoch": 26.959249530956846, "grad_norm": 0.5354708433151245, "learning_rate": 0.002828, "loss": 1.2243, "step": 359232 }, { "epoch": 26.96405253283302, "grad_norm": 0.5066986083984375, "learning_rate": 0.002828, "loss": 1.2257, "step": 359296 }, { "epoch": 26.968855534709192, "grad_norm": 0.5515348315238953, "learning_rate": 0.002828, "loss": 1.2286, "step": 359360 }, { "epoch": 26.973658536585365, "grad_norm": 0.5831103920936584, "learning_rate": 0.002828, "loss": 1.2265, "step": 359424 }, { "epoch": 26.978461538461538, "grad_norm": 0.5766531229019165, "learning_rate": 0.002828, "loss": 1.2284, "step": 359488 }, { "epoch": 26.98326454033771, "grad_norm": 0.7886342406272888, "learning_rate": 0.002828, "loss": 1.2273, "step": 359552 }, { "epoch": 26.988067542213884, "grad_norm": 0.6386691927909851, "learning_rate": 0.002828, "loss": 1.2314, "step": 359616 }, { "epoch": 26.992870544090056, "grad_norm": 0.5953747630119324, "learning_rate": 0.002828, "loss": 1.2205, "step": 359680 }, { "epoch": 26.99767354596623, "grad_norm": 0.5155026316642761, "learning_rate": 0.002828, "loss": 1.2194, "step": 359744 }, { "epoch": 27.002476547842402, "grad_norm": 0.6157639026641846, "learning_rate": 0.002828, "loss": 1.2097, "step": 359808 }, { "epoch": 27.007279549718575, "grad_norm": 0.5952342748641968, "learning_rate": 0.002828, "loss": 1.1807, "step": 359872 }, { "epoch": 27.012082551594748, "grad_norm": 0.6238614916801453, "learning_rate": 0.002828, "loss": 1.1813, "step": 359936 }, { "epoch": 27.01688555347092, "grad_norm": 0.5040086507797241, "learning_rate": 0.002828, "loss": 1.1838, "step": 360000 }, { "epoch": 27.02168855534709, "grad_norm": 0.5880621671676636, "learning_rate": 0.002828, "loss": 1.1862, "step": 360064 }, { "epoch": 27.026491557223263, "grad_norm": 0.5540125966072083, "learning_rate": 0.002828, "loss": 1.1836, "step": 360128 }, { "epoch": 27.031294559099436, "grad_norm": 0.7543290853500366, "learning_rate": 0.002828, "loss": 1.1844, "step": 360192 }, { "epoch": 27.03609756097561, "grad_norm": 0.5563064217567444, "learning_rate": 0.002828, "loss": 1.1951, "step": 360256 }, { "epoch": 27.040900562851782, "grad_norm": 0.5678032040596008, "learning_rate": 0.002828, "loss": 1.1882, "step": 360320 }, { "epoch": 27.045703564727955, "grad_norm": 0.5668981671333313, "learning_rate": 0.002828, "loss": 1.1883, "step": 360384 }, { "epoch": 27.050506566604128, "grad_norm": 0.6456194519996643, "learning_rate": 0.002828, "loss": 1.188, "step": 360448 }, { "epoch": 27.0553095684803, "grad_norm": 0.5644674897193909, "learning_rate": 0.002828, "loss": 1.1907, "step": 360512 }, { "epoch": 27.060112570356473, "grad_norm": 0.6194739937782288, "learning_rate": 0.002828, "loss": 1.19, "step": 360576 }, { "epoch": 27.064915572232646, "grad_norm": 0.584814190864563, "learning_rate": 0.002828, "loss": 1.1868, "step": 360640 }, { "epoch": 27.06971857410882, "grad_norm": 0.5569928288459778, "learning_rate": 0.002828, "loss": 1.1862, "step": 360704 }, { "epoch": 27.074521575984992, "grad_norm": 0.5890145301818848, "learning_rate": 0.002828, "loss": 1.1916, "step": 360768 }, { "epoch": 27.079324577861165, "grad_norm": 0.5473900437355042, "learning_rate": 0.002828, "loss": 1.1914, "step": 360832 }, { "epoch": 27.084127579737334, "grad_norm": 0.6628814339637756, "learning_rate": 0.002828, "loss": 1.1914, "step": 360896 }, { "epoch": 27.088930581613507, "grad_norm": 0.6406671404838562, "learning_rate": 0.002828, "loss": 1.1932, "step": 360960 }, { "epoch": 27.09373358348968, "grad_norm": 0.5623260140419006, "learning_rate": 0.002828, "loss": 1.1938, "step": 361024 }, { "epoch": 27.098536585365853, "grad_norm": 0.7211869359016418, "learning_rate": 0.002828, "loss": 1.1985, "step": 361088 }, { "epoch": 27.103339587242026, "grad_norm": 0.5265310406684875, "learning_rate": 0.002828, "loss": 1.191, "step": 361152 }, { "epoch": 27.1081425891182, "grad_norm": 0.7146878242492676, "learning_rate": 0.002828, "loss": 1.1942, "step": 361216 }, { "epoch": 27.11294559099437, "grad_norm": 0.6107980012893677, "learning_rate": 0.002828, "loss": 1.193, "step": 361280 }, { "epoch": 27.117748592870544, "grad_norm": 0.6449308395385742, "learning_rate": 0.002828, "loss": 1.1963, "step": 361344 }, { "epoch": 27.122551594746717, "grad_norm": 0.6099646091461182, "learning_rate": 0.002828, "loss": 1.1949, "step": 361408 }, { "epoch": 27.12735459662289, "grad_norm": 0.7447180151939392, "learning_rate": 0.002828, "loss": 1.1938, "step": 361472 }, { "epoch": 27.132157598499063, "grad_norm": 0.7580665349960327, "learning_rate": 0.002828, "loss": 1.1975, "step": 361536 }, { "epoch": 27.136960600375236, "grad_norm": 0.5835822820663452, "learning_rate": 0.002828, "loss": 1.1875, "step": 361600 }, { "epoch": 27.14176360225141, "grad_norm": 0.5681918859481812, "learning_rate": 0.002828, "loss": 1.1966, "step": 361664 }, { "epoch": 27.146566604127578, "grad_norm": 0.5537122488021851, "learning_rate": 0.002828, "loss": 1.1916, "step": 361728 }, { "epoch": 27.15136960600375, "grad_norm": 0.5107552409172058, "learning_rate": 0.002828, "loss": 1.1883, "step": 361792 }, { "epoch": 27.156172607879924, "grad_norm": 0.6284750699996948, "learning_rate": 0.002828, "loss": 1.196, "step": 361856 }, { "epoch": 27.160975609756097, "grad_norm": 0.7075376510620117, "learning_rate": 0.002828, "loss": 1.1987, "step": 361920 }, { "epoch": 27.16577861163227, "grad_norm": 0.7268292903900146, "learning_rate": 0.002828, "loss": 1.1958, "step": 361984 }, { "epoch": 27.170581613508443, "grad_norm": 0.6370378136634827, "learning_rate": 0.002828, "loss": 1.1899, "step": 362048 }, { "epoch": 27.175384615384615, "grad_norm": 0.5429769158363342, "learning_rate": 0.002828, "loss": 1.1987, "step": 362112 }, { "epoch": 27.18018761726079, "grad_norm": 0.569195568561554, "learning_rate": 0.002828, "loss": 1.1927, "step": 362176 }, { "epoch": 27.18499061913696, "grad_norm": 0.6059727668762207, "learning_rate": 0.002828, "loss": 1.1929, "step": 362240 }, { "epoch": 27.189793621013134, "grad_norm": 0.6149117946624756, "learning_rate": 0.002828, "loss": 1.1942, "step": 362304 }, { "epoch": 27.194596622889307, "grad_norm": 0.5530393719673157, "learning_rate": 0.002828, "loss": 1.1926, "step": 362368 }, { "epoch": 27.19939962476548, "grad_norm": 0.5254247188568115, "learning_rate": 0.002828, "loss": 1.1984, "step": 362432 }, { "epoch": 27.204202626641653, "grad_norm": 0.5869778990745544, "learning_rate": 0.002828, "loss": 1.1975, "step": 362496 }, { "epoch": 27.209005628517822, "grad_norm": 0.6287544369697571, "learning_rate": 0.002828, "loss": 1.1919, "step": 362560 }, { "epoch": 27.213808630393995, "grad_norm": 0.6365551948547363, "learning_rate": 0.002828, "loss": 1.1961, "step": 362624 }, { "epoch": 27.218611632270168, "grad_norm": 0.501448929309845, "learning_rate": 0.002828, "loss": 1.2058, "step": 362688 }, { "epoch": 27.22341463414634, "grad_norm": 0.5506671667098999, "learning_rate": 0.002828, "loss": 1.2008, "step": 362752 }, { "epoch": 27.228217636022514, "grad_norm": 0.6569094657897949, "learning_rate": 0.002828, "loss": 1.2003, "step": 362816 }, { "epoch": 27.233020637898687, "grad_norm": 0.5557685494422913, "learning_rate": 0.002828, "loss": 1.1955, "step": 362880 }, { "epoch": 27.23782363977486, "grad_norm": 0.6757054328918457, "learning_rate": 0.002828, "loss": 1.1967, "step": 362944 }, { "epoch": 27.242626641651032, "grad_norm": 0.5856550335884094, "learning_rate": 0.002828, "loss": 1.197, "step": 363008 }, { "epoch": 27.247429643527205, "grad_norm": 0.5449078679084778, "learning_rate": 0.002828, "loss": 1.1995, "step": 363072 }, { "epoch": 27.252232645403378, "grad_norm": 0.5528790354728699, "learning_rate": 0.002828, "loss": 1.2017, "step": 363136 }, { "epoch": 27.25703564727955, "grad_norm": 0.5473564863204956, "learning_rate": 0.002828, "loss": 1.1953, "step": 363200 }, { "epoch": 27.261838649155724, "grad_norm": 0.6807570457458496, "learning_rate": 0.002828, "loss": 1.2011, "step": 363264 }, { "epoch": 27.266641651031893, "grad_norm": 0.5755122303962708, "learning_rate": 0.002828, "loss": 1.201, "step": 363328 }, { "epoch": 27.271444652908066, "grad_norm": 0.6368680000305176, "learning_rate": 0.002828, "loss": 1.2027, "step": 363392 }, { "epoch": 27.27624765478424, "grad_norm": 0.555194079875946, "learning_rate": 0.002828, "loss": 1.2019, "step": 363456 }, { "epoch": 27.281050656660412, "grad_norm": 0.6025437712669373, "learning_rate": 0.002828, "loss": 1.1962, "step": 363520 }, { "epoch": 27.285853658536585, "grad_norm": 0.5380324721336365, "learning_rate": 0.002828, "loss": 1.196, "step": 363584 }, { "epoch": 27.290656660412758, "grad_norm": 0.6038591265678406, "learning_rate": 0.002828, "loss": 1.2011, "step": 363648 }, { "epoch": 27.29545966228893, "grad_norm": 0.5660392642021179, "learning_rate": 0.002828, "loss": 1.2017, "step": 363712 }, { "epoch": 27.300262664165103, "grad_norm": 0.590695858001709, "learning_rate": 0.002828, "loss": 1.1945, "step": 363776 }, { "epoch": 27.305065666041276, "grad_norm": 0.6372555494308472, "learning_rate": 0.002828, "loss": 1.2016, "step": 363840 }, { "epoch": 27.30986866791745, "grad_norm": 0.6135676503181458, "learning_rate": 0.002828, "loss": 1.1995, "step": 363904 }, { "epoch": 27.314671669793622, "grad_norm": 0.5026392340660095, "learning_rate": 0.002828, "loss": 1.2005, "step": 363968 }, { "epoch": 27.319474671669795, "grad_norm": 0.5486312508583069, "learning_rate": 0.002828, "loss": 1.2019, "step": 364032 }, { "epoch": 27.324277673545968, "grad_norm": 0.5584508776664734, "learning_rate": 0.002828, "loss": 1.2015, "step": 364096 }, { "epoch": 27.329080675422137, "grad_norm": 0.5603387951850891, "learning_rate": 0.002828, "loss": 1.2008, "step": 364160 }, { "epoch": 27.33388367729831, "grad_norm": 0.6798414587974548, "learning_rate": 0.002828, "loss": 1.1984, "step": 364224 }, { "epoch": 27.338686679174483, "grad_norm": 0.5990169048309326, "learning_rate": 0.002828, "loss": 1.2001, "step": 364288 }, { "epoch": 27.343489681050656, "grad_norm": 0.7844681739807129, "learning_rate": 0.002828, "loss": 1.1981, "step": 364352 }, { "epoch": 27.34829268292683, "grad_norm": 0.6317253708839417, "learning_rate": 0.002828, "loss": 1.2035, "step": 364416 }, { "epoch": 27.353095684803, "grad_norm": 0.5308550596237183, "learning_rate": 0.002828, "loss": 1.2005, "step": 364480 }, { "epoch": 27.357898686679174, "grad_norm": 0.4954928755760193, "learning_rate": 0.002828, "loss": 1.195, "step": 364544 }, { "epoch": 27.362701688555347, "grad_norm": 0.5386978387832642, "learning_rate": 0.002828, "loss": 1.203, "step": 364608 }, { "epoch": 27.36750469043152, "grad_norm": 0.5351986885070801, "learning_rate": 0.002828, "loss": 1.2036, "step": 364672 }, { "epoch": 27.372307692307693, "grad_norm": 0.5314650535583496, "learning_rate": 0.002828, "loss": 1.202, "step": 364736 }, { "epoch": 27.377110694183866, "grad_norm": 0.5204005837440491, "learning_rate": 0.002828, "loss": 1.202, "step": 364800 }, { "epoch": 27.38191369606004, "grad_norm": 0.6783261299133301, "learning_rate": 0.002828, "loss": 1.2035, "step": 364864 }, { "epoch": 27.38671669793621, "grad_norm": 0.678269624710083, "learning_rate": 0.002828, "loss": 1.2031, "step": 364928 }, { "epoch": 27.39151969981238, "grad_norm": 0.7005301713943481, "learning_rate": 0.002828, "loss": 1.2084, "step": 364992 }, { "epoch": 27.396322701688554, "grad_norm": 0.6017382740974426, "learning_rate": 0.002828, "loss": 1.2083, "step": 365056 }, { "epoch": 27.401125703564727, "grad_norm": 0.5842321515083313, "learning_rate": 0.002828, "loss": 1.2031, "step": 365120 }, { "epoch": 27.4059287054409, "grad_norm": 0.5812395811080933, "learning_rate": 0.002828, "loss": 1.2075, "step": 365184 }, { "epoch": 27.410731707317073, "grad_norm": 0.487456351518631, "learning_rate": 0.002828, "loss": 1.2042, "step": 365248 }, { "epoch": 27.415534709193246, "grad_norm": 0.5946784019470215, "learning_rate": 0.002828, "loss": 1.2007, "step": 365312 }, { "epoch": 27.42033771106942, "grad_norm": 0.6484962105751038, "learning_rate": 0.002828, "loss": 1.2049, "step": 365376 }, { "epoch": 27.42514071294559, "grad_norm": 0.5717827081680298, "learning_rate": 0.002828, "loss": 1.2045, "step": 365440 }, { "epoch": 27.429943714821764, "grad_norm": 0.5931734442710876, "learning_rate": 0.002828, "loss": 1.2047, "step": 365504 }, { "epoch": 27.434746716697937, "grad_norm": 0.5433608889579773, "learning_rate": 0.002828, "loss": 1.2075, "step": 365568 }, { "epoch": 27.43954971857411, "grad_norm": 0.5237091779708862, "learning_rate": 0.002828, "loss": 1.1959, "step": 365632 }, { "epoch": 27.444352720450283, "grad_norm": 0.5132204294204712, "learning_rate": 0.002828, "loss": 1.2055, "step": 365696 }, { "epoch": 27.449155722326456, "grad_norm": 0.7822837233543396, "learning_rate": 0.002828, "loss": 1.2055, "step": 365760 }, { "epoch": 27.453958724202625, "grad_norm": 0.4899812638759613, "learning_rate": 0.002828, "loss": 1.199, "step": 365824 }, { "epoch": 27.458761726078798, "grad_norm": 0.5946176648139954, "learning_rate": 0.002828, "loss": 1.1991, "step": 365888 }, { "epoch": 27.46356472795497, "grad_norm": 0.7705643177032471, "learning_rate": 0.002828, "loss": 1.208, "step": 365952 }, { "epoch": 27.468367729831144, "grad_norm": 0.5611868500709534, "learning_rate": 0.002828, "loss": 1.2011, "step": 366016 }, { "epoch": 27.473170731707317, "grad_norm": 0.6456145644187927, "learning_rate": 0.002828, "loss": 1.2046, "step": 366080 }, { "epoch": 27.47797373358349, "grad_norm": 0.5337197184562683, "learning_rate": 0.002828, "loss": 1.2025, "step": 366144 }, { "epoch": 27.482776735459662, "grad_norm": 0.6826463937759399, "learning_rate": 0.002828, "loss": 1.201, "step": 366208 }, { "epoch": 27.487579737335835, "grad_norm": 0.5261040329933167, "learning_rate": 0.002828, "loss": 1.2137, "step": 366272 }, { "epoch": 27.492382739212008, "grad_norm": 0.5058457255363464, "learning_rate": 0.002828, "loss": 1.207, "step": 366336 }, { "epoch": 27.49718574108818, "grad_norm": 0.6075417995452881, "learning_rate": 0.002828, "loss": 1.2058, "step": 366400 }, { "epoch": 27.501988742964354, "grad_norm": 0.5280016660690308, "learning_rate": 0.002828, "loss": 1.1983, "step": 366464 }, { "epoch": 27.506791744840527, "grad_norm": 0.6989396810531616, "learning_rate": 0.002828, "loss": 1.2057, "step": 366528 }, { "epoch": 27.511594746716696, "grad_norm": 0.5592902302742004, "learning_rate": 0.002828, "loss": 1.2045, "step": 366592 }, { "epoch": 27.51639774859287, "grad_norm": 0.6710501909255981, "learning_rate": 0.002828, "loss": 1.2068, "step": 366656 }, { "epoch": 27.521200750469042, "grad_norm": 0.590663492679596, "learning_rate": 0.002828, "loss": 1.2, "step": 366720 }, { "epoch": 27.526003752345215, "grad_norm": 0.5750949382781982, "learning_rate": 0.002828, "loss": 1.2102, "step": 366784 }, { "epoch": 27.530806754221388, "grad_norm": 0.592196524143219, "learning_rate": 0.002828, "loss": 1.2072, "step": 366848 }, { "epoch": 27.53560975609756, "grad_norm": 0.5525135397911072, "learning_rate": 0.002828, "loss": 1.2089, "step": 366912 }, { "epoch": 27.540412757973733, "grad_norm": 0.5719900131225586, "learning_rate": 0.002828, "loss": 1.2087, "step": 366976 }, { "epoch": 27.545215759849906, "grad_norm": 0.5599880218505859, "learning_rate": 0.002828, "loss": 1.2078, "step": 367040 }, { "epoch": 27.55001876172608, "grad_norm": 0.7876613736152649, "learning_rate": 0.002828, "loss": 1.2033, "step": 367104 }, { "epoch": 27.554821763602252, "grad_norm": 0.5568932294845581, "learning_rate": 0.002828, "loss": 1.2016, "step": 367168 }, { "epoch": 27.559624765478425, "grad_norm": 0.5835604667663574, "learning_rate": 0.002828, "loss": 1.2044, "step": 367232 }, { "epoch": 27.564427767354598, "grad_norm": 0.4952414631843567, "learning_rate": 0.002828, "loss": 1.2037, "step": 367296 }, { "epoch": 27.56923076923077, "grad_norm": 0.5456660389900208, "learning_rate": 0.002828, "loss": 1.198, "step": 367360 }, { "epoch": 27.57403377110694, "grad_norm": 0.5977687835693359, "learning_rate": 0.002828, "loss": 1.2115, "step": 367424 }, { "epoch": 27.578836772983113, "grad_norm": 0.5785269141197205, "learning_rate": 0.002828, "loss": 1.2126, "step": 367488 }, { "epoch": 27.583639774859286, "grad_norm": 0.6125280857086182, "learning_rate": 0.002828, "loss": 1.2084, "step": 367552 }, { "epoch": 27.58844277673546, "grad_norm": 0.5674769878387451, "learning_rate": 0.002828, "loss": 1.2078, "step": 367616 }, { "epoch": 27.59324577861163, "grad_norm": 0.5072876214981079, "learning_rate": 0.002828, "loss": 1.21, "step": 367680 }, { "epoch": 27.598048780487805, "grad_norm": 0.7092969417572021, "learning_rate": 0.002828, "loss": 1.2029, "step": 367744 }, { "epoch": 27.602851782363977, "grad_norm": 0.5517896413803101, "learning_rate": 0.002828, "loss": 1.2095, "step": 367808 }, { "epoch": 27.60765478424015, "grad_norm": 0.6972664594650269, "learning_rate": 0.002828, "loss": 1.2072, "step": 367872 }, { "epoch": 27.612457786116323, "grad_norm": 0.6762186288833618, "learning_rate": 0.002828, "loss": 1.2081, "step": 367936 }, { "epoch": 27.617260787992496, "grad_norm": 0.637596070766449, "learning_rate": 0.002828, "loss": 1.211, "step": 368000 }, { "epoch": 27.62206378986867, "grad_norm": 0.49644848704338074, "learning_rate": 0.002828, "loss": 1.2035, "step": 368064 }, { "epoch": 27.626866791744842, "grad_norm": 0.6612474322319031, "learning_rate": 0.002828, "loss": 1.2083, "step": 368128 }, { "epoch": 27.631669793621015, "grad_norm": 0.5150904655456543, "learning_rate": 0.002828, "loss": 1.2104, "step": 368192 }, { "epoch": 27.636472795497184, "grad_norm": 0.7277690768241882, "learning_rate": 0.002828, "loss": 1.2086, "step": 368256 }, { "epoch": 27.641275797373357, "grad_norm": 0.62581467628479, "learning_rate": 0.002828, "loss": 1.2106, "step": 368320 }, { "epoch": 27.64607879924953, "grad_norm": 0.7047035098075867, "learning_rate": 0.002828, "loss": 1.213, "step": 368384 }, { "epoch": 27.650881801125703, "grad_norm": 0.5907717347145081, "learning_rate": 0.002828, "loss": 1.2033, "step": 368448 }, { "epoch": 27.655684803001876, "grad_norm": 0.6000102162361145, "learning_rate": 0.002828, "loss": 1.2151, "step": 368512 }, { "epoch": 27.66048780487805, "grad_norm": 0.6553179025650024, "learning_rate": 0.002828, "loss": 1.2069, "step": 368576 }, { "epoch": 27.66529080675422, "grad_norm": 0.623139500617981, "learning_rate": 0.002828, "loss": 1.2105, "step": 368640 }, { "epoch": 27.670093808630394, "grad_norm": 0.6486320495605469, "learning_rate": 0.002828, "loss": 1.2103, "step": 368704 }, { "epoch": 27.674896810506567, "grad_norm": 0.5863367915153503, "learning_rate": 0.002828, "loss": 1.2074, "step": 368768 }, { "epoch": 27.67969981238274, "grad_norm": 0.5867400765419006, "learning_rate": 0.002828, "loss": 1.2076, "step": 368832 }, { "epoch": 27.684502814258913, "grad_norm": 0.6032609939575195, "learning_rate": 0.002828, "loss": 1.2093, "step": 368896 }, { "epoch": 27.689305816135086, "grad_norm": 0.5408675074577332, "learning_rate": 0.002828, "loss": 1.2117, "step": 368960 }, { "epoch": 27.69410881801126, "grad_norm": 0.7075996994972229, "learning_rate": 0.002828, "loss": 1.209, "step": 369024 }, { "epoch": 27.698911819887428, "grad_norm": 0.5513624548912048, "learning_rate": 0.002828, "loss": 1.2056, "step": 369088 }, { "epoch": 27.7037148217636, "grad_norm": 0.5250154137611389, "learning_rate": 0.002828, "loss": 1.2144, "step": 369152 }, { "epoch": 27.708517823639774, "grad_norm": 0.5961335301399231, "learning_rate": 0.002828, "loss": 1.2186, "step": 369216 }, { "epoch": 27.713320825515947, "grad_norm": 0.626379132270813, "learning_rate": 0.002828, "loss": 1.21, "step": 369280 }, { "epoch": 27.71812382739212, "grad_norm": 0.5551455616950989, "learning_rate": 0.002828, "loss": 1.204, "step": 369344 }, { "epoch": 27.722926829268292, "grad_norm": 0.5567622780799866, "learning_rate": 0.002828, "loss": 1.2046, "step": 369408 }, { "epoch": 27.727729831144465, "grad_norm": 0.6794117093086243, "learning_rate": 0.002828, "loss": 1.2087, "step": 369472 }, { "epoch": 27.732532833020638, "grad_norm": 0.6635667681694031, "learning_rate": 0.002828, "loss": 1.2106, "step": 369536 }, { "epoch": 27.73733583489681, "grad_norm": 0.5562478303909302, "learning_rate": 0.002828, "loss": 1.2108, "step": 369600 }, { "epoch": 27.742138836772984, "grad_norm": 0.6434085369110107, "learning_rate": 0.002828, "loss": 1.2013, "step": 369664 }, { "epoch": 27.746941838649157, "grad_norm": 0.7914767861366272, "learning_rate": 0.002828, "loss": 1.214, "step": 369728 }, { "epoch": 27.75174484052533, "grad_norm": 0.723877489566803, "learning_rate": 0.002828, "loss": 1.2139, "step": 369792 }, { "epoch": 27.756547842401503, "grad_norm": 0.5417585372924805, "learning_rate": 0.002828, "loss": 1.2147, "step": 369856 }, { "epoch": 27.761350844277672, "grad_norm": 0.5280811190605164, "learning_rate": 0.002828, "loss": 1.2093, "step": 369920 }, { "epoch": 27.766153846153845, "grad_norm": 0.5942422151565552, "learning_rate": 0.002828, "loss": 1.2131, "step": 369984 }, { "epoch": 27.770956848030018, "grad_norm": 0.5912095904350281, "learning_rate": 0.002828, "loss": 1.209, "step": 370048 }, { "epoch": 27.77575984990619, "grad_norm": 0.5693986415863037, "learning_rate": 0.002828, "loss": 1.2122, "step": 370112 }, { "epoch": 27.780562851782364, "grad_norm": 0.5609849691390991, "learning_rate": 0.002828, "loss": 1.2099, "step": 370176 }, { "epoch": 27.785365853658536, "grad_norm": 0.5266005992889404, "learning_rate": 0.002828, "loss": 1.2074, "step": 370240 }, { "epoch": 27.79016885553471, "grad_norm": 0.5555135011672974, "learning_rate": 0.002828, "loss": 1.218, "step": 370304 }, { "epoch": 27.794971857410882, "grad_norm": 0.6156359314918518, "learning_rate": 0.002828, "loss": 1.2109, "step": 370368 }, { "epoch": 27.799774859287055, "grad_norm": 0.5655944347381592, "learning_rate": 0.002828, "loss": 1.2076, "step": 370432 }, { "epoch": 27.804577861163228, "grad_norm": 0.4762551486492157, "learning_rate": 0.002828, "loss": 1.2109, "step": 370496 }, { "epoch": 27.8093808630394, "grad_norm": 0.5622091889381409, "learning_rate": 0.002828, "loss": 1.2161, "step": 370560 }, { "epoch": 27.814183864915574, "grad_norm": 0.6137254238128662, "learning_rate": 0.002828, "loss": 1.2119, "step": 370624 }, { "epoch": 27.818986866791747, "grad_norm": 0.6522278785705566, "learning_rate": 0.002828, "loss": 1.2092, "step": 370688 }, { "epoch": 27.823789868667916, "grad_norm": 0.683634340763092, "learning_rate": 0.002828, "loss": 1.2099, "step": 370752 }, { "epoch": 27.82859287054409, "grad_norm": 0.5084274411201477, "learning_rate": 0.002828, "loss": 1.2023, "step": 370816 }, { "epoch": 27.83339587242026, "grad_norm": 0.6315329074859619, "learning_rate": 0.002828, "loss": 1.2136, "step": 370880 }, { "epoch": 27.838198874296435, "grad_norm": 0.5127155780792236, "learning_rate": 0.002828, "loss": 1.2131, "step": 370944 }, { "epoch": 27.843001876172607, "grad_norm": 0.5103921890258789, "learning_rate": 0.002828, "loss": 1.2106, "step": 371008 }, { "epoch": 27.84780487804878, "grad_norm": 0.5688643455505371, "learning_rate": 0.002828, "loss": 1.2162, "step": 371072 }, { "epoch": 27.852607879924953, "grad_norm": 0.5613607168197632, "learning_rate": 0.002828, "loss": 1.2147, "step": 371136 }, { "epoch": 27.857410881801126, "grad_norm": 0.677702784538269, "learning_rate": 0.002828, "loss": 1.212, "step": 371200 }, { "epoch": 27.8622138836773, "grad_norm": 0.686955451965332, "learning_rate": 0.002828, "loss": 1.2093, "step": 371264 }, { "epoch": 27.867016885553472, "grad_norm": 0.6308797597885132, "learning_rate": 0.002828, "loss": 1.2121, "step": 371328 }, { "epoch": 27.871819887429645, "grad_norm": 0.5449283123016357, "learning_rate": 0.002828, "loss": 1.2182, "step": 371392 }, { "epoch": 27.876622889305818, "grad_norm": 0.6226169466972351, "learning_rate": 0.002828, "loss": 1.208, "step": 371456 }, { "epoch": 27.88142589118199, "grad_norm": 0.6177122592926025, "learning_rate": 0.002828, "loss": 1.2113, "step": 371520 }, { "epoch": 27.88622889305816, "grad_norm": 0.6782193183898926, "learning_rate": 0.002828, "loss": 1.2102, "step": 371584 }, { "epoch": 27.891031894934333, "grad_norm": 0.5172768831253052, "learning_rate": 0.002828, "loss": 1.207, "step": 371648 }, { "epoch": 27.895834896810506, "grad_norm": 0.5725329518318176, "learning_rate": 0.002828, "loss": 1.2118, "step": 371712 }, { "epoch": 27.90063789868668, "grad_norm": 0.5436069369316101, "learning_rate": 0.002828, "loss": 1.2157, "step": 371776 }, { "epoch": 27.90544090056285, "grad_norm": 0.5015886425971985, "learning_rate": 0.002828, "loss": 1.2092, "step": 371840 }, { "epoch": 27.910243902439024, "grad_norm": 0.6245830655097961, "learning_rate": 0.002828, "loss": 1.2167, "step": 371904 }, { "epoch": 27.915046904315197, "grad_norm": 0.548487663269043, "learning_rate": 0.002828, "loss": 1.2126, "step": 371968 }, { "epoch": 27.91984990619137, "grad_norm": 0.5911235809326172, "learning_rate": 0.002828, "loss": 1.2155, "step": 372032 }, { "epoch": 27.924652908067543, "grad_norm": 0.5632354021072388, "learning_rate": 0.002828, "loss": 1.2134, "step": 372096 }, { "epoch": 27.929455909943716, "grad_norm": 0.6129593849182129, "learning_rate": 0.002828, "loss": 1.2135, "step": 372160 }, { "epoch": 27.93425891181989, "grad_norm": 0.6318002939224243, "learning_rate": 0.002828, "loss": 1.2128, "step": 372224 }, { "epoch": 27.93906191369606, "grad_norm": 0.5950889587402344, "learning_rate": 0.002828, "loss": 1.2122, "step": 372288 }, { "epoch": 27.94386491557223, "grad_norm": 0.6795059442520142, "learning_rate": 0.002828, "loss": 1.2124, "step": 372352 }, { "epoch": 27.948667917448404, "grad_norm": 0.5208553075790405, "learning_rate": 0.002828, "loss": 1.2081, "step": 372416 }, { "epoch": 27.953470919324577, "grad_norm": 0.6146217584609985, "learning_rate": 0.002828, "loss": 1.2164, "step": 372480 }, { "epoch": 27.95827392120075, "grad_norm": 0.5160702466964722, "learning_rate": 0.002828, "loss": 1.2105, "step": 372544 }, { "epoch": 27.963076923076922, "grad_norm": 0.5801197290420532, "learning_rate": 0.002828, "loss": 1.2157, "step": 372608 }, { "epoch": 27.967879924953095, "grad_norm": 0.6412590146064758, "learning_rate": 0.002828, "loss": 1.2167, "step": 372672 }, { "epoch": 27.97268292682927, "grad_norm": 0.5855358242988586, "learning_rate": 0.002828, "loss": 1.2114, "step": 372736 }, { "epoch": 27.97748592870544, "grad_norm": 0.6856404542922974, "learning_rate": 0.002828, "loss": 1.2062, "step": 372800 }, { "epoch": 27.982288930581614, "grad_norm": 0.5524616837501526, "learning_rate": 0.002828, "loss": 1.2135, "step": 372864 }, { "epoch": 27.987091932457787, "grad_norm": 0.5954063534736633, "learning_rate": 0.002828, "loss": 1.2121, "step": 372928 }, { "epoch": 27.99189493433396, "grad_norm": 0.6046544909477234, "learning_rate": 0.002828, "loss": 1.2097, "step": 372992 }, { "epoch": 27.996697936210133, "grad_norm": 0.6298834681510925, "learning_rate": 0.002828, "loss": 1.2152, "step": 373056 }, { "epoch": 28.001500938086306, "grad_norm": 0.617692232131958, "learning_rate": 0.002828, "loss": 1.1993, "step": 373120 }, { "epoch": 28.006303939962475, "grad_norm": 0.5413621068000793, "learning_rate": 0.002828, "loss": 1.1748, "step": 373184 }, { "epoch": 28.011106941838648, "grad_norm": 0.6742262840270996, "learning_rate": 0.002828, "loss": 1.1814, "step": 373248 }, { "epoch": 28.01590994371482, "grad_norm": 0.5467806458473206, "learning_rate": 0.002828, "loss": 1.1843, "step": 373312 }, { "epoch": 28.020712945590994, "grad_norm": 0.578707754611969, "learning_rate": 0.002828, "loss": 1.1711, "step": 373376 }, { "epoch": 28.025515947467166, "grad_norm": 0.5677569508552551, "learning_rate": 0.002828, "loss": 1.1773, "step": 373440 }, { "epoch": 28.03031894934334, "grad_norm": 0.5994061231613159, "learning_rate": 0.002828, "loss": 1.1742, "step": 373504 }, { "epoch": 28.035121951219512, "grad_norm": 0.5310932993888855, "learning_rate": 0.002828, "loss": 1.1836, "step": 373568 }, { "epoch": 28.039924953095685, "grad_norm": 0.7003358006477356, "learning_rate": 0.002828, "loss": 1.1692, "step": 373632 }, { "epoch": 28.044727954971858, "grad_norm": 0.6028397083282471, "learning_rate": 0.002828, "loss": 1.1781, "step": 373696 }, { "epoch": 28.04953095684803, "grad_norm": 0.6095808148384094, "learning_rate": 0.002828, "loss": 1.1811, "step": 373760 }, { "epoch": 28.054333958724204, "grad_norm": 0.49688461422920227, "learning_rate": 0.002828, "loss": 1.1813, "step": 373824 }, { "epoch": 28.059136960600377, "grad_norm": 0.7206149697303772, "learning_rate": 0.002828, "loss": 1.1772, "step": 373888 }, { "epoch": 28.06393996247655, "grad_norm": 0.5841058492660522, "learning_rate": 0.002828, "loss": 1.1827, "step": 373952 }, { "epoch": 28.06874296435272, "grad_norm": 0.7009063363075256, "learning_rate": 0.002828, "loss": 1.1806, "step": 374016 }, { "epoch": 28.07354596622889, "grad_norm": 0.562809407711029, "learning_rate": 0.002828, "loss": 1.1794, "step": 374080 }, { "epoch": 28.078348968105065, "grad_norm": 0.6682745218276978, "learning_rate": 0.002828, "loss": 1.1766, "step": 374144 }, { "epoch": 28.083151969981238, "grad_norm": 0.6088096499443054, "learning_rate": 0.002828, "loss": 1.1808, "step": 374208 }, { "epoch": 28.08795497185741, "grad_norm": 0.640619695186615, "learning_rate": 0.002828, "loss": 1.1807, "step": 374272 }, { "epoch": 28.092757973733583, "grad_norm": 0.6709964871406555, "learning_rate": 0.002828, "loss": 1.1827, "step": 374336 }, { "epoch": 28.097560975609756, "grad_norm": 0.7114203572273254, "learning_rate": 0.002828, "loss": 1.1807, "step": 374400 }, { "epoch": 28.10236397748593, "grad_norm": 0.5254417657852173, "learning_rate": 0.002828, "loss": 1.1875, "step": 374464 }, { "epoch": 28.107166979362102, "grad_norm": 0.5190957188606262, "learning_rate": 0.002828, "loss": 1.1773, "step": 374528 }, { "epoch": 28.111969981238275, "grad_norm": 0.6001734137535095, "learning_rate": 0.002828, "loss": 1.1761, "step": 374592 }, { "epoch": 28.116772983114448, "grad_norm": 0.7337915897369385, "learning_rate": 0.002828, "loss": 1.1826, "step": 374656 }, { "epoch": 28.12157598499062, "grad_norm": 0.5772360563278198, "learning_rate": 0.002828, "loss": 1.1814, "step": 374720 }, { "epoch": 28.126378986866793, "grad_norm": 0.8065518140792847, "learning_rate": 0.002828, "loss": 1.1846, "step": 374784 }, { "epoch": 28.131181988742963, "grad_norm": 0.7258828282356262, "learning_rate": 0.002828, "loss": 1.1804, "step": 374848 }, { "epoch": 28.135984990619136, "grad_norm": 0.624515950679779, "learning_rate": 0.002828, "loss": 1.1781, "step": 374912 }, { "epoch": 28.14078799249531, "grad_norm": 0.611997127532959, "learning_rate": 0.002828, "loss": 1.1832, "step": 374976 }, { "epoch": 28.14559099437148, "grad_norm": 0.5260868668556213, "learning_rate": 0.002828, "loss": 1.1804, "step": 375040 }, { "epoch": 28.150393996247654, "grad_norm": 0.6274808049201965, "learning_rate": 0.002828, "loss": 1.185, "step": 375104 }, { "epoch": 28.155196998123827, "grad_norm": 0.6832979917526245, "learning_rate": 0.002828, "loss": 1.1808, "step": 375168 }, { "epoch": 28.16, "grad_norm": 0.5438277125358582, "learning_rate": 0.002828, "loss": 1.1827, "step": 375232 }, { "epoch": 28.164803001876173, "grad_norm": 0.5315305590629578, "learning_rate": 0.002828, "loss": 1.1837, "step": 375296 }, { "epoch": 28.169606003752346, "grad_norm": 0.5696422457695007, "learning_rate": 0.002828, "loss": 1.1862, "step": 375360 }, { "epoch": 28.17440900562852, "grad_norm": 0.5869351625442505, "learning_rate": 0.002828, "loss": 1.185, "step": 375424 }, { "epoch": 28.17921200750469, "grad_norm": 0.5588424205780029, "learning_rate": 0.002828, "loss": 1.1894, "step": 375488 }, { "epoch": 28.184015009380865, "grad_norm": 0.512924313545227, "learning_rate": 0.002828, "loss": 1.1883, "step": 375552 }, { "epoch": 28.188818011257034, "grad_norm": 0.6198523044586182, "learning_rate": 0.002828, "loss": 1.1939, "step": 375616 }, { "epoch": 28.193621013133207, "grad_norm": 0.766189455986023, "learning_rate": 0.002828, "loss": 1.1935, "step": 375680 }, { "epoch": 28.19842401500938, "grad_norm": 0.48696544766426086, "learning_rate": 0.002828, "loss": 1.1843, "step": 375744 }, { "epoch": 28.203227016885553, "grad_norm": 0.5477736592292786, "learning_rate": 0.002828, "loss": 1.1829, "step": 375808 }, { "epoch": 28.208030018761725, "grad_norm": 0.6221043467521667, "learning_rate": 0.002828, "loss": 1.1885, "step": 375872 }, { "epoch": 28.2128330206379, "grad_norm": 0.5639182329177856, "learning_rate": 0.002828, "loss": 1.1826, "step": 375936 }, { "epoch": 28.21763602251407, "grad_norm": 0.6532436609268188, "learning_rate": 0.002828, "loss": 1.1884, "step": 376000 }, { "epoch": 28.222439024390244, "grad_norm": 0.5616109371185303, "learning_rate": 0.002828, "loss": 1.1903, "step": 376064 }, { "epoch": 28.227242026266417, "grad_norm": 0.600661039352417, "learning_rate": 0.002828, "loss": 1.1866, "step": 376128 }, { "epoch": 28.23204502814259, "grad_norm": 0.652381420135498, "learning_rate": 0.002828, "loss": 1.1857, "step": 376192 }, { "epoch": 28.236848030018763, "grad_norm": 0.7465491890907288, "learning_rate": 0.002828, "loss": 1.188, "step": 376256 }, { "epoch": 28.241651031894936, "grad_norm": 0.6826187372207642, "learning_rate": 0.002828, "loss": 1.189, "step": 376320 }, { "epoch": 28.24645403377111, "grad_norm": 0.5452209711074829, "learning_rate": 0.002828, "loss": 1.185, "step": 376384 }, { "epoch": 28.251257035647278, "grad_norm": 0.6479519605636597, "learning_rate": 0.002828, "loss": 1.1894, "step": 376448 }, { "epoch": 28.25606003752345, "grad_norm": 0.7790426015853882, "learning_rate": 0.002828, "loss": 1.1889, "step": 376512 }, { "epoch": 28.260863039399624, "grad_norm": 0.5609104633331299, "learning_rate": 0.002828, "loss": 1.1846, "step": 376576 }, { "epoch": 28.265666041275797, "grad_norm": 0.612287700176239, "learning_rate": 0.002828, "loss": 1.1909, "step": 376640 }, { "epoch": 28.27046904315197, "grad_norm": 0.5369693636894226, "learning_rate": 0.002828, "loss": 1.1915, "step": 376704 }, { "epoch": 28.275272045028142, "grad_norm": 0.5666752457618713, "learning_rate": 0.002828, "loss": 1.1853, "step": 376768 }, { "epoch": 28.280075046904315, "grad_norm": 0.5566587448120117, "learning_rate": 0.002828, "loss": 1.1899, "step": 376832 }, { "epoch": 28.284878048780488, "grad_norm": 0.6081478595733643, "learning_rate": 0.002828, "loss": 1.1903, "step": 376896 }, { "epoch": 28.28968105065666, "grad_norm": 0.5504257082939148, "learning_rate": 0.002828, "loss": 1.195, "step": 376960 }, { "epoch": 28.294484052532834, "grad_norm": 0.5835635662078857, "learning_rate": 0.002828, "loss": 1.1859, "step": 377024 }, { "epoch": 28.299287054409007, "grad_norm": 0.5684508681297302, "learning_rate": 0.002828, "loss": 1.1919, "step": 377088 }, { "epoch": 28.30409005628518, "grad_norm": 0.6690579056739807, "learning_rate": 0.002828, "loss": 1.1893, "step": 377152 }, { "epoch": 28.308893058161352, "grad_norm": 0.6685102581977844, "learning_rate": 0.002828, "loss": 1.1895, "step": 377216 }, { "epoch": 28.313696060037522, "grad_norm": 0.5561646819114685, "learning_rate": 0.002828, "loss": 1.1918, "step": 377280 }, { "epoch": 28.318499061913695, "grad_norm": 0.47592681646347046, "learning_rate": 0.002828, "loss": 1.1851, "step": 377344 }, { "epoch": 28.323302063789868, "grad_norm": 0.5404749512672424, "learning_rate": 0.002828, "loss": 1.1914, "step": 377408 }, { "epoch": 28.32810506566604, "grad_norm": 0.5822522640228271, "learning_rate": 0.002828, "loss": 1.1938, "step": 377472 }, { "epoch": 28.332908067542213, "grad_norm": 0.5612675547599792, "learning_rate": 0.002828, "loss": 1.19, "step": 377536 }, { "epoch": 28.337711069418386, "grad_norm": 0.5781949758529663, "learning_rate": 0.002828, "loss": 1.1933, "step": 377600 }, { "epoch": 28.34251407129456, "grad_norm": 0.6771215200424194, "learning_rate": 0.002828, "loss": 1.1916, "step": 377664 }, { "epoch": 28.347317073170732, "grad_norm": 0.5959840416908264, "learning_rate": 0.002828, "loss": 1.1925, "step": 377728 }, { "epoch": 28.352120075046905, "grad_norm": 0.5531420111656189, "learning_rate": 0.002828, "loss": 1.1904, "step": 377792 }, { "epoch": 28.356923076923078, "grad_norm": 0.638388454914093, "learning_rate": 0.002828, "loss": 1.187, "step": 377856 }, { "epoch": 28.36172607879925, "grad_norm": 0.6081584095954895, "learning_rate": 0.002828, "loss": 1.184, "step": 377920 }, { "epoch": 28.366529080675424, "grad_norm": 0.6064649820327759, "learning_rate": 0.002828, "loss": 1.1913, "step": 377984 }, { "epoch": 28.371332082551596, "grad_norm": 0.48571982979774475, "learning_rate": 0.002828, "loss": 1.1862, "step": 378048 }, { "epoch": 28.376135084427766, "grad_norm": 0.5620942115783691, "learning_rate": 0.002828, "loss": 1.1951, "step": 378112 }, { "epoch": 28.38093808630394, "grad_norm": 0.674046516418457, "learning_rate": 0.002828, "loss": 1.1904, "step": 378176 }, { "epoch": 28.38574108818011, "grad_norm": 0.5276687145233154, "learning_rate": 0.002828, "loss": 1.1923, "step": 378240 }, { "epoch": 28.390544090056284, "grad_norm": 0.5791585445404053, "learning_rate": 0.002828, "loss": 1.1916, "step": 378304 }, { "epoch": 28.395347091932457, "grad_norm": 0.6942904591560364, "learning_rate": 0.002828, "loss": 1.1876, "step": 378368 }, { "epoch": 28.40015009380863, "grad_norm": 0.5368022918701172, "learning_rate": 0.002828, "loss": 1.1934, "step": 378432 }, { "epoch": 28.404953095684803, "grad_norm": 0.5756661295890808, "learning_rate": 0.002828, "loss": 1.1904, "step": 378496 }, { "epoch": 28.409756097560976, "grad_norm": 0.5052250027656555, "learning_rate": 0.002828, "loss": 1.1951, "step": 378560 }, { "epoch": 28.41455909943715, "grad_norm": 0.5766377449035645, "learning_rate": 0.002828, "loss": 1.1868, "step": 378624 }, { "epoch": 28.41936210131332, "grad_norm": 0.5014985799789429, "learning_rate": 0.002828, "loss": 1.193, "step": 378688 }, { "epoch": 28.424165103189495, "grad_norm": 0.5464946627616882, "learning_rate": 0.002828, "loss": 1.1926, "step": 378752 }, { "epoch": 28.428968105065668, "grad_norm": 0.7980459928512573, "learning_rate": 0.002828, "loss": 1.1949, "step": 378816 }, { "epoch": 28.43377110694184, "grad_norm": 0.5736128687858582, "learning_rate": 0.002828, "loss": 1.1987, "step": 378880 }, { "epoch": 28.43857410881801, "grad_norm": 0.6616801619529724, "learning_rate": 0.002828, "loss": 1.1913, "step": 378944 }, { "epoch": 28.443377110694183, "grad_norm": 0.587303638458252, "learning_rate": 0.002828, "loss": 1.1915, "step": 379008 }, { "epoch": 28.448180112570356, "grad_norm": 0.6374856233596802, "learning_rate": 0.002828, "loss": 1.1945, "step": 379072 }, { "epoch": 28.45298311444653, "grad_norm": 0.5934728384017944, "learning_rate": 0.002828, "loss": 1.1957, "step": 379136 }, { "epoch": 28.4577861163227, "grad_norm": 0.6261712312698364, "learning_rate": 0.002828, "loss": 1.187, "step": 379200 }, { "epoch": 28.462589118198874, "grad_norm": 0.6582685708999634, "learning_rate": 0.002828, "loss": 1.1984, "step": 379264 }, { "epoch": 28.467392120075047, "grad_norm": 0.6693552732467651, "learning_rate": 0.002828, "loss": 1.1932, "step": 379328 }, { "epoch": 28.47219512195122, "grad_norm": 0.5348833799362183, "learning_rate": 0.002828, "loss": 1.1854, "step": 379392 }, { "epoch": 28.476998123827393, "grad_norm": 0.5958510637283325, "learning_rate": 0.002828, "loss": 1.1896, "step": 379456 }, { "epoch": 28.481801125703566, "grad_norm": 0.5670205354690552, "learning_rate": 0.002828, "loss": 1.1949, "step": 379520 }, { "epoch": 28.48660412757974, "grad_norm": 0.5278819799423218, "learning_rate": 0.002828, "loss": 1.196, "step": 379584 }, { "epoch": 28.49140712945591, "grad_norm": 0.5390208959579468, "learning_rate": 0.002828, "loss": 1.1921, "step": 379648 }, { "epoch": 28.49621013133208, "grad_norm": 0.5674098134040833, "learning_rate": 0.002828, "loss": 1.1896, "step": 379712 }, { "epoch": 28.501013133208254, "grad_norm": 0.5298190116882324, "learning_rate": 0.002828, "loss": 1.1914, "step": 379776 }, { "epoch": 28.505816135084427, "grad_norm": 0.6678189039230347, "learning_rate": 0.002828, "loss": 1.1932, "step": 379840 }, { "epoch": 28.5106191369606, "grad_norm": 0.5374751091003418, "learning_rate": 0.002828, "loss": 1.1878, "step": 379904 }, { "epoch": 28.515422138836772, "grad_norm": 0.5720387697219849, "learning_rate": 0.002828, "loss": 1.1934, "step": 379968 }, { "epoch": 28.520225140712945, "grad_norm": 0.6150330901145935, "learning_rate": 0.002828, "loss": 1.1957, "step": 380032 }, { "epoch": 28.525028142589118, "grad_norm": 0.6207433938980103, "learning_rate": 0.002828, "loss": 1.1949, "step": 380096 }, { "epoch": 28.52983114446529, "grad_norm": 0.7584127187728882, "learning_rate": 0.002828, "loss": 1.1899, "step": 380160 }, { "epoch": 28.534634146341464, "grad_norm": 0.5574619174003601, "learning_rate": 0.002828, "loss": 1.197, "step": 380224 }, { "epoch": 28.539437148217637, "grad_norm": 0.6886953115463257, "learning_rate": 0.002828, "loss": 1.189, "step": 380288 }, { "epoch": 28.54424015009381, "grad_norm": 0.6965809464454651, "learning_rate": 0.002828, "loss": 1.1957, "step": 380352 }, { "epoch": 28.549043151969983, "grad_norm": 0.6613625288009644, "learning_rate": 0.002828, "loss": 1.2169, "step": 380416 }, { "epoch": 28.553846153846155, "grad_norm": 0.7155604958534241, "learning_rate": 0.002828, "loss": 1.199, "step": 380480 }, { "epoch": 28.558649155722325, "grad_norm": 0.7222745418548584, "learning_rate": 0.002828, "loss": 1.1983, "step": 380544 }, { "epoch": 28.563452157598498, "grad_norm": 0.5591253042221069, "learning_rate": 0.002828, "loss": 1.202, "step": 380608 }, { "epoch": 28.56825515947467, "grad_norm": 0.7596259117126465, "learning_rate": 0.002828, "loss": 1.2022, "step": 380672 }, { "epoch": 28.573058161350843, "grad_norm": 0.5827973484992981, "learning_rate": 0.002828, "loss": 1.1981, "step": 380736 }, { "epoch": 28.577861163227016, "grad_norm": 0.6316086649894714, "learning_rate": 0.002828, "loss": 1.1995, "step": 380800 }, { "epoch": 28.58266416510319, "grad_norm": 0.5625211000442505, "learning_rate": 0.002828, "loss": 1.1939, "step": 380864 }, { "epoch": 28.587467166979362, "grad_norm": 0.6687682271003723, "learning_rate": 0.002828, "loss": 1.1999, "step": 380928 }, { "epoch": 28.592270168855535, "grad_norm": 0.6526170969009399, "learning_rate": 0.002828, "loss": 1.1969, "step": 380992 }, { "epoch": 28.597073170731708, "grad_norm": 0.6328181028366089, "learning_rate": 0.002828, "loss": 1.1953, "step": 381056 }, { "epoch": 28.60187617260788, "grad_norm": 0.8068972826004028, "learning_rate": 0.002828, "loss": 1.1942, "step": 381120 }, { "epoch": 28.606679174484054, "grad_norm": 0.45512983202934265, "learning_rate": 0.002828, "loss": 1.2051, "step": 381184 }, { "epoch": 28.611482176360227, "grad_norm": 0.5556621551513672, "learning_rate": 0.002828, "loss": 1.1977, "step": 381248 }, { "epoch": 28.6162851782364, "grad_norm": 0.6210782527923584, "learning_rate": 0.002828, "loss": 1.1963, "step": 381312 }, { "epoch": 28.62108818011257, "grad_norm": 0.5571366548538208, "learning_rate": 0.002828, "loss": 1.1948, "step": 381376 }, { "epoch": 28.62589118198874, "grad_norm": 0.6097261309623718, "learning_rate": 0.002828, "loss": 1.1938, "step": 381440 }, { "epoch": 28.630694183864914, "grad_norm": 0.5357059240341187, "learning_rate": 0.002828, "loss": 1.2026, "step": 381504 }, { "epoch": 28.635497185741087, "grad_norm": 0.5957450866699219, "learning_rate": 0.002828, "loss": 1.1954, "step": 381568 }, { "epoch": 28.64030018761726, "grad_norm": 0.6477972269058228, "learning_rate": 0.002828, "loss": 1.1953, "step": 381632 }, { "epoch": 28.645103189493433, "grad_norm": 0.5469118356704712, "learning_rate": 0.002828, "loss": 1.1959, "step": 381696 }, { "epoch": 28.649906191369606, "grad_norm": 0.5682870745658875, "learning_rate": 0.002828, "loss": 1.1966, "step": 381760 }, { "epoch": 28.65470919324578, "grad_norm": 0.5928640365600586, "learning_rate": 0.002828, "loss": 1.1945, "step": 381824 }, { "epoch": 28.659512195121952, "grad_norm": 0.6658616662025452, "learning_rate": 0.002828, "loss": 1.1969, "step": 381888 }, { "epoch": 28.664315196998125, "grad_norm": 0.733881413936615, "learning_rate": 0.002828, "loss": 1.2032, "step": 381952 }, { "epoch": 28.669118198874298, "grad_norm": 0.5383415222167969, "learning_rate": 0.002828, "loss": 1.1996, "step": 382016 }, { "epoch": 28.67392120075047, "grad_norm": 0.6444189548492432, "learning_rate": 0.002828, "loss": 1.1998, "step": 382080 }, { "epoch": 28.678724202626643, "grad_norm": 0.5679410696029663, "learning_rate": 0.002828, "loss": 1.2054, "step": 382144 }, { "epoch": 28.683527204502813, "grad_norm": 0.7500342726707458, "learning_rate": 0.002828, "loss": 1.1948, "step": 382208 }, { "epoch": 28.688330206378986, "grad_norm": 0.5816766619682312, "learning_rate": 0.002828, "loss": 1.1937, "step": 382272 }, { "epoch": 28.69313320825516, "grad_norm": 0.7372063398361206, "learning_rate": 0.002828, "loss": 1.2074, "step": 382336 }, { "epoch": 28.69793621013133, "grad_norm": 0.5986203551292419, "learning_rate": 0.002828, "loss": 1.1959, "step": 382400 }, { "epoch": 28.702739212007504, "grad_norm": 0.7330045104026794, "learning_rate": 0.002828, "loss": 1.1967, "step": 382464 }, { "epoch": 28.707542213883677, "grad_norm": 0.5793071985244751, "learning_rate": 0.002828, "loss": 1.1961, "step": 382528 }, { "epoch": 28.71234521575985, "grad_norm": 0.6789469718933105, "learning_rate": 0.002828, "loss": 1.1986, "step": 382592 }, { "epoch": 28.717148217636023, "grad_norm": 0.6991356015205383, "learning_rate": 0.002828, "loss": 1.2007, "step": 382656 }, { "epoch": 28.721951219512196, "grad_norm": 0.4840135872364044, "learning_rate": 0.002828, "loss": 1.1987, "step": 382720 }, { "epoch": 28.72675422138837, "grad_norm": 0.5444275736808777, "learning_rate": 0.002828, "loss": 1.1973, "step": 382784 }, { "epoch": 28.73155722326454, "grad_norm": 0.6061873435974121, "learning_rate": 0.002828, "loss": 1.1999, "step": 382848 }, { "epoch": 28.736360225140714, "grad_norm": 0.5796545743942261, "learning_rate": 0.002828, "loss": 1.1985, "step": 382912 }, { "epoch": 28.741163227016884, "grad_norm": 0.5784839391708374, "learning_rate": 0.002828, "loss": 1.1993, "step": 382976 }, { "epoch": 28.745966228893057, "grad_norm": 0.6885812282562256, "learning_rate": 0.002828, "loss": 1.1995, "step": 383040 }, { "epoch": 28.75076923076923, "grad_norm": 0.6657517552375793, "learning_rate": 0.002828, "loss": 1.1992, "step": 383104 }, { "epoch": 28.755572232645402, "grad_norm": 0.5068328380584717, "learning_rate": 0.002828, "loss": 1.1941, "step": 383168 }, { "epoch": 28.760375234521575, "grad_norm": 0.5651753544807434, "learning_rate": 0.002828, "loss": 1.2008, "step": 383232 }, { "epoch": 28.765178236397748, "grad_norm": 0.5559189319610596, "learning_rate": 0.002828, "loss": 1.1995, "step": 383296 }, { "epoch": 28.76998123827392, "grad_norm": 0.6028053760528564, "learning_rate": 0.002828, "loss": 1.1993, "step": 383360 }, { "epoch": 28.774784240150094, "grad_norm": 0.6071565747261047, "learning_rate": 0.002828, "loss": 1.1979, "step": 383424 }, { "epoch": 28.779587242026267, "grad_norm": 0.6035954356193542, "learning_rate": 0.002828, "loss": 1.1982, "step": 383488 }, { "epoch": 28.78439024390244, "grad_norm": 0.6009169816970825, "learning_rate": 0.002828, "loss": 1.2044, "step": 383552 }, { "epoch": 28.789193245778613, "grad_norm": 0.5744941830635071, "learning_rate": 0.002828, "loss": 1.2034, "step": 383616 }, { "epoch": 28.793996247654785, "grad_norm": 0.748096764087677, "learning_rate": 0.002828, "loss": 1.2062, "step": 383680 }, { "epoch": 28.79879924953096, "grad_norm": 0.5846696496009827, "learning_rate": 0.002828, "loss": 1.2016, "step": 383744 }, { "epoch": 28.803602251407128, "grad_norm": 0.6742605566978455, "learning_rate": 0.002828, "loss": 1.1999, "step": 383808 }, { "epoch": 28.8084052532833, "grad_norm": 0.5580283999443054, "learning_rate": 0.002828, "loss": 1.1971, "step": 383872 }, { "epoch": 28.813208255159473, "grad_norm": 0.6639071106910706, "learning_rate": 0.002828, "loss": 1.2022, "step": 383936 }, { "epoch": 28.818011257035646, "grad_norm": 0.5421018600463867, "learning_rate": 0.002828, "loss": 1.2019, "step": 384000 }, { "epoch": 28.82281425891182, "grad_norm": 0.6587681174278259, "learning_rate": 0.002828, "loss": 1.2022, "step": 384064 }, { "epoch": 28.827617260787992, "grad_norm": 0.5655040144920349, "learning_rate": 0.002828, "loss": 1.202, "step": 384128 }, { "epoch": 28.832420262664165, "grad_norm": 0.6196697354316711, "learning_rate": 0.002828, "loss": 1.2007, "step": 384192 }, { "epoch": 28.837223264540338, "grad_norm": 0.716361939907074, "learning_rate": 0.002828, "loss": 1.192, "step": 384256 }, { "epoch": 28.84202626641651, "grad_norm": 0.5832474827766418, "learning_rate": 0.002828, "loss": 1.1963, "step": 384320 }, { "epoch": 28.846829268292684, "grad_norm": 0.5728902220726013, "learning_rate": 0.002828, "loss": 1.2075, "step": 384384 }, { "epoch": 28.851632270168857, "grad_norm": 0.6255494952201843, "learning_rate": 0.002828, "loss": 1.1992, "step": 384448 }, { "epoch": 28.85643527204503, "grad_norm": 0.5721420645713806, "learning_rate": 0.002828, "loss": 1.2027, "step": 384512 }, { "epoch": 28.861238273921202, "grad_norm": 0.594143807888031, "learning_rate": 0.002828, "loss": 1.2043, "step": 384576 }, { "epoch": 28.86604127579737, "grad_norm": 0.6351729035377502, "learning_rate": 0.002828, "loss": 1.2026, "step": 384640 }, { "epoch": 28.870844277673545, "grad_norm": 0.5315951704978943, "learning_rate": 0.002828, "loss": 1.2042, "step": 384704 }, { "epoch": 28.875647279549717, "grad_norm": 0.6394718885421753, "learning_rate": 0.002828, "loss": 1.2012, "step": 384768 }, { "epoch": 28.88045028142589, "grad_norm": 0.6304981708526611, "learning_rate": 0.002828, "loss": 1.2001, "step": 384832 }, { "epoch": 28.885253283302063, "grad_norm": 0.6619015336036682, "learning_rate": 0.002828, "loss": 1.2032, "step": 384896 }, { "epoch": 28.890056285178236, "grad_norm": 0.6189813017845154, "learning_rate": 0.002828, "loss": 1.2033, "step": 384960 }, { "epoch": 28.89485928705441, "grad_norm": 0.5804136395454407, "learning_rate": 0.002828, "loss": 1.2039, "step": 385024 }, { "epoch": 28.899662288930582, "grad_norm": 0.5444638133049011, "learning_rate": 0.002828, "loss": 1.2017, "step": 385088 }, { "epoch": 28.904465290806755, "grad_norm": 0.6017876863479614, "learning_rate": 0.002828, "loss": 1.2043, "step": 385152 }, { "epoch": 28.909268292682928, "grad_norm": 0.5880628824234009, "learning_rate": 0.002828, "loss": 1.2009, "step": 385216 }, { "epoch": 28.9140712945591, "grad_norm": 0.5595766305923462, "learning_rate": 0.002828, "loss": 1.1918, "step": 385280 }, { "epoch": 28.918874296435273, "grad_norm": 0.5918551087379456, "learning_rate": 0.002828, "loss": 1.2037, "step": 385344 }, { "epoch": 28.923677298311446, "grad_norm": 0.5559405088424683, "learning_rate": 0.002828, "loss": 1.2015, "step": 385408 }, { "epoch": 28.928480300187616, "grad_norm": 0.4949721693992615, "learning_rate": 0.002828, "loss": 1.2027, "step": 385472 }, { "epoch": 28.93328330206379, "grad_norm": 0.6168365478515625, "learning_rate": 0.002828, "loss": 1.2004, "step": 385536 }, { "epoch": 28.93808630393996, "grad_norm": 0.7354480624198914, "learning_rate": 0.002828, "loss": 1.2064, "step": 385600 }, { "epoch": 28.942889305816134, "grad_norm": 0.5195580720901489, "learning_rate": 0.002828, "loss": 1.1999, "step": 385664 }, { "epoch": 28.947692307692307, "grad_norm": 0.6821034550666809, "learning_rate": 0.002828, "loss": 1.2013, "step": 385728 }, { "epoch": 28.95249530956848, "grad_norm": 0.5596335530281067, "learning_rate": 0.002828, "loss": 1.1978, "step": 385792 }, { "epoch": 28.957298311444653, "grad_norm": 0.6192792057991028, "learning_rate": 0.002828, "loss": 1.1977, "step": 385856 }, { "epoch": 28.962101313320826, "grad_norm": 0.6030036211013794, "learning_rate": 0.002828, "loss": 1.2033, "step": 385920 }, { "epoch": 28.966904315197, "grad_norm": 0.728481650352478, "learning_rate": 0.002828, "loss": 1.2045, "step": 385984 }, { "epoch": 28.97170731707317, "grad_norm": 0.5132448077201843, "learning_rate": 0.002828, "loss": 1.1986, "step": 386048 }, { "epoch": 28.976510318949344, "grad_norm": 0.6252719163894653, "learning_rate": 0.002828, "loss": 1.1988, "step": 386112 }, { "epoch": 28.981313320825517, "grad_norm": 0.5599966645240784, "learning_rate": 0.002828, "loss": 1.1967, "step": 386176 }, { "epoch": 28.98611632270169, "grad_norm": 0.6355006098747253, "learning_rate": 0.002828, "loss": 1.205, "step": 386240 }, { "epoch": 28.99091932457786, "grad_norm": 0.5937554240226746, "learning_rate": 0.002828, "loss": 1.206, "step": 386304 }, { "epoch": 28.995722326454032, "grad_norm": 0.5503448843955994, "learning_rate": 0.002828, "loss": 1.2013, "step": 386368 }, { "epoch": 29.000525328330205, "grad_norm": 0.6377638578414917, "learning_rate": 0.002828, "loss": 1.1941, "step": 386432 }, { "epoch": 29.00532833020638, "grad_norm": 0.6582196950912476, "learning_rate": 0.002828, "loss": 1.1697, "step": 386496 }, { "epoch": 29.01013133208255, "grad_norm": 0.6100175380706787, "learning_rate": 0.002828, "loss": 1.1675, "step": 386560 }, { "epoch": 29.014934333958724, "grad_norm": 0.5015468001365662, "learning_rate": 0.002828, "loss": 1.1648, "step": 386624 }, { "epoch": 29.019737335834897, "grad_norm": 0.6561148166656494, "learning_rate": 0.002828, "loss": 1.1636, "step": 386688 }, { "epoch": 29.02454033771107, "grad_norm": 0.6683859825134277, "learning_rate": 0.002828, "loss": 1.1636, "step": 386752 }, { "epoch": 29.029343339587243, "grad_norm": 0.5666760206222534, "learning_rate": 0.002828, "loss": 1.1625, "step": 386816 }, { "epoch": 29.034146341463416, "grad_norm": 0.5546724796295166, "learning_rate": 0.002828, "loss": 1.1639, "step": 386880 }, { "epoch": 29.03894934333959, "grad_norm": 0.603721022605896, "learning_rate": 0.002828, "loss": 1.1633, "step": 386944 }, { "epoch": 29.04375234521576, "grad_norm": 0.6631825566291809, "learning_rate": 0.002828, "loss": 1.1628, "step": 387008 }, { "epoch": 29.048555347091934, "grad_norm": 0.575922429561615, "learning_rate": 0.002828, "loss": 1.1694, "step": 387072 }, { "epoch": 29.053358348968104, "grad_norm": 0.562066912651062, "learning_rate": 0.002828, "loss": 1.173, "step": 387136 }, { "epoch": 29.058161350844276, "grad_norm": 0.6301705837249756, "learning_rate": 0.002828, "loss": 1.1676, "step": 387200 }, { "epoch": 29.06296435272045, "grad_norm": 0.6013914942741394, "learning_rate": 0.002828, "loss": 1.1736, "step": 387264 }, { "epoch": 29.067767354596622, "grad_norm": 0.6656597256660461, "learning_rate": 0.002828, "loss": 1.1721, "step": 387328 }, { "epoch": 29.072570356472795, "grad_norm": 0.5685966610908508, "learning_rate": 0.002828, "loss": 1.1694, "step": 387392 }, { "epoch": 29.077373358348968, "grad_norm": 0.5013291835784912, "learning_rate": 0.002828, "loss": 1.1677, "step": 387456 }, { "epoch": 29.08217636022514, "grad_norm": 0.5750505924224854, "learning_rate": 0.002828, "loss": 1.1678, "step": 387520 }, { "epoch": 29.086979362101314, "grad_norm": 0.560761570930481, "learning_rate": 0.002828, "loss": 1.1659, "step": 387584 }, { "epoch": 29.091782363977487, "grad_norm": 0.5667386651039124, "learning_rate": 0.002828, "loss": 1.1646, "step": 387648 }, { "epoch": 29.09658536585366, "grad_norm": 0.5522923469543457, "learning_rate": 0.002828, "loss": 1.1777, "step": 387712 }, { "epoch": 29.101388367729832, "grad_norm": 0.599494457244873, "learning_rate": 0.002828, "loss": 1.1697, "step": 387776 }, { "epoch": 29.106191369606005, "grad_norm": 0.593332052230835, "learning_rate": 0.002828, "loss": 1.1694, "step": 387840 }, { "epoch": 29.110994371482175, "grad_norm": 0.603303849697113, "learning_rate": 0.002828, "loss": 1.1776, "step": 387904 }, { "epoch": 29.115797373358347, "grad_norm": 0.5618759989738464, "learning_rate": 0.002828, "loss": 1.1654, "step": 387968 }, { "epoch": 29.12060037523452, "grad_norm": 0.6696698665618896, "learning_rate": 0.002828, "loss": 1.1697, "step": 388032 }, { "epoch": 29.125403377110693, "grad_norm": 0.5471336841583252, "learning_rate": 0.002828, "loss": 1.1711, "step": 388096 }, { "epoch": 29.130206378986866, "grad_norm": 0.6245209574699402, "learning_rate": 0.002828, "loss": 1.1734, "step": 388160 }, { "epoch": 29.13500938086304, "grad_norm": 0.5719093084335327, "learning_rate": 0.002828, "loss": 1.1667, "step": 388224 }, { "epoch": 29.139812382739212, "grad_norm": 0.6765281558036804, "learning_rate": 0.002828, "loss": 1.1736, "step": 388288 }, { "epoch": 29.144615384615385, "grad_norm": 0.642324686050415, "learning_rate": 0.002828, "loss": 1.1729, "step": 388352 }, { "epoch": 29.149418386491558, "grad_norm": 0.5967279076576233, "learning_rate": 0.002828, "loss": 1.171, "step": 388416 }, { "epoch": 29.15422138836773, "grad_norm": 0.5580810904502869, "learning_rate": 0.002828, "loss": 1.1735, "step": 388480 }, { "epoch": 29.159024390243903, "grad_norm": 0.5501552224159241, "learning_rate": 0.002828, "loss": 1.1742, "step": 388544 }, { "epoch": 29.163827392120076, "grad_norm": 0.4875175356864929, "learning_rate": 0.002828, "loss": 1.171, "step": 388608 }, { "epoch": 29.16863039399625, "grad_norm": 0.4810082018375397, "learning_rate": 0.002828, "loss": 1.1753, "step": 388672 }, { "epoch": 29.17343339587242, "grad_norm": 0.6213483214378357, "learning_rate": 0.002828, "loss": 1.1722, "step": 388736 }, { "epoch": 29.17823639774859, "grad_norm": 0.6881386637687683, "learning_rate": 0.002828, "loss": 1.1712, "step": 388800 }, { "epoch": 29.183039399624764, "grad_norm": 0.5882981419563293, "learning_rate": 0.002828, "loss": 1.1747, "step": 388864 }, { "epoch": 29.187842401500937, "grad_norm": 0.8282897472381592, "learning_rate": 0.002828, "loss": 1.1726, "step": 388928 }, { "epoch": 29.19264540337711, "grad_norm": 0.6687741875648499, "learning_rate": 0.002828, "loss": 1.1707, "step": 388992 }, { "epoch": 29.197448405253283, "grad_norm": 0.6514523029327393, "learning_rate": 0.002828, "loss": 1.1734, "step": 389056 }, { "epoch": 29.202251407129456, "grad_norm": 0.7071826457977295, "learning_rate": 0.002828, "loss": 1.1726, "step": 389120 }, { "epoch": 29.20705440900563, "grad_norm": 0.539300262928009, "learning_rate": 0.002828, "loss": 1.1787, "step": 389184 }, { "epoch": 29.2118574108818, "grad_norm": 0.5657750368118286, "learning_rate": 0.002828, "loss": 1.1711, "step": 389248 }, { "epoch": 29.216660412757975, "grad_norm": 0.6181819438934326, "learning_rate": 0.002828, "loss": 1.1764, "step": 389312 }, { "epoch": 29.221463414634147, "grad_norm": 0.529534101486206, "learning_rate": 0.002828, "loss": 1.1675, "step": 389376 }, { "epoch": 29.22626641651032, "grad_norm": 0.5814141631126404, "learning_rate": 0.002828, "loss": 1.1776, "step": 389440 }, { "epoch": 29.231069418386493, "grad_norm": 0.5800368189811707, "learning_rate": 0.002828, "loss": 1.1763, "step": 389504 }, { "epoch": 29.235872420262663, "grad_norm": 0.591761589050293, "learning_rate": 0.002828, "loss": 1.1738, "step": 389568 }, { "epoch": 29.240675422138835, "grad_norm": 0.5492026805877686, "learning_rate": 0.002828, "loss": 1.1831, "step": 389632 }, { "epoch": 29.24547842401501, "grad_norm": 0.5412047505378723, "learning_rate": 0.002828, "loss": 1.1769, "step": 389696 }, { "epoch": 29.25028142589118, "grad_norm": 0.6620146036148071, "learning_rate": 0.002828, "loss": 1.1749, "step": 389760 }, { "epoch": 29.255084427767354, "grad_norm": 0.5703447461128235, "learning_rate": 0.002828, "loss": 1.1765, "step": 389824 }, { "epoch": 29.259887429643527, "grad_norm": 0.6181512475013733, "learning_rate": 0.002828, "loss": 1.1729, "step": 389888 }, { "epoch": 29.2646904315197, "grad_norm": 0.6570577621459961, "learning_rate": 0.002828, "loss": 1.1766, "step": 389952 }, { "epoch": 29.269493433395873, "grad_norm": 0.58260577917099, "learning_rate": 0.002828, "loss": 1.1764, "step": 390016 }, { "epoch": 29.274296435272046, "grad_norm": 0.5680594444274902, "learning_rate": 0.002828, "loss": 1.1803, "step": 390080 }, { "epoch": 29.27909943714822, "grad_norm": 0.5765116214752197, "learning_rate": 0.002828, "loss": 1.1812, "step": 390144 }, { "epoch": 29.28390243902439, "grad_norm": 0.6817169189453125, "learning_rate": 0.002828, "loss": 1.1792, "step": 390208 }, { "epoch": 29.288705440900564, "grad_norm": 0.5913732647895813, "learning_rate": 0.002828, "loss": 1.1774, "step": 390272 }, { "epoch": 29.293508442776737, "grad_norm": 0.5463665127754211, "learning_rate": 0.002828, "loss": 1.1787, "step": 390336 }, { "epoch": 29.298311444652906, "grad_norm": 0.5897245407104492, "learning_rate": 0.002828, "loss": 1.1752, "step": 390400 }, { "epoch": 29.30311444652908, "grad_norm": 0.5375869870185852, "learning_rate": 0.002828, "loss": 1.1772, "step": 390464 }, { "epoch": 29.307917448405252, "grad_norm": 0.5869905948638916, "learning_rate": 0.002828, "loss": 1.1808, "step": 390528 }, { "epoch": 29.312720450281425, "grad_norm": 0.6224765777587891, "learning_rate": 0.002828, "loss": 1.1729, "step": 390592 }, { "epoch": 29.317523452157598, "grad_norm": 0.5576164126396179, "learning_rate": 0.002828, "loss": 1.1754, "step": 390656 }, { "epoch": 29.32232645403377, "grad_norm": 0.6380188465118408, "learning_rate": 0.002828, "loss": 1.1786, "step": 390720 }, { "epoch": 29.327129455909944, "grad_norm": 0.5919608473777771, "learning_rate": 0.002828, "loss": 1.1842, "step": 390784 }, { "epoch": 29.331932457786117, "grad_norm": 0.5888688564300537, "learning_rate": 0.002828, "loss": 1.181, "step": 390848 }, { "epoch": 29.33673545966229, "grad_norm": 0.5569725632667542, "learning_rate": 0.002828, "loss": 1.1782, "step": 390912 }, { "epoch": 29.341538461538462, "grad_norm": 0.5338151454925537, "learning_rate": 0.002828, "loss": 1.1805, "step": 390976 }, { "epoch": 29.346341463414635, "grad_norm": 0.5496394038200378, "learning_rate": 0.002828, "loss": 1.1758, "step": 391040 }, { "epoch": 29.35114446529081, "grad_norm": 0.646307110786438, "learning_rate": 0.002828, "loss": 1.1824, "step": 391104 }, { "epoch": 29.355947467166978, "grad_norm": 0.5882683992385864, "learning_rate": 0.002828, "loss": 1.178, "step": 391168 }, { "epoch": 29.36075046904315, "grad_norm": 0.5194476842880249, "learning_rate": 0.002828, "loss": 1.1809, "step": 391232 }, { "epoch": 29.365553470919323, "grad_norm": 0.7695441842079163, "learning_rate": 0.002828, "loss": 1.1834, "step": 391296 }, { "epoch": 29.370356472795496, "grad_norm": 0.5390833020210266, "learning_rate": 0.002828, "loss": 1.1781, "step": 391360 }, { "epoch": 29.37515947467167, "grad_norm": 0.5380575656890869, "learning_rate": 0.002828, "loss": 1.1775, "step": 391424 }, { "epoch": 29.379962476547842, "grad_norm": 0.5972825884819031, "learning_rate": 0.002828, "loss": 1.1859, "step": 391488 }, { "epoch": 29.384765478424015, "grad_norm": 0.5862144827842712, "learning_rate": 0.002828, "loss": 1.1772, "step": 391552 }, { "epoch": 29.389568480300188, "grad_norm": 0.5644845962524414, "learning_rate": 0.002828, "loss": 1.1845, "step": 391616 }, { "epoch": 29.39437148217636, "grad_norm": 0.6240797638893127, "learning_rate": 0.002828, "loss": 1.1843, "step": 391680 }, { "epoch": 29.399174484052534, "grad_norm": 0.5433744192123413, "learning_rate": 0.002828, "loss": 1.1805, "step": 391744 }, { "epoch": 29.403977485928706, "grad_norm": 0.6091045141220093, "learning_rate": 0.002828, "loss": 1.1845, "step": 391808 }, { "epoch": 29.40878048780488, "grad_norm": 0.6940107941627502, "learning_rate": 0.002828, "loss": 1.187, "step": 391872 }, { "epoch": 29.413583489681052, "grad_norm": 0.6596461534500122, "learning_rate": 0.002828, "loss": 1.1863, "step": 391936 }, { "epoch": 29.41838649155722, "grad_norm": 0.6700625419616699, "learning_rate": 0.002828, "loss": 1.1876, "step": 392000 }, { "epoch": 29.423189493433394, "grad_norm": 0.5847828388214111, "learning_rate": 0.002828, "loss": 1.1808, "step": 392064 }, { "epoch": 29.427992495309567, "grad_norm": 0.6482068300247192, "learning_rate": 0.002828, "loss": 1.1774, "step": 392128 }, { "epoch": 29.43279549718574, "grad_norm": 0.6086841821670532, "learning_rate": 0.002828, "loss": 1.1828, "step": 392192 }, { "epoch": 29.437598499061913, "grad_norm": 0.606248140335083, "learning_rate": 0.002828, "loss": 1.1879, "step": 392256 }, { "epoch": 29.442401500938086, "grad_norm": 0.62447589635849, "learning_rate": 0.002828, "loss": 1.18, "step": 392320 }, { "epoch": 29.44720450281426, "grad_norm": 0.6819410920143127, "learning_rate": 0.002828, "loss": 1.1808, "step": 392384 }, { "epoch": 29.45200750469043, "grad_norm": 0.6249415278434753, "learning_rate": 0.002828, "loss": 1.1813, "step": 392448 }, { "epoch": 29.456810506566605, "grad_norm": 0.5132665038108826, "learning_rate": 0.002828, "loss": 1.1814, "step": 392512 }, { "epoch": 29.461613508442777, "grad_norm": 0.6131729483604431, "learning_rate": 0.002828, "loss": 1.1816, "step": 392576 }, { "epoch": 29.46641651031895, "grad_norm": 0.8811164498329163, "learning_rate": 0.002828, "loss": 1.1885, "step": 392640 }, { "epoch": 29.471219512195123, "grad_norm": 0.6464258432388306, "learning_rate": 0.002828, "loss": 1.1874, "step": 392704 }, { "epoch": 29.476022514071296, "grad_norm": 0.49052509665489197, "learning_rate": 0.002828, "loss": 1.1847, "step": 392768 }, { "epoch": 29.480825515947465, "grad_norm": 0.6686356067657471, "learning_rate": 0.002828, "loss": 1.1796, "step": 392832 }, { "epoch": 29.48562851782364, "grad_norm": 0.6112715005874634, "learning_rate": 0.002828, "loss": 1.1853, "step": 392896 }, { "epoch": 29.49043151969981, "grad_norm": 0.6032724976539612, "learning_rate": 0.002828, "loss": 1.1844, "step": 392960 }, { "epoch": 29.495234521575984, "grad_norm": 0.5601452589035034, "learning_rate": 0.002828, "loss": 1.1815, "step": 393024 }, { "epoch": 29.500037523452157, "grad_norm": 0.6651715636253357, "learning_rate": 0.002828, "loss": 1.1806, "step": 393088 }, { "epoch": 29.50484052532833, "grad_norm": 0.6076048016548157, "learning_rate": 0.002828, "loss": 1.1802, "step": 393152 }, { "epoch": 29.509643527204503, "grad_norm": 0.5798808336257935, "learning_rate": 0.002828, "loss": 1.1856, "step": 393216 }, { "epoch": 29.514446529080676, "grad_norm": 0.5465596318244934, "learning_rate": 0.002828, "loss": 1.1876, "step": 393280 }, { "epoch": 29.51924953095685, "grad_norm": 0.5441251397132874, "learning_rate": 0.002828, "loss": 1.1848, "step": 393344 }, { "epoch": 29.52405253283302, "grad_norm": 0.5961697101593018, "learning_rate": 0.002828, "loss": 1.182, "step": 393408 }, { "epoch": 29.528855534709194, "grad_norm": 0.7228647470474243, "learning_rate": 0.002828, "loss": 1.1837, "step": 393472 }, { "epoch": 29.533658536585367, "grad_norm": 0.633174479007721, "learning_rate": 0.002828, "loss": 1.1857, "step": 393536 }, { "epoch": 29.53846153846154, "grad_norm": 0.5586833357810974, "learning_rate": 0.002828, "loss": 1.1828, "step": 393600 }, { "epoch": 29.54326454033771, "grad_norm": 0.5021085143089294, "learning_rate": 0.002828, "loss": 1.1836, "step": 393664 }, { "epoch": 29.548067542213882, "grad_norm": 0.6552308201789856, "learning_rate": 0.002828, "loss": 1.1857, "step": 393728 }, { "epoch": 29.552870544090055, "grad_norm": 0.5525025725364685, "learning_rate": 0.002828, "loss": 1.1926, "step": 393792 }, { "epoch": 29.557673545966228, "grad_norm": 0.7086669206619263, "learning_rate": 0.002828, "loss": 1.1865, "step": 393856 }, { "epoch": 29.5624765478424, "grad_norm": 0.47913986444473267, "learning_rate": 0.002828, "loss": 1.1828, "step": 393920 }, { "epoch": 29.567279549718574, "grad_norm": 0.636206865310669, "learning_rate": 0.002828, "loss": 1.1842, "step": 393984 }, { "epoch": 29.572082551594747, "grad_norm": 0.5519427061080933, "learning_rate": 0.002828, "loss": 1.1885, "step": 394048 }, { "epoch": 29.57688555347092, "grad_norm": 0.4739179015159607, "learning_rate": 0.002828, "loss": 1.1836, "step": 394112 }, { "epoch": 29.581688555347093, "grad_norm": 0.5761741399765015, "learning_rate": 0.002828, "loss": 1.1823, "step": 394176 }, { "epoch": 29.586491557223265, "grad_norm": 0.6126164793968201, "learning_rate": 0.002828, "loss": 1.1857, "step": 394240 }, { "epoch": 29.59129455909944, "grad_norm": 0.491203635931015, "learning_rate": 0.002828, "loss": 1.1867, "step": 394304 }, { "epoch": 29.59609756097561, "grad_norm": 0.5796656608581543, "learning_rate": 0.002828, "loss": 1.1893, "step": 394368 }, { "epoch": 29.600900562851784, "grad_norm": 0.5100173950195312, "learning_rate": 0.002828, "loss": 1.1805, "step": 394432 }, { "epoch": 29.605703564727953, "grad_norm": 0.659576952457428, "learning_rate": 0.002828, "loss": 1.1874, "step": 394496 }, { "epoch": 29.610506566604126, "grad_norm": 0.5324804186820984, "learning_rate": 0.002828, "loss": 1.1889, "step": 394560 }, { "epoch": 29.6153095684803, "grad_norm": 0.5801253914833069, "learning_rate": 0.002828, "loss": 1.1865, "step": 394624 }, { "epoch": 29.620112570356472, "grad_norm": 0.5561210513114929, "learning_rate": 0.002828, "loss": 1.1859, "step": 394688 }, { "epoch": 29.624915572232645, "grad_norm": 0.49353739619255066, "learning_rate": 0.002828, "loss": 1.1835, "step": 394752 }, { "epoch": 29.629718574108818, "grad_norm": 0.648216962814331, "learning_rate": 0.002828, "loss": 1.1848, "step": 394816 }, { "epoch": 29.63452157598499, "grad_norm": 0.5576372146606445, "learning_rate": 0.002828, "loss": 1.1854, "step": 394880 }, { "epoch": 29.639324577861164, "grad_norm": 0.5799958109855652, "learning_rate": 0.002828, "loss": 1.184, "step": 394944 }, { "epoch": 29.644127579737336, "grad_norm": 0.5746650099754333, "learning_rate": 0.002828, "loss": 1.1855, "step": 395008 }, { "epoch": 29.64893058161351, "grad_norm": 0.6273934245109558, "learning_rate": 0.002828, "loss": 1.1866, "step": 395072 }, { "epoch": 29.653733583489682, "grad_norm": 0.5673152804374695, "learning_rate": 0.002828, "loss": 1.189, "step": 395136 }, { "epoch": 29.658536585365855, "grad_norm": 0.48399415612220764, "learning_rate": 0.002828, "loss": 1.1844, "step": 395200 }, { "epoch": 29.663339587242028, "grad_norm": 0.5884276032447815, "learning_rate": 0.002828, "loss": 1.19, "step": 395264 }, { "epoch": 29.668142589118197, "grad_norm": 0.5149835348129272, "learning_rate": 0.002828, "loss": 1.1916, "step": 395328 }, { "epoch": 29.67294559099437, "grad_norm": 0.5658831596374512, "learning_rate": 0.002828, "loss": 1.1876, "step": 395392 }, { "epoch": 29.677748592870543, "grad_norm": 0.5463730692863464, "learning_rate": 0.002828, "loss": 1.1936, "step": 395456 }, { "epoch": 29.682551594746716, "grad_norm": 0.8649245500564575, "learning_rate": 0.002828, "loss": 1.1888, "step": 395520 }, { "epoch": 29.68735459662289, "grad_norm": 0.6761359572410583, "learning_rate": 0.002828, "loss": 1.1836, "step": 395584 }, { "epoch": 29.692157598499062, "grad_norm": 0.5373187065124512, "learning_rate": 0.002828, "loss": 1.189, "step": 395648 }, { "epoch": 29.696960600375235, "grad_norm": 0.5609176158905029, "learning_rate": 0.002828, "loss": 1.1842, "step": 395712 }, { "epoch": 29.701763602251408, "grad_norm": 0.6463390588760376, "learning_rate": 0.002828, "loss": 1.1854, "step": 395776 }, { "epoch": 29.70656660412758, "grad_norm": 0.6921478509902954, "learning_rate": 0.002828, "loss": 1.1868, "step": 395840 }, { "epoch": 29.711369606003753, "grad_norm": 0.531934916973114, "learning_rate": 0.002828, "loss": 1.1914, "step": 395904 }, { "epoch": 29.716172607879926, "grad_norm": 0.6882066130638123, "learning_rate": 0.002828, "loss": 1.1874, "step": 395968 }, { "epoch": 29.7209756097561, "grad_norm": 0.5847033858299255, "learning_rate": 0.002828, "loss": 1.1921, "step": 396032 }, { "epoch": 29.725778611632272, "grad_norm": 0.557515025138855, "learning_rate": 0.002828, "loss": 1.1892, "step": 396096 }, { "epoch": 29.73058161350844, "grad_norm": 0.599223792552948, "learning_rate": 0.002828, "loss": 1.1873, "step": 396160 }, { "epoch": 29.735384615384614, "grad_norm": 0.6043716073036194, "learning_rate": 0.002828, "loss": 1.187, "step": 396224 }, { "epoch": 29.740187617260787, "grad_norm": 0.66204833984375, "learning_rate": 0.002828, "loss": 1.1891, "step": 396288 }, { "epoch": 29.74499061913696, "grad_norm": 0.5589165091514587, "learning_rate": 0.002828, "loss": 1.1914, "step": 396352 }, { "epoch": 29.749793621013133, "grad_norm": 0.46428748965263367, "learning_rate": 0.002828, "loss": 1.1847, "step": 396416 }, { "epoch": 29.754596622889306, "grad_norm": 0.5608261823654175, "learning_rate": 0.002828, "loss": 1.1884, "step": 396480 }, { "epoch": 29.75939962476548, "grad_norm": 0.694983184337616, "learning_rate": 0.002828, "loss": 1.1918, "step": 396544 }, { "epoch": 29.76420262664165, "grad_norm": 0.5633040070533752, "learning_rate": 0.002828, "loss": 1.1923, "step": 396608 }, { "epoch": 29.769005628517824, "grad_norm": 0.49123290181159973, "learning_rate": 0.002828, "loss": 1.1911, "step": 396672 }, { "epoch": 29.773808630393997, "grad_norm": 0.7319791316986084, "learning_rate": 0.002828, "loss": 1.1924, "step": 396736 }, { "epoch": 29.77861163227017, "grad_norm": 0.5839659571647644, "learning_rate": 0.002828, "loss": 1.1833, "step": 396800 }, { "epoch": 29.783414634146343, "grad_norm": 0.5470877885818481, "learning_rate": 0.002828, "loss": 1.1867, "step": 396864 }, { "epoch": 29.788217636022512, "grad_norm": 0.7589606642723083, "learning_rate": 0.002828, "loss": 1.1882, "step": 396928 }, { "epoch": 29.793020637898685, "grad_norm": 0.6475275158882141, "learning_rate": 0.002828, "loss": 1.1856, "step": 396992 }, { "epoch": 29.797823639774858, "grad_norm": 0.5769539475440979, "learning_rate": 0.002828, "loss": 1.1959, "step": 397056 }, { "epoch": 29.80262664165103, "grad_norm": 0.6455031633377075, "learning_rate": 0.002828, "loss": 1.1894, "step": 397120 }, { "epoch": 29.807429643527204, "grad_norm": 0.5357875227928162, "learning_rate": 0.002828, "loss": 1.1878, "step": 397184 }, { "epoch": 29.812232645403377, "grad_norm": 0.5677160620689392, "learning_rate": 0.002828, "loss": 1.1872, "step": 397248 }, { "epoch": 29.81703564727955, "grad_norm": 0.6778916716575623, "learning_rate": 0.002828, "loss": 1.1899, "step": 397312 }, { "epoch": 29.821838649155723, "grad_norm": 0.4837956130504608, "learning_rate": 0.002828, "loss": 1.1831, "step": 397376 }, { "epoch": 29.826641651031895, "grad_norm": 0.536710262298584, "learning_rate": 0.002828, "loss": 1.1884, "step": 397440 }, { "epoch": 29.83144465290807, "grad_norm": 0.5414986610412598, "learning_rate": 0.002828, "loss": 1.1848, "step": 397504 }, { "epoch": 29.83624765478424, "grad_norm": 0.5258774757385254, "learning_rate": 0.002828, "loss": 1.1907, "step": 397568 }, { "epoch": 29.841050656660414, "grad_norm": 0.5413457751274109, "learning_rate": 0.002828, "loss": 1.1883, "step": 397632 }, { "epoch": 29.845853658536587, "grad_norm": 0.5534824728965759, "learning_rate": 0.002828, "loss": 1.1866, "step": 397696 }, { "epoch": 29.850656660412756, "grad_norm": 0.5617156624794006, "learning_rate": 0.002828, "loss": 1.1848, "step": 397760 }, { "epoch": 29.85545966228893, "grad_norm": 0.6044380068778992, "learning_rate": 0.002828, "loss": 1.1914, "step": 397824 }, { "epoch": 29.860262664165102, "grad_norm": 0.6871965527534485, "learning_rate": 0.002828, "loss": 1.1887, "step": 397888 }, { "epoch": 29.865065666041275, "grad_norm": 0.6516048312187195, "learning_rate": 0.002828, "loss": 1.1877, "step": 397952 }, { "epoch": 29.869868667917448, "grad_norm": 0.5782538652420044, "learning_rate": 0.002828, "loss": 1.191, "step": 398016 }, { "epoch": 29.87467166979362, "grad_norm": 0.6860178709030151, "learning_rate": 0.002828, "loss": 1.1884, "step": 398080 }, { "epoch": 29.879474671669794, "grad_norm": 0.5459174513816833, "learning_rate": 0.002828, "loss": 1.1922, "step": 398144 }, { "epoch": 29.884277673545967, "grad_norm": 0.6087077260017395, "learning_rate": 0.002828, "loss": 1.1944, "step": 398208 }, { "epoch": 29.88908067542214, "grad_norm": 0.5355796813964844, "learning_rate": 0.002828, "loss": 1.1925, "step": 398272 }, { "epoch": 29.893883677298312, "grad_norm": 0.6955886483192444, "learning_rate": 0.002828, "loss": 1.1919, "step": 398336 }, { "epoch": 29.898686679174485, "grad_norm": 0.5552802085876465, "learning_rate": 0.002828, "loss": 1.1857, "step": 398400 }, { "epoch": 29.903489681050658, "grad_norm": 0.5979511141777039, "learning_rate": 0.002828, "loss": 1.1863, "step": 398464 }, { "epoch": 29.90829268292683, "grad_norm": 0.6473926305770874, "learning_rate": 0.002828, "loss": 1.1908, "step": 398528 }, { "epoch": 29.913095684803, "grad_norm": 0.5830625891685486, "learning_rate": 0.002828, "loss": 1.1893, "step": 398592 }, { "epoch": 29.917898686679173, "grad_norm": 0.6030213832855225, "learning_rate": 0.002828, "loss": 1.1897, "step": 398656 }, { "epoch": 29.922701688555346, "grad_norm": 0.6415476202964783, "learning_rate": 0.002828, "loss": 1.1884, "step": 398720 }, { "epoch": 29.92750469043152, "grad_norm": 0.5384945273399353, "learning_rate": 0.002828, "loss": 1.1921, "step": 398784 }, { "epoch": 29.932307692307692, "grad_norm": 0.5820983648300171, "learning_rate": 0.002828, "loss": 1.1942, "step": 398848 }, { "epoch": 29.937110694183865, "grad_norm": 0.545009195804596, "learning_rate": 0.002828, "loss": 1.1913, "step": 398912 }, { "epoch": 29.941913696060038, "grad_norm": 0.6134216785430908, "learning_rate": 0.002828, "loss": 1.1897, "step": 398976 }, { "epoch": 29.94671669793621, "grad_norm": 0.6903364658355713, "learning_rate": 0.002828, "loss": 1.1927, "step": 399040 }, { "epoch": 29.951519699812383, "grad_norm": 0.5660452246665955, "learning_rate": 0.002828, "loss": 1.1906, "step": 399104 }, { "epoch": 29.956322701688556, "grad_norm": 0.6166103482246399, "learning_rate": 0.002828, "loss": 1.1873, "step": 399168 }, { "epoch": 29.96112570356473, "grad_norm": 0.5798050761222839, "learning_rate": 0.002828, "loss": 1.1923, "step": 399232 }, { "epoch": 29.965928705440902, "grad_norm": 0.630953848361969, "learning_rate": 0.002828, "loss": 1.1904, "step": 399296 }, { "epoch": 29.97073170731707, "grad_norm": 0.66792893409729, "learning_rate": 0.002828, "loss": 1.1901, "step": 399360 }, { "epoch": 29.975534709193244, "grad_norm": 0.6293218731880188, "learning_rate": 0.002828, "loss": 1.195, "step": 399424 }, { "epoch": 29.980337711069417, "grad_norm": 0.5722078680992126, "learning_rate": 0.002828, "loss": 1.1934, "step": 399488 }, { "epoch": 29.98514071294559, "grad_norm": 0.6023445129394531, "learning_rate": 0.002828, "loss": 1.1904, "step": 399552 }, { "epoch": 29.989943714821763, "grad_norm": 0.6318193674087524, "learning_rate": 0.002828, "loss": 1.1968, "step": 399616 }, { "epoch": 29.994746716697936, "grad_norm": 0.6327502131462097, "learning_rate": 0.002828, "loss": 1.1902, "step": 399680 }, { "epoch": 29.99954971857411, "grad_norm": 0.5372485518455505, "learning_rate": 0.002828, "loss": 1.1938, "step": 399744 }, { "epoch": 30.00435272045028, "grad_norm": 0.5668807029724121, "learning_rate": 0.002828, "loss": 1.1554, "step": 399808 }, { "epoch": 30.009155722326454, "grad_norm": 0.6089488863945007, "learning_rate": 0.002828, "loss": 1.155, "step": 399872 }, { "epoch": 30.013958724202627, "grad_norm": 0.5145072937011719, "learning_rate": 0.002828, "loss": 1.1566, "step": 399936 }, { "epoch": 30.0187617260788, "grad_norm": 0.5616568326950073, "learning_rate": 0.002828, "loss": 1.1564, "step": 400000 }, { "epoch": 30.023564727954973, "grad_norm": 0.5550433993339539, "learning_rate": 0.002828, "loss": 1.1566, "step": 400064 }, { "epoch": 30.028367729831146, "grad_norm": 0.543992280960083, "learning_rate": 0.002828, "loss": 1.154, "step": 400128 }, { "epoch": 30.033170731707315, "grad_norm": 0.5426164269447327, "learning_rate": 0.002828, "loss": 1.1551, "step": 400192 }, { "epoch": 30.037973733583488, "grad_norm": 0.7619397044181824, "learning_rate": 0.002828, "loss": 1.1549, "step": 400256 }, { "epoch": 30.04277673545966, "grad_norm": 0.5529201030731201, "learning_rate": 0.002828, "loss": 1.1537, "step": 400320 }, { "epoch": 30.047579737335834, "grad_norm": 0.6061589121818542, "learning_rate": 0.002828, "loss": 1.1577, "step": 400384 }, { "epoch": 30.052382739212007, "grad_norm": 0.8556471467018127, "learning_rate": 0.002828, "loss": 1.1606, "step": 400448 }, { "epoch": 30.05718574108818, "grad_norm": 0.5779027342796326, "learning_rate": 0.002828, "loss": 1.1617, "step": 400512 }, { "epoch": 30.061988742964353, "grad_norm": 0.6338462829589844, "learning_rate": 0.002828, "loss": 1.1603, "step": 400576 }, { "epoch": 30.066791744840526, "grad_norm": 0.5658112168312073, "learning_rate": 0.002828, "loss": 1.1573, "step": 400640 }, { "epoch": 30.0715947467167, "grad_norm": 0.6836487054824829, "learning_rate": 0.002828, "loss": 1.1625, "step": 400704 }, { "epoch": 30.07639774859287, "grad_norm": 0.5295142531394958, "learning_rate": 0.002828, "loss": 1.1641, "step": 400768 }, { "epoch": 30.081200750469044, "grad_norm": 0.8519558906555176, "learning_rate": 0.002828, "loss": 1.1587, "step": 400832 }, { "epoch": 30.086003752345217, "grad_norm": 0.5779620409011841, "learning_rate": 0.002828, "loss": 1.1607, "step": 400896 }, { "epoch": 30.09080675422139, "grad_norm": 0.628088653087616, "learning_rate": 0.002828, "loss": 1.1626, "step": 400960 }, { "epoch": 30.09560975609756, "grad_norm": 0.69316166639328, "learning_rate": 0.002828, "loss": 1.1602, "step": 401024 }, { "epoch": 30.100412757973732, "grad_norm": 0.5918229222297668, "learning_rate": 0.002828, "loss": 1.1635, "step": 401088 }, { "epoch": 30.105215759849905, "grad_norm": 0.6181484460830688, "learning_rate": 0.002828, "loss": 1.1621, "step": 401152 }, { "epoch": 30.110018761726078, "grad_norm": 0.6306777000427246, "learning_rate": 0.002828, "loss": 1.1561, "step": 401216 }, { "epoch": 30.11482176360225, "grad_norm": 0.6380670666694641, "learning_rate": 0.002828, "loss": 1.1625, "step": 401280 }, { "epoch": 30.119624765478424, "grad_norm": 0.5497437119483948, "learning_rate": 0.002828, "loss": 1.1793, "step": 401344 }, { "epoch": 30.124427767354597, "grad_norm": 0.6903517246246338, "learning_rate": 0.002828, "loss": 1.166, "step": 401408 }, { "epoch": 30.12923076923077, "grad_norm": 0.610875129699707, "learning_rate": 0.002828, "loss": 1.163, "step": 401472 }, { "epoch": 30.134033771106942, "grad_norm": 0.5418033599853516, "learning_rate": 0.002828, "loss": 1.1626, "step": 401536 }, { "epoch": 30.138836772983115, "grad_norm": 0.7114241719245911, "learning_rate": 0.002828, "loss": 1.1668, "step": 401600 }, { "epoch": 30.143639774859288, "grad_norm": 0.49383074045181274, "learning_rate": 0.002828, "loss": 1.1636, "step": 401664 }, { "epoch": 30.14844277673546, "grad_norm": 0.6972509026527405, "learning_rate": 0.002828, "loss": 1.1642, "step": 401728 }, { "epoch": 30.153245778611634, "grad_norm": 0.5523052215576172, "learning_rate": 0.002828, "loss": 1.1636, "step": 401792 }, { "epoch": 30.158048780487803, "grad_norm": 0.5856896638870239, "learning_rate": 0.002828, "loss": 1.1632, "step": 401856 }, { "epoch": 30.162851782363976, "grad_norm": 0.6693354249000549, "learning_rate": 0.002828, "loss": 1.1665, "step": 401920 }, { "epoch": 30.16765478424015, "grad_norm": 0.5271797180175781, "learning_rate": 0.002828, "loss": 1.162, "step": 401984 }, { "epoch": 30.172457786116322, "grad_norm": 0.6253278255462646, "learning_rate": 0.002828, "loss": 1.1624, "step": 402048 }, { "epoch": 30.177260787992495, "grad_norm": 0.6121106743812561, "learning_rate": 0.002828, "loss": 1.1641, "step": 402112 }, { "epoch": 30.182063789868668, "grad_norm": 0.5015893578529358, "learning_rate": 0.002828, "loss": 1.1685, "step": 402176 }, { "epoch": 30.18686679174484, "grad_norm": 0.5743915438652039, "learning_rate": 0.002828, "loss": 1.1663, "step": 402240 }, { "epoch": 30.191669793621013, "grad_norm": 0.543053150177002, "learning_rate": 0.002828, "loss": 1.1644, "step": 402304 }, { "epoch": 30.196472795497186, "grad_norm": 0.6323408484458923, "learning_rate": 0.002828, "loss": 1.1635, "step": 402368 }, { "epoch": 30.20127579737336, "grad_norm": 0.5255658626556396, "learning_rate": 0.002828, "loss": 1.1688, "step": 402432 }, { "epoch": 30.206078799249532, "grad_norm": 0.5964052677154541, "learning_rate": 0.002828, "loss": 1.1654, "step": 402496 }, { "epoch": 30.210881801125705, "grad_norm": 0.6892465353012085, "learning_rate": 0.002828, "loss": 1.1659, "step": 402560 }, { "epoch": 30.215684803001878, "grad_norm": 0.5554350018501282, "learning_rate": 0.002828, "loss": 1.171, "step": 402624 }, { "epoch": 30.220487804878047, "grad_norm": 0.5373419523239136, "learning_rate": 0.002828, "loss": 1.1719, "step": 402688 }, { "epoch": 30.22529080675422, "grad_norm": 0.6016690731048584, "learning_rate": 0.002828, "loss": 1.1673, "step": 402752 }, { "epoch": 30.230093808630393, "grad_norm": 0.6829864978790283, "learning_rate": 0.002828, "loss": 1.1661, "step": 402816 }, { "epoch": 30.234896810506566, "grad_norm": 0.5701805353164673, "learning_rate": 0.002828, "loss": 1.1665, "step": 402880 }, { "epoch": 30.23969981238274, "grad_norm": 0.7625254988670349, "learning_rate": 0.002828, "loss": 1.1678, "step": 402944 }, { "epoch": 30.24450281425891, "grad_norm": 0.6311930418014526, "learning_rate": 0.002828, "loss": 1.1668, "step": 403008 }, { "epoch": 30.249305816135085, "grad_norm": 0.6099370121955872, "learning_rate": 0.002828, "loss": 1.1676, "step": 403072 }, { "epoch": 30.254108818011257, "grad_norm": 0.6538092494010925, "learning_rate": 0.002828, "loss": 1.1697, "step": 403136 }, { "epoch": 30.25891181988743, "grad_norm": 0.6979556679725647, "learning_rate": 0.002828, "loss": 1.1688, "step": 403200 }, { "epoch": 30.263714821763603, "grad_norm": 0.5625706911087036, "learning_rate": 0.002828, "loss": 1.169, "step": 403264 }, { "epoch": 30.268517823639776, "grad_norm": 0.7455666065216064, "learning_rate": 0.002828, "loss": 1.1688, "step": 403328 }, { "epoch": 30.27332082551595, "grad_norm": 0.5044279098510742, "learning_rate": 0.002828, "loss": 1.1712, "step": 403392 }, { "epoch": 30.278123827392122, "grad_norm": 0.6426975727081299, "learning_rate": 0.002828, "loss": 1.1709, "step": 403456 }, { "epoch": 30.28292682926829, "grad_norm": 0.6818196773529053, "learning_rate": 0.002828, "loss": 1.1684, "step": 403520 }, { "epoch": 30.287729831144464, "grad_norm": 0.5548245310783386, "learning_rate": 0.002828, "loss": 1.1676, "step": 403584 }, { "epoch": 30.292532833020637, "grad_norm": 0.5054613351821899, "learning_rate": 0.002828, "loss": 1.1725, "step": 403648 }, { "epoch": 30.29733583489681, "grad_norm": 0.6327176094055176, "learning_rate": 0.002828, "loss": 1.1651, "step": 403712 }, { "epoch": 30.302138836772983, "grad_norm": 0.6453624367713928, "learning_rate": 0.002828, "loss": 1.163, "step": 403776 }, { "epoch": 30.306941838649156, "grad_norm": 0.6579575538635254, "learning_rate": 0.002828, "loss": 1.1666, "step": 403840 }, { "epoch": 30.31174484052533, "grad_norm": 0.6196261644363403, "learning_rate": 0.002828, "loss": 1.1617, "step": 403904 }, { "epoch": 30.3165478424015, "grad_norm": 0.6769335865974426, "learning_rate": 0.002828, "loss": 1.1679, "step": 403968 }, { "epoch": 30.321350844277674, "grad_norm": 0.542854905128479, "learning_rate": 0.002828, "loss": 1.1681, "step": 404032 }, { "epoch": 30.326153846153847, "grad_norm": 0.6319664716720581, "learning_rate": 0.002828, "loss": 1.1684, "step": 404096 }, { "epoch": 30.33095684803002, "grad_norm": 0.629665732383728, "learning_rate": 0.002828, "loss": 1.174, "step": 404160 }, { "epoch": 30.335759849906193, "grad_norm": 0.6037463545799255, "learning_rate": 0.002828, "loss": 1.17, "step": 404224 }, { "epoch": 30.340562851782362, "grad_norm": 0.532941997051239, "learning_rate": 0.002828, "loss": 1.1655, "step": 404288 }, { "epoch": 30.345365853658535, "grad_norm": 0.5925180912017822, "learning_rate": 0.002828, "loss": 1.1652, "step": 404352 }, { "epoch": 30.350168855534708, "grad_norm": 0.5588472485542297, "learning_rate": 0.002828, "loss": 1.1725, "step": 404416 }, { "epoch": 30.35497185741088, "grad_norm": 0.6447106003761292, "learning_rate": 0.002828, "loss": 1.1721, "step": 404480 }, { "epoch": 30.359774859287054, "grad_norm": 0.5808579325675964, "learning_rate": 0.002828, "loss": 1.1677, "step": 404544 }, { "epoch": 30.364577861163227, "grad_norm": 0.5732340812683105, "learning_rate": 0.002828, "loss": 1.1697, "step": 404608 }, { "epoch": 30.3693808630394, "grad_norm": 0.5909567475318909, "learning_rate": 0.002828, "loss": 1.1743, "step": 404672 }, { "epoch": 30.374183864915572, "grad_norm": 0.5653650164604187, "learning_rate": 0.002828, "loss": 1.1726, "step": 404736 }, { "epoch": 30.378986866791745, "grad_norm": 0.61961430311203, "learning_rate": 0.002828, "loss": 1.175, "step": 404800 }, { "epoch": 30.383789868667918, "grad_norm": 0.6074270009994507, "learning_rate": 0.002828, "loss": 1.1674, "step": 404864 }, { "epoch": 30.38859287054409, "grad_norm": 0.5093051195144653, "learning_rate": 0.002828, "loss": 1.1762, "step": 404928 }, { "epoch": 30.393395872420264, "grad_norm": 0.6120220422744751, "learning_rate": 0.002828, "loss": 1.1694, "step": 404992 }, { "epoch": 30.398198874296437, "grad_norm": 0.6272151470184326, "learning_rate": 0.002828, "loss": 1.1674, "step": 405056 }, { "epoch": 30.403001876172606, "grad_norm": 0.5591142773628235, "learning_rate": 0.002828, "loss": 1.1673, "step": 405120 }, { "epoch": 30.40780487804878, "grad_norm": 0.6053633689880371, "learning_rate": 0.002828, "loss": 1.1729, "step": 405184 }, { "epoch": 30.412607879924952, "grad_norm": 0.5584940314292908, "learning_rate": 0.002828, "loss": 1.1755, "step": 405248 }, { "epoch": 30.417410881801125, "grad_norm": 0.5795779824256897, "learning_rate": 0.002828, "loss": 1.1747, "step": 405312 }, { "epoch": 30.422213883677298, "grad_norm": 0.700214684009552, "learning_rate": 0.002828, "loss": 1.1737, "step": 405376 }, { "epoch": 30.42701688555347, "grad_norm": 0.6408358216285706, "learning_rate": 0.002828, "loss": 1.1706, "step": 405440 }, { "epoch": 30.431819887429644, "grad_norm": 0.6774629950523376, "learning_rate": 0.002828, "loss": 1.1704, "step": 405504 }, { "epoch": 30.436622889305816, "grad_norm": 0.5555515289306641, "learning_rate": 0.002828, "loss": 1.1658, "step": 405568 }, { "epoch": 30.44142589118199, "grad_norm": 0.6035341620445251, "learning_rate": 0.002828, "loss": 1.1712, "step": 405632 }, { "epoch": 30.446228893058162, "grad_norm": 0.7900217771530151, "learning_rate": 0.002828, "loss": 1.1751, "step": 405696 }, { "epoch": 30.451031894934335, "grad_norm": 0.6393078565597534, "learning_rate": 0.002828, "loss": 1.1708, "step": 405760 }, { "epoch": 30.455834896810508, "grad_norm": 0.7335425615310669, "learning_rate": 0.002828, "loss": 1.1701, "step": 405824 }, { "epoch": 30.46063789868668, "grad_norm": 0.5384011268615723, "learning_rate": 0.002828, "loss": 1.1769, "step": 405888 }, { "epoch": 30.46544090056285, "grad_norm": 0.6363877058029175, "learning_rate": 0.002828, "loss": 1.1712, "step": 405952 }, { "epoch": 30.470243902439023, "grad_norm": 0.657049834728241, "learning_rate": 0.002828, "loss": 1.1776, "step": 406016 }, { "epoch": 30.475046904315196, "grad_norm": 0.6141687631607056, "learning_rate": 0.002828, "loss": 1.172, "step": 406080 }, { "epoch": 30.47984990619137, "grad_norm": 0.6134195327758789, "learning_rate": 0.002828, "loss": 1.1708, "step": 406144 }, { "epoch": 30.48465290806754, "grad_norm": 0.5732262134552002, "learning_rate": 0.002828, "loss": 1.1659, "step": 406208 }, { "epoch": 30.489455909943715, "grad_norm": 0.6324205994606018, "learning_rate": 0.002828, "loss": 1.1776, "step": 406272 }, { "epoch": 30.494258911819887, "grad_norm": 0.6956483125686646, "learning_rate": 0.002828, "loss": 1.1718, "step": 406336 }, { "epoch": 30.49906191369606, "grad_norm": 0.5355058908462524, "learning_rate": 0.002828, "loss": 1.1809, "step": 406400 }, { "epoch": 30.503864915572233, "grad_norm": 0.6604107022285461, "learning_rate": 0.002828, "loss": 1.1728, "step": 406464 }, { "epoch": 30.508667917448406, "grad_norm": 0.6063787937164307, "learning_rate": 0.002828, "loss": 1.1769, "step": 406528 }, { "epoch": 30.51347091932458, "grad_norm": 0.6198680996894836, "learning_rate": 0.002828, "loss": 1.1718, "step": 406592 }, { "epoch": 30.518273921200752, "grad_norm": 0.5494663715362549, "learning_rate": 0.002828, "loss": 1.1737, "step": 406656 }, { "epoch": 30.523076923076925, "grad_norm": 0.5736897587776184, "learning_rate": 0.002828, "loss": 1.1763, "step": 406720 }, { "epoch": 30.527879924953094, "grad_norm": 0.541961133480072, "learning_rate": 0.002828, "loss": 1.1742, "step": 406784 }, { "epoch": 30.532682926829267, "grad_norm": 0.6026626229286194, "learning_rate": 0.002828, "loss": 1.1762, "step": 406848 }, { "epoch": 30.53748592870544, "grad_norm": 0.5529982447624207, "learning_rate": 0.002828, "loss": 1.1736, "step": 406912 }, { "epoch": 30.542288930581613, "grad_norm": 0.7588163018226624, "learning_rate": 0.002828, "loss": 1.1778, "step": 406976 }, { "epoch": 30.547091932457786, "grad_norm": 0.5233981013298035, "learning_rate": 0.002828, "loss": 1.1703, "step": 407040 }, { "epoch": 30.55189493433396, "grad_norm": 0.5268771052360535, "learning_rate": 0.002828, "loss": 1.1731, "step": 407104 }, { "epoch": 30.55669793621013, "grad_norm": 0.6000686883926392, "learning_rate": 0.002828, "loss": 1.1786, "step": 407168 }, { "epoch": 30.561500938086304, "grad_norm": 0.6501026749610901, "learning_rate": 0.002828, "loss": 1.1776, "step": 407232 }, { "epoch": 30.566303939962477, "grad_norm": 0.5419455766677856, "learning_rate": 0.002828, "loss": 1.1815, "step": 407296 }, { "epoch": 30.57110694183865, "grad_norm": 0.526297390460968, "learning_rate": 0.002828, "loss": 1.1766, "step": 407360 }, { "epoch": 30.575909943714823, "grad_norm": 0.5357848405838013, "learning_rate": 0.002828, "loss": 1.1807, "step": 407424 }, { "epoch": 30.580712945590996, "grad_norm": 0.6130989789962769, "learning_rate": 0.002828, "loss": 1.1686, "step": 407488 }, { "epoch": 30.585515947467165, "grad_norm": 0.5149111747741699, "learning_rate": 0.002828, "loss": 1.167, "step": 407552 }, { "epoch": 30.590318949343338, "grad_norm": 0.7189837098121643, "learning_rate": 0.002828, "loss": 1.1808, "step": 407616 }, { "epoch": 30.59512195121951, "grad_norm": 0.5723125338554382, "learning_rate": 0.002828, "loss": 1.1721, "step": 407680 }, { "epoch": 30.599924953095684, "grad_norm": 0.6711981892585754, "learning_rate": 0.002828, "loss": 1.1741, "step": 407744 }, { "epoch": 30.604727954971857, "grad_norm": 0.5369583964347839, "learning_rate": 0.002828, "loss": 1.1755, "step": 407808 }, { "epoch": 30.60953095684803, "grad_norm": 0.5850250124931335, "learning_rate": 0.002828, "loss": 1.1781, "step": 407872 }, { "epoch": 30.614333958724202, "grad_norm": 0.601307213306427, "learning_rate": 0.002828, "loss": 1.1744, "step": 407936 }, { "epoch": 30.619136960600375, "grad_norm": 0.7238708734512329, "learning_rate": 0.002828, "loss": 1.1754, "step": 408000 }, { "epoch": 30.62393996247655, "grad_norm": 0.5604078769683838, "learning_rate": 0.002828, "loss": 1.1721, "step": 408064 }, { "epoch": 30.62874296435272, "grad_norm": 0.5621109008789062, "learning_rate": 0.002828, "loss": 1.1711, "step": 408128 }, { "epoch": 30.633545966228894, "grad_norm": 0.5630921721458435, "learning_rate": 0.002828, "loss": 1.1746, "step": 408192 }, { "epoch": 30.638348968105067, "grad_norm": 0.6420670747756958, "learning_rate": 0.002828, "loss": 1.1797, "step": 408256 }, { "epoch": 30.64315196998124, "grad_norm": 0.6493772864341736, "learning_rate": 0.002828, "loss": 1.1765, "step": 408320 }, { "epoch": 30.64795497185741, "grad_norm": 0.6907517910003662, "learning_rate": 0.002828, "loss": 1.1759, "step": 408384 }, { "epoch": 30.652757973733582, "grad_norm": 0.5132107734680176, "learning_rate": 0.002828, "loss": 1.1737, "step": 408448 }, { "epoch": 30.657560975609755, "grad_norm": 0.5009642839431763, "learning_rate": 0.002828, "loss": 1.1737, "step": 408512 }, { "epoch": 30.662363977485928, "grad_norm": 0.7794398069381714, "learning_rate": 0.002828, "loss": 1.1716, "step": 408576 }, { "epoch": 30.6671669793621, "grad_norm": 0.6804834604263306, "learning_rate": 0.002828, "loss": 1.1823, "step": 408640 }, { "epoch": 30.671969981238274, "grad_norm": 0.5983238220214844, "learning_rate": 0.002828, "loss": 1.1755, "step": 408704 }, { "epoch": 30.676772983114446, "grad_norm": 0.5429848432540894, "learning_rate": 0.002828, "loss": 1.1818, "step": 408768 }, { "epoch": 30.68157598499062, "grad_norm": 0.6377673149108887, "learning_rate": 0.002828, "loss": 1.1768, "step": 408832 }, { "epoch": 30.686378986866792, "grad_norm": 0.6683006286621094, "learning_rate": 0.002828, "loss": 1.1743, "step": 408896 }, { "epoch": 30.691181988742965, "grad_norm": 0.5785638689994812, "learning_rate": 0.002828, "loss": 1.1761, "step": 408960 }, { "epoch": 30.695984990619138, "grad_norm": 0.6333790421485901, "learning_rate": 0.002828, "loss": 1.1742, "step": 409024 }, { "epoch": 30.70078799249531, "grad_norm": 0.6007692217826843, "learning_rate": 0.002828, "loss": 1.1727, "step": 409088 }, { "epoch": 30.705590994371484, "grad_norm": 0.7093082070350647, "learning_rate": 0.002828, "loss": 1.1825, "step": 409152 }, { "epoch": 30.710393996247653, "grad_norm": 0.7057070136070251, "learning_rate": 0.002828, "loss": 1.1773, "step": 409216 }, { "epoch": 30.715196998123826, "grad_norm": 0.5125381946563721, "learning_rate": 0.002828, "loss": 1.1755, "step": 409280 }, { "epoch": 30.72, "grad_norm": 0.5635402202606201, "learning_rate": 0.002828, "loss": 1.1755, "step": 409344 }, { "epoch": 30.72480300187617, "grad_norm": 0.5870218873023987, "learning_rate": 0.002828, "loss": 1.179, "step": 409408 }, { "epoch": 30.729606003752345, "grad_norm": 0.5780309438705444, "learning_rate": 0.002828, "loss": 1.1775, "step": 409472 }, { "epoch": 30.734409005628518, "grad_norm": 0.6460204124450684, "learning_rate": 0.002828, "loss": 1.1762, "step": 409536 }, { "epoch": 30.73921200750469, "grad_norm": 0.5737305283546448, "learning_rate": 0.002828, "loss": 1.1709, "step": 409600 }, { "epoch": 30.744015009380863, "grad_norm": 0.5802620649337769, "learning_rate": 0.002828, "loss": 1.1795, "step": 409664 }, { "epoch": 30.748818011257036, "grad_norm": 0.587925910949707, "learning_rate": 0.002828, "loss": 1.1777, "step": 409728 }, { "epoch": 30.75362101313321, "grad_norm": 0.6866670846939087, "learning_rate": 0.002828, "loss": 1.1783, "step": 409792 }, { "epoch": 30.758424015009382, "grad_norm": 0.5596933364868164, "learning_rate": 0.002828, "loss": 1.1789, "step": 409856 }, { "epoch": 30.763227016885555, "grad_norm": 0.5398388504981995, "learning_rate": 0.002828, "loss": 1.177, "step": 409920 }, { "epoch": 30.768030018761728, "grad_norm": 0.5532910823822021, "learning_rate": 0.002828, "loss": 1.1841, "step": 409984 }, { "epoch": 30.772833020637897, "grad_norm": 0.6639517545700073, "learning_rate": 0.002828, "loss": 1.1726, "step": 410048 }, { "epoch": 30.77763602251407, "grad_norm": 0.5759304761886597, "learning_rate": 0.002828, "loss": 1.1795, "step": 410112 }, { "epoch": 30.782439024390243, "grad_norm": 0.5365297198295593, "learning_rate": 0.002828, "loss": 1.1781, "step": 410176 }, { "epoch": 30.787242026266416, "grad_norm": 0.6630024313926697, "learning_rate": 0.002828, "loss": 1.1813, "step": 410240 }, { "epoch": 30.79204502814259, "grad_norm": 0.6431249380111694, "learning_rate": 0.002828, "loss": 1.1851, "step": 410304 }, { "epoch": 30.79684803001876, "grad_norm": 0.6468825936317444, "learning_rate": 0.002828, "loss": 1.1761, "step": 410368 }, { "epoch": 30.801651031894934, "grad_norm": 0.572039008140564, "learning_rate": 0.002828, "loss": 1.1785, "step": 410432 }, { "epoch": 30.806454033771107, "grad_norm": 0.5533271431922913, "learning_rate": 0.002828, "loss": 1.1801, "step": 410496 }, { "epoch": 30.81125703564728, "grad_norm": 0.545091450214386, "learning_rate": 0.002828, "loss": 1.1834, "step": 410560 }, { "epoch": 30.816060037523453, "grad_norm": 0.6108741164207458, "learning_rate": 0.002828, "loss": 1.1827, "step": 410624 }, { "epoch": 30.820863039399626, "grad_norm": 0.5509781241416931, "learning_rate": 0.002828, "loss": 1.1791, "step": 410688 }, { "epoch": 30.8256660412758, "grad_norm": 0.5328975915908813, "learning_rate": 0.002828, "loss": 1.18, "step": 410752 }, { "epoch": 30.83046904315197, "grad_norm": 0.6925010085105896, "learning_rate": 0.002828, "loss": 1.179, "step": 410816 }, { "epoch": 30.83527204502814, "grad_norm": 0.5600910782814026, "learning_rate": 0.002828, "loss": 1.1772, "step": 410880 }, { "epoch": 30.840075046904314, "grad_norm": 0.5724281668663025, "learning_rate": 0.002828, "loss": 1.1798, "step": 410944 }, { "epoch": 30.844878048780487, "grad_norm": 0.5661110281944275, "learning_rate": 0.002828, "loss": 1.1794, "step": 411008 }, { "epoch": 30.84968105065666, "grad_norm": 0.6667885184288025, "learning_rate": 0.002828, "loss": 1.1812, "step": 411072 }, { "epoch": 30.854484052532833, "grad_norm": 0.5475946068763733, "learning_rate": 0.002828, "loss": 1.181, "step": 411136 }, { "epoch": 30.859287054409005, "grad_norm": 0.6985030770301819, "learning_rate": 0.002828, "loss": 1.1822, "step": 411200 }, { "epoch": 30.86409005628518, "grad_norm": 0.6968584656715393, "learning_rate": 0.002828, "loss": 1.1807, "step": 411264 }, { "epoch": 30.86889305816135, "grad_norm": 0.5080946087837219, "learning_rate": 0.002828, "loss": 1.1755, "step": 411328 }, { "epoch": 30.873696060037524, "grad_norm": 0.5723351836204529, "learning_rate": 0.002828, "loss": 1.1865, "step": 411392 }, { "epoch": 30.878499061913697, "grad_norm": 0.6020140051841736, "learning_rate": 0.002828, "loss": 1.1793, "step": 411456 }, { "epoch": 30.88330206378987, "grad_norm": 0.5678929686546326, "learning_rate": 0.002828, "loss": 1.1733, "step": 411520 }, { "epoch": 30.888105065666043, "grad_norm": 0.6971235275268555, "learning_rate": 0.002828, "loss": 1.1807, "step": 411584 }, { "epoch": 30.892908067542216, "grad_norm": 0.52821284532547, "learning_rate": 0.002828, "loss": 1.1818, "step": 411648 }, { "epoch": 30.897711069418385, "grad_norm": 0.6598101854324341, "learning_rate": 0.002828, "loss": 1.1825, "step": 411712 }, { "epoch": 30.902514071294558, "grad_norm": 0.6706920266151428, "learning_rate": 0.002828, "loss": 1.1868, "step": 411776 }, { "epoch": 30.90731707317073, "grad_norm": 0.5354040265083313, "learning_rate": 0.002828, "loss": 1.1813, "step": 411840 }, { "epoch": 30.912120075046904, "grad_norm": 0.5959027409553528, "learning_rate": 0.002828, "loss": 1.177, "step": 411904 }, { "epoch": 30.916923076923077, "grad_norm": 0.6717599034309387, "learning_rate": 0.002828, "loss": 1.1796, "step": 411968 }, { "epoch": 30.92172607879925, "grad_norm": 0.5805749297142029, "learning_rate": 0.002828, "loss": 1.1784, "step": 412032 }, { "epoch": 30.926529080675422, "grad_norm": 0.5120179057121277, "learning_rate": 0.002828, "loss": 1.1768, "step": 412096 }, { "epoch": 30.931332082551595, "grad_norm": 0.6287373304367065, "learning_rate": 0.002828, "loss": 1.1753, "step": 412160 }, { "epoch": 30.936135084427768, "grad_norm": 0.5255283117294312, "learning_rate": 0.002828, "loss": 1.1804, "step": 412224 }, { "epoch": 30.94093808630394, "grad_norm": 0.8690053224563599, "learning_rate": 0.002828, "loss": 1.187, "step": 412288 }, { "epoch": 30.945741088180114, "grad_norm": 0.5661391615867615, "learning_rate": 0.002828, "loss": 1.1774, "step": 412352 }, { "epoch": 30.950544090056287, "grad_norm": 0.6966512799263, "learning_rate": 0.002828, "loss": 1.1839, "step": 412416 }, { "epoch": 30.95534709193246, "grad_norm": 0.6669551134109497, "learning_rate": 0.002828, "loss": 1.1816, "step": 412480 }, { "epoch": 30.96015009380863, "grad_norm": 0.5155887603759766, "learning_rate": 0.002828, "loss": 1.186, "step": 412544 }, { "epoch": 30.964953095684802, "grad_norm": 0.514497697353363, "learning_rate": 0.002828, "loss": 1.1802, "step": 412608 }, { "epoch": 30.969756097560975, "grad_norm": 0.6499031782150269, "learning_rate": 0.002828, "loss": 1.1843, "step": 412672 }, { "epoch": 30.974559099437148, "grad_norm": 0.6161714792251587, "learning_rate": 0.002828, "loss": 1.1853, "step": 412736 }, { "epoch": 30.97936210131332, "grad_norm": 0.6288555860519409, "learning_rate": 0.002828, "loss": 1.1846, "step": 412800 }, { "epoch": 30.984165103189493, "grad_norm": 0.634675145149231, "learning_rate": 0.002828, "loss": 1.1807, "step": 412864 }, { "epoch": 30.988968105065666, "grad_norm": 0.5580902695655823, "learning_rate": 0.002828, "loss": 1.1842, "step": 412928 }, { "epoch": 30.99377110694184, "grad_norm": 0.5608106851577759, "learning_rate": 0.002828, "loss": 1.1795, "step": 412992 }, { "epoch": 30.998574108818012, "grad_norm": 0.6324924826622009, "learning_rate": 0.002828, "loss": 1.1744, "step": 413056 }, { "epoch": 31.003377110694185, "grad_norm": 0.7145226001739502, "learning_rate": 0.002828, "loss": 1.1578, "step": 413120 }, { "epoch": 31.008180112570358, "grad_norm": 0.6056089401245117, "learning_rate": 0.002828, "loss": 1.1462, "step": 413184 }, { "epoch": 31.01298311444653, "grad_norm": 0.7890845537185669, "learning_rate": 0.002828, "loss": 1.1467, "step": 413248 }, { "epoch": 31.0177861163227, "grad_norm": 0.6414325833320618, "learning_rate": 0.002828, "loss": 1.1444, "step": 413312 }, { "epoch": 31.022589118198873, "grad_norm": 0.6264815926551819, "learning_rate": 0.002828, "loss": 1.1489, "step": 413376 }, { "epoch": 31.027392120075046, "grad_norm": 0.5662343502044678, "learning_rate": 0.002828, "loss": 1.1404, "step": 413440 }, { "epoch": 31.03219512195122, "grad_norm": 0.5298193097114563, "learning_rate": 0.002828, "loss": 1.1442, "step": 413504 }, { "epoch": 31.03699812382739, "grad_norm": 0.5486260652542114, "learning_rate": 0.002828, "loss": 1.1468, "step": 413568 }, { "epoch": 31.041801125703564, "grad_norm": 0.6614625453948975, "learning_rate": 0.002828, "loss": 1.1462, "step": 413632 }, { "epoch": 31.046604127579737, "grad_norm": 0.7219079732894897, "learning_rate": 0.002828, "loss": 1.1469, "step": 413696 }, { "epoch": 31.05140712945591, "grad_norm": 0.7738757133483887, "learning_rate": 0.002828, "loss": 1.1488, "step": 413760 }, { "epoch": 31.056210131332083, "grad_norm": 0.7506507635116577, "learning_rate": 0.002828, "loss": 1.1512, "step": 413824 }, { "epoch": 31.061013133208256, "grad_norm": 0.6067076325416565, "learning_rate": 0.002828, "loss": 1.148, "step": 413888 }, { "epoch": 31.06581613508443, "grad_norm": 0.5615883469581604, "learning_rate": 0.002828, "loss": 1.1435, "step": 413952 }, { "epoch": 31.0706191369606, "grad_norm": 0.516376793384552, "learning_rate": 0.002828, "loss": 1.1453, "step": 414016 }, { "epoch": 31.075422138836775, "grad_norm": 0.47539058327674866, "learning_rate": 0.002828, "loss": 1.1508, "step": 414080 }, { "epoch": 31.080225140712944, "grad_norm": 0.7129861116409302, "learning_rate": 0.002828, "loss": 1.1499, "step": 414144 }, { "epoch": 31.085028142589117, "grad_norm": 0.6293662786483765, "learning_rate": 0.002828, "loss": 1.1505, "step": 414208 }, { "epoch": 31.08983114446529, "grad_norm": 0.6361925005912781, "learning_rate": 0.002828, "loss": 1.1526, "step": 414272 }, { "epoch": 31.094634146341463, "grad_norm": 0.6082159876823425, "learning_rate": 0.002828, "loss": 1.149, "step": 414336 }, { "epoch": 31.099437148217635, "grad_norm": 0.7075167298316956, "learning_rate": 0.002828, "loss": 1.1476, "step": 414400 }, { "epoch": 31.10424015009381, "grad_norm": 0.5679833889007568, "learning_rate": 0.002828, "loss": 1.1533, "step": 414464 }, { "epoch": 31.10904315196998, "grad_norm": 0.6799837350845337, "learning_rate": 0.002828, "loss": 1.1529, "step": 414528 }, { "epoch": 31.113846153846154, "grad_norm": 0.5135023593902588, "learning_rate": 0.002828, "loss": 1.155, "step": 414592 }, { "epoch": 31.118649155722327, "grad_norm": 0.5953816175460815, "learning_rate": 0.002828, "loss": 1.1558, "step": 414656 }, { "epoch": 31.1234521575985, "grad_norm": 0.6822885274887085, "learning_rate": 0.002828, "loss": 1.1483, "step": 414720 }, { "epoch": 31.128255159474673, "grad_norm": 0.5267248153686523, "learning_rate": 0.002828, "loss": 1.1501, "step": 414784 }, { "epoch": 31.133058161350846, "grad_norm": 0.6198325753211975, "learning_rate": 0.002828, "loss": 1.1484, "step": 414848 }, { "epoch": 31.13786116322702, "grad_norm": 0.6195210814476013, "learning_rate": 0.002828, "loss": 1.1517, "step": 414912 }, { "epoch": 31.142664165103188, "grad_norm": 0.7327703237533569, "learning_rate": 0.002828, "loss": 1.1582, "step": 414976 }, { "epoch": 31.14746716697936, "grad_norm": 0.603537917137146, "learning_rate": 0.002828, "loss": 1.1537, "step": 415040 }, { "epoch": 31.152270168855534, "grad_norm": 0.5337952971458435, "learning_rate": 0.002828, "loss": 1.1559, "step": 415104 }, { "epoch": 31.157073170731707, "grad_norm": 0.6618182063102722, "learning_rate": 0.002828, "loss": 1.1501, "step": 415168 }, { "epoch": 31.16187617260788, "grad_norm": 0.7155896425247192, "learning_rate": 0.002828, "loss": 1.1517, "step": 415232 }, { "epoch": 31.166679174484052, "grad_norm": 0.6300784945487976, "learning_rate": 0.002828, "loss": 1.1539, "step": 415296 }, { "epoch": 31.171482176360225, "grad_norm": 0.5707547068595886, "learning_rate": 0.002828, "loss": 1.1552, "step": 415360 }, { "epoch": 31.176285178236398, "grad_norm": 0.6165001392364502, "learning_rate": 0.002828, "loss": 1.147, "step": 415424 }, { "epoch": 31.18108818011257, "grad_norm": 0.6371972560882568, "learning_rate": 0.002828, "loss": 1.1593, "step": 415488 }, { "epoch": 31.185891181988744, "grad_norm": 0.5942570567131042, "learning_rate": 0.002828, "loss": 1.1571, "step": 415552 }, { "epoch": 31.190694183864917, "grad_norm": 0.5444065928459167, "learning_rate": 0.002828, "loss": 1.1561, "step": 415616 }, { "epoch": 31.19549718574109, "grad_norm": 0.7190663814544678, "learning_rate": 0.002828, "loss": 1.1551, "step": 415680 }, { "epoch": 31.200300187617263, "grad_norm": 0.5388247966766357, "learning_rate": 0.002828, "loss": 1.1554, "step": 415744 }, { "epoch": 31.205103189493432, "grad_norm": 0.8031442761421204, "learning_rate": 0.002828, "loss": 1.1585, "step": 415808 }, { "epoch": 31.209906191369605, "grad_norm": 0.6138423681259155, "learning_rate": 0.002828, "loss": 1.1591, "step": 415872 }, { "epoch": 31.214709193245778, "grad_norm": 0.5942168831825256, "learning_rate": 0.002828, "loss": 1.1572, "step": 415936 }, { "epoch": 31.21951219512195, "grad_norm": 0.6421748399734497, "learning_rate": 0.002828, "loss": 1.1552, "step": 416000 }, { "epoch": 31.224315196998123, "grad_norm": 0.6277354955673218, "learning_rate": 0.002828, "loss": 1.1539, "step": 416064 }, { "epoch": 31.229118198874296, "grad_norm": 0.5966684818267822, "learning_rate": 0.002828, "loss": 1.1542, "step": 416128 }, { "epoch": 31.23392120075047, "grad_norm": 0.628309428691864, "learning_rate": 0.002828, "loss": 1.1583, "step": 416192 }, { "epoch": 31.238724202626642, "grad_norm": 0.6220654845237732, "learning_rate": 0.002828, "loss": 1.1593, "step": 416256 }, { "epoch": 31.243527204502815, "grad_norm": 0.6429674029350281, "learning_rate": 0.002828, "loss": 1.1585, "step": 416320 }, { "epoch": 31.248330206378988, "grad_norm": 0.6579111814498901, "learning_rate": 0.002828, "loss": 1.154, "step": 416384 }, { "epoch": 31.25313320825516, "grad_norm": 0.6795548796653748, "learning_rate": 0.002828, "loss": 1.1572, "step": 416448 }, { "epoch": 31.257936210131334, "grad_norm": 0.6224991083145142, "learning_rate": 0.002828, "loss": 1.1569, "step": 416512 }, { "epoch": 31.262739212007503, "grad_norm": 0.526008129119873, "learning_rate": 0.002828, "loss": 1.1628, "step": 416576 }, { "epoch": 31.267542213883676, "grad_norm": 0.5240540504455566, "learning_rate": 0.002828, "loss": 1.1584, "step": 416640 }, { "epoch": 31.27234521575985, "grad_norm": 0.5980592370033264, "learning_rate": 0.002828, "loss": 1.1564, "step": 416704 }, { "epoch": 31.27714821763602, "grad_norm": 0.5666719079017639, "learning_rate": 0.002828, "loss": 1.1586, "step": 416768 }, { "epoch": 31.281951219512194, "grad_norm": 0.6986644268035889, "learning_rate": 0.002828, "loss": 1.1644, "step": 416832 }, { "epoch": 31.286754221388367, "grad_norm": 0.6172641515731812, "learning_rate": 0.002828, "loss": 1.1594, "step": 416896 }, { "epoch": 31.29155722326454, "grad_norm": 0.5929469466209412, "learning_rate": 0.002828, "loss": 1.1602, "step": 416960 }, { "epoch": 31.296360225140713, "grad_norm": 0.7132891416549683, "learning_rate": 0.002828, "loss": 1.1586, "step": 417024 }, { "epoch": 31.301163227016886, "grad_norm": 0.5946763753890991, "learning_rate": 0.002828, "loss": 1.1586, "step": 417088 }, { "epoch": 31.30596622889306, "grad_norm": 0.6350845098495483, "learning_rate": 0.002828, "loss": 1.1545, "step": 417152 }, { "epoch": 31.310769230769232, "grad_norm": 0.567474365234375, "learning_rate": 0.002828, "loss": 1.1594, "step": 417216 }, { "epoch": 31.315572232645405, "grad_norm": 0.6074239015579224, "learning_rate": 0.002828, "loss": 1.1551, "step": 417280 }, { "epoch": 31.320375234521578, "grad_norm": 0.7145832777023315, "learning_rate": 0.002828, "loss": 1.1566, "step": 417344 }, { "epoch": 31.325178236397747, "grad_norm": 0.6822763085365295, "learning_rate": 0.002828, "loss": 1.1582, "step": 417408 }, { "epoch": 31.32998123827392, "grad_norm": 0.6444886326789856, "learning_rate": 0.002828, "loss": 1.1576, "step": 417472 }, { "epoch": 31.334784240150093, "grad_norm": 0.6175189018249512, "learning_rate": 0.002828, "loss": 1.1586, "step": 417536 }, { "epoch": 31.339587242026266, "grad_norm": 0.6543102264404297, "learning_rate": 0.002828, "loss": 1.16, "step": 417600 }, { "epoch": 31.34439024390244, "grad_norm": 0.6924708485603333, "learning_rate": 0.002828, "loss": 1.1637, "step": 417664 }, { "epoch": 31.34919324577861, "grad_norm": 0.5551199913024902, "learning_rate": 0.002828, "loss": 1.1611, "step": 417728 }, { "epoch": 31.353996247654784, "grad_norm": 0.532099723815918, "learning_rate": 0.002828, "loss": 1.1605, "step": 417792 }, { "epoch": 31.358799249530957, "grad_norm": 0.5526866912841797, "learning_rate": 0.002828, "loss": 1.1582, "step": 417856 }, { "epoch": 31.36360225140713, "grad_norm": 0.5917322039604187, "learning_rate": 0.002828, "loss": 1.1591, "step": 417920 }, { "epoch": 31.368405253283303, "grad_norm": 0.5270063281059265, "learning_rate": 0.002828, "loss": 1.1663, "step": 417984 }, { "epoch": 31.373208255159476, "grad_norm": 0.50337153673172, "learning_rate": 0.002828, "loss": 1.1562, "step": 418048 }, { "epoch": 31.37801125703565, "grad_norm": 0.6552616953849792, "learning_rate": 0.002828, "loss": 1.162, "step": 418112 }, { "epoch": 31.38281425891182, "grad_norm": 0.6119304299354553, "learning_rate": 0.002828, "loss": 1.1607, "step": 418176 }, { "epoch": 31.38761726078799, "grad_norm": 0.5546829700469971, "learning_rate": 0.002828, "loss": 1.1641, "step": 418240 }, { "epoch": 31.392420262664164, "grad_norm": 0.606814444065094, "learning_rate": 0.002828, "loss": 1.167, "step": 418304 }, { "epoch": 31.397223264540337, "grad_norm": 0.6050477027893066, "learning_rate": 0.002828, "loss": 1.1601, "step": 418368 }, { "epoch": 31.40202626641651, "grad_norm": 0.6472161412239075, "learning_rate": 0.002828, "loss": 1.1654, "step": 418432 }, { "epoch": 31.406829268292682, "grad_norm": 0.6984646320343018, "learning_rate": 0.002828, "loss": 1.1628, "step": 418496 }, { "epoch": 31.411632270168855, "grad_norm": 0.758280873298645, "learning_rate": 0.002828, "loss": 1.1588, "step": 418560 }, { "epoch": 31.416435272045028, "grad_norm": 0.5866057872772217, "learning_rate": 0.002828, "loss": 1.161, "step": 418624 }, { "epoch": 31.4212382739212, "grad_norm": 0.6438357830047607, "learning_rate": 0.002828, "loss": 1.1589, "step": 418688 }, { "epoch": 31.426041275797374, "grad_norm": 0.5375080108642578, "learning_rate": 0.002828, "loss": 1.1602, "step": 418752 }, { "epoch": 31.430844277673547, "grad_norm": 0.6227983832359314, "learning_rate": 0.002828, "loss": 1.1646, "step": 418816 }, { "epoch": 31.43564727954972, "grad_norm": 0.5298811197280884, "learning_rate": 0.002828, "loss": 1.1617, "step": 418880 }, { "epoch": 31.440450281425893, "grad_norm": 0.5807226300239563, "learning_rate": 0.002828, "loss": 1.1638, "step": 418944 }, { "epoch": 31.445253283302065, "grad_norm": 0.5875961184501648, "learning_rate": 0.002828, "loss": 1.1649, "step": 419008 }, { "epoch": 31.450056285178235, "grad_norm": 0.5816643834114075, "learning_rate": 0.002828, "loss": 1.1615, "step": 419072 }, { "epoch": 31.454859287054408, "grad_norm": 0.7433393001556396, "learning_rate": 0.002828, "loss": 1.1664, "step": 419136 }, { "epoch": 31.45966228893058, "grad_norm": 0.6241632699966431, "learning_rate": 0.002828, "loss": 1.1659, "step": 419200 }, { "epoch": 31.464465290806753, "grad_norm": 0.5977398157119751, "learning_rate": 0.002828, "loss": 1.1657, "step": 419264 }, { "epoch": 31.469268292682926, "grad_norm": 0.5252711176872253, "learning_rate": 0.002828, "loss": 1.1614, "step": 419328 }, { "epoch": 31.4740712945591, "grad_norm": 0.533634603023529, "learning_rate": 0.002828, "loss": 1.1633, "step": 419392 }, { "epoch": 31.478874296435272, "grad_norm": 0.4986468553543091, "learning_rate": 0.002828, "loss": 1.1644, "step": 419456 }, { "epoch": 31.483677298311445, "grad_norm": 0.5706108808517456, "learning_rate": 0.002828, "loss": 1.1648, "step": 419520 }, { "epoch": 31.488480300187618, "grad_norm": 0.48681211471557617, "learning_rate": 0.002828, "loss": 1.1606, "step": 419584 }, { "epoch": 31.49328330206379, "grad_norm": 0.5788038372993469, "learning_rate": 0.002828, "loss": 1.1622, "step": 419648 }, { "epoch": 31.498086303939964, "grad_norm": 0.5305057764053345, "learning_rate": 0.002828, "loss": 1.1648, "step": 419712 }, { "epoch": 31.502889305816137, "grad_norm": 0.5407353639602661, "learning_rate": 0.002828, "loss": 1.1661, "step": 419776 }, { "epoch": 31.50769230769231, "grad_norm": 0.677531898021698, "learning_rate": 0.002828, "loss": 1.1607, "step": 419840 }, { "epoch": 31.51249530956848, "grad_norm": 0.6447296738624573, "learning_rate": 0.002828, "loss": 1.1635, "step": 419904 }, { "epoch": 31.51729831144465, "grad_norm": 0.6101669073104858, "learning_rate": 0.002828, "loss": 1.1591, "step": 419968 }, { "epoch": 31.522101313320825, "grad_norm": 0.5507171154022217, "learning_rate": 0.002828, "loss": 1.16, "step": 420032 }, { "epoch": 31.526904315196997, "grad_norm": 0.6411391496658325, "learning_rate": 0.002828, "loss": 1.1638, "step": 420096 }, { "epoch": 31.53170731707317, "grad_norm": 0.4897032380104065, "learning_rate": 0.002828, "loss": 1.1684, "step": 420160 }, { "epoch": 31.536510318949343, "grad_norm": 0.5630632042884827, "learning_rate": 0.002828, "loss": 1.1631, "step": 420224 }, { "epoch": 31.541313320825516, "grad_norm": 0.6872511506080627, "learning_rate": 0.002828, "loss": 1.1586, "step": 420288 }, { "epoch": 31.54611632270169, "grad_norm": 0.6808540225028992, "learning_rate": 0.002828, "loss": 1.1662, "step": 420352 }, { "epoch": 31.550919324577862, "grad_norm": 0.5341716408729553, "learning_rate": 0.002828, "loss": 1.1692, "step": 420416 }, { "epoch": 31.555722326454035, "grad_norm": 0.7518808245658875, "learning_rate": 0.002828, "loss": 1.1694, "step": 420480 }, { "epoch": 31.560525328330208, "grad_norm": 0.5088610649108887, "learning_rate": 0.002828, "loss": 1.1644, "step": 420544 }, { "epoch": 31.56532833020638, "grad_norm": 0.6243874430656433, "learning_rate": 0.002828, "loss": 1.1691, "step": 420608 }, { "epoch": 31.570131332082553, "grad_norm": 0.5158883929252625, "learning_rate": 0.002828, "loss": 1.1606, "step": 420672 }, { "epoch": 31.574934333958723, "grad_norm": 0.7455403208732605, "learning_rate": 0.002828, "loss": 1.165, "step": 420736 }, { "epoch": 31.579737335834896, "grad_norm": 0.5810747146606445, "learning_rate": 0.002828, "loss": 1.1674, "step": 420800 }, { "epoch": 31.58454033771107, "grad_norm": 0.6923263072967529, "learning_rate": 0.002828, "loss": 1.1597, "step": 420864 }, { "epoch": 31.58934333958724, "grad_norm": 0.669924259185791, "learning_rate": 0.002828, "loss": 1.1676, "step": 420928 }, { "epoch": 31.594146341463414, "grad_norm": 0.5412556529045105, "learning_rate": 0.002828, "loss": 1.1688, "step": 420992 }, { "epoch": 31.598949343339587, "grad_norm": 0.5750775933265686, "learning_rate": 0.002828, "loss": 1.1656, "step": 421056 }, { "epoch": 31.60375234521576, "grad_norm": 0.613204836845398, "learning_rate": 0.002828, "loss": 1.1648, "step": 421120 }, { "epoch": 31.608555347091933, "grad_norm": 0.6003854870796204, "learning_rate": 0.002828, "loss": 1.169, "step": 421184 }, { "epoch": 31.613358348968106, "grad_norm": 0.5949692130088806, "learning_rate": 0.002828, "loss": 1.1648, "step": 421248 }, { "epoch": 31.61816135084428, "grad_norm": 0.5880799293518066, "learning_rate": 0.002828, "loss": 1.1724, "step": 421312 }, { "epoch": 31.62296435272045, "grad_norm": 0.603401780128479, "learning_rate": 0.002828, "loss": 1.1664, "step": 421376 }, { "epoch": 31.627767354596624, "grad_norm": 0.5906250476837158, "learning_rate": 0.002828, "loss": 1.1645, "step": 421440 }, { "epoch": 31.632570356472794, "grad_norm": 0.6233527660369873, "learning_rate": 0.002828, "loss": 1.1674, "step": 421504 }, { "epoch": 31.637373358348967, "grad_norm": 0.6136730313301086, "learning_rate": 0.002828, "loss": 1.1586, "step": 421568 }, { "epoch": 31.64217636022514, "grad_norm": 0.6427733302116394, "learning_rate": 0.002828, "loss": 1.1695, "step": 421632 }, { "epoch": 31.646979362101312, "grad_norm": 0.5708802342414856, "learning_rate": 0.002828, "loss": 1.1654, "step": 421696 }, { "epoch": 31.651782363977485, "grad_norm": 0.7320801615715027, "learning_rate": 0.002828, "loss": 1.166, "step": 421760 }, { "epoch": 31.65658536585366, "grad_norm": 0.7255459427833557, "learning_rate": 0.002828, "loss": 1.1658, "step": 421824 }, { "epoch": 31.66138836772983, "grad_norm": 0.4449161887168884, "learning_rate": 0.002828, "loss": 1.1683, "step": 421888 }, { "epoch": 31.666191369606004, "grad_norm": 0.573015034198761, "learning_rate": 0.002828, "loss": 1.1687, "step": 421952 }, { "epoch": 31.670994371482177, "grad_norm": 0.566383421421051, "learning_rate": 0.002828, "loss": 1.1694, "step": 422016 }, { "epoch": 31.67579737335835, "grad_norm": 0.7386601567268372, "learning_rate": 0.002828, "loss": 1.174, "step": 422080 }, { "epoch": 31.680600375234523, "grad_norm": 0.6502916216850281, "learning_rate": 0.002828, "loss": 1.1616, "step": 422144 }, { "epoch": 31.685403377110696, "grad_norm": 0.6516982316970825, "learning_rate": 0.002828, "loss": 1.1654, "step": 422208 }, { "epoch": 31.69020637898687, "grad_norm": 0.5580388307571411, "learning_rate": 0.002828, "loss": 1.169, "step": 422272 }, { "epoch": 31.695009380863038, "grad_norm": 0.6566148996353149, "learning_rate": 0.002828, "loss": 1.1675, "step": 422336 }, { "epoch": 31.69981238273921, "grad_norm": 0.6050575375556946, "learning_rate": 0.002828, "loss": 1.1767, "step": 422400 }, { "epoch": 31.704615384615384, "grad_norm": 0.5732092261314392, "learning_rate": 0.002828, "loss": 1.1729, "step": 422464 }, { "epoch": 31.709418386491556, "grad_norm": 0.49055150151252747, "learning_rate": 0.002828, "loss": 1.1637, "step": 422528 }, { "epoch": 31.71422138836773, "grad_norm": 0.6409571766853333, "learning_rate": 0.002828, "loss": 1.1689, "step": 422592 }, { "epoch": 31.719024390243902, "grad_norm": 0.6670295596122742, "learning_rate": 0.002828, "loss": 1.166, "step": 422656 }, { "epoch": 31.723827392120075, "grad_norm": 0.5073591470718384, "learning_rate": 0.002828, "loss": 1.167, "step": 422720 }, { "epoch": 31.728630393996248, "grad_norm": 0.5917232632637024, "learning_rate": 0.002828, "loss": 1.1681, "step": 422784 }, { "epoch": 31.73343339587242, "grad_norm": 0.6723231077194214, "learning_rate": 0.002828, "loss": 1.164, "step": 422848 }, { "epoch": 31.738236397748594, "grad_norm": 0.619998037815094, "learning_rate": 0.002828, "loss": 1.1729, "step": 422912 }, { "epoch": 31.743039399624767, "grad_norm": 0.6515606045722961, "learning_rate": 0.002828, "loss": 1.1738, "step": 422976 }, { "epoch": 31.74784240150094, "grad_norm": 0.5862688422203064, "learning_rate": 0.002828, "loss": 1.1721, "step": 423040 }, { "epoch": 31.752645403377112, "grad_norm": 0.5546661615371704, "learning_rate": 0.002828, "loss": 1.1683, "step": 423104 }, { "epoch": 31.75744840525328, "grad_norm": 0.6349488496780396, "learning_rate": 0.002828, "loss": 1.1702, "step": 423168 }, { "epoch": 31.762251407129455, "grad_norm": 0.7520371675491333, "learning_rate": 0.002828, "loss": 1.1723, "step": 423232 }, { "epoch": 31.767054409005627, "grad_norm": 0.7457068562507629, "learning_rate": 0.002828, "loss": 1.1683, "step": 423296 }, { "epoch": 31.7718574108818, "grad_norm": 0.5763024091720581, "learning_rate": 0.002828, "loss": 1.1661, "step": 423360 }, { "epoch": 31.776660412757973, "grad_norm": 0.7857934832572937, "learning_rate": 0.002828, "loss": 1.1661, "step": 423424 }, { "epoch": 31.781463414634146, "grad_norm": 0.6475039720535278, "learning_rate": 0.002828, "loss": 1.172, "step": 423488 }, { "epoch": 31.78626641651032, "grad_norm": 0.6206485033035278, "learning_rate": 0.002828, "loss": 1.1691, "step": 423552 }, { "epoch": 31.791069418386492, "grad_norm": 0.5739595293998718, "learning_rate": 0.002828, "loss": 1.1657, "step": 423616 }, { "epoch": 31.795872420262665, "grad_norm": 0.6397718787193298, "learning_rate": 0.002828, "loss": 1.1719, "step": 423680 }, { "epoch": 31.800675422138838, "grad_norm": 0.6197733879089355, "learning_rate": 0.002828, "loss": 1.1668, "step": 423744 }, { "epoch": 31.80547842401501, "grad_norm": 0.6080317497253418, "learning_rate": 0.002828, "loss": 1.1711, "step": 423808 }, { "epoch": 31.810281425891183, "grad_norm": 0.684962272644043, "learning_rate": 0.002828, "loss": 1.1699, "step": 423872 }, { "epoch": 31.815084427767353, "grad_norm": 0.7387970685958862, "learning_rate": 0.002828, "loss": 1.1674, "step": 423936 }, { "epoch": 31.819887429643526, "grad_norm": 0.5995039343833923, "learning_rate": 0.002828, "loss": 1.1687, "step": 424000 }, { "epoch": 31.8246904315197, "grad_norm": 0.5478743314743042, "learning_rate": 0.002828, "loss": 1.1738, "step": 424064 }, { "epoch": 31.82949343339587, "grad_norm": 0.4925873577594757, "learning_rate": 0.002828, "loss": 1.1717, "step": 424128 }, { "epoch": 31.834296435272044, "grad_norm": 0.5601804256439209, "learning_rate": 0.002828, "loss": 1.1698, "step": 424192 }, { "epoch": 31.839099437148217, "grad_norm": 0.5623326897621155, "learning_rate": 0.002828, "loss": 1.1676, "step": 424256 }, { "epoch": 31.84390243902439, "grad_norm": 0.5536679029464722, "learning_rate": 0.002828, "loss": 1.1691, "step": 424320 }, { "epoch": 31.848705440900563, "grad_norm": 0.6321052312850952, "learning_rate": 0.002828, "loss": 1.1715, "step": 424384 }, { "epoch": 31.853508442776736, "grad_norm": 0.6257811784744263, "learning_rate": 0.002828, "loss": 1.1735, "step": 424448 }, { "epoch": 31.85831144465291, "grad_norm": 0.5375912189483643, "learning_rate": 0.002828, "loss": 1.1686, "step": 424512 }, { "epoch": 31.86311444652908, "grad_norm": 0.6057097315788269, "learning_rate": 0.002828, "loss": 1.175, "step": 424576 }, { "epoch": 31.867917448405255, "grad_norm": 0.6763109564781189, "learning_rate": 0.002828, "loss": 1.1708, "step": 424640 }, { "epoch": 31.872720450281427, "grad_norm": 0.5548925399780273, "learning_rate": 0.002828, "loss": 1.1662, "step": 424704 }, { "epoch": 31.877523452157597, "grad_norm": 0.65684974193573, "learning_rate": 0.002828, "loss": 1.1728, "step": 424768 }, { "epoch": 31.88232645403377, "grad_norm": 0.5662223100662231, "learning_rate": 0.002828, "loss": 1.169, "step": 424832 }, { "epoch": 31.887129455909943, "grad_norm": 0.591471791267395, "learning_rate": 0.002828, "loss": 1.1711, "step": 424896 }, { "epoch": 31.891932457786115, "grad_norm": 0.5270155668258667, "learning_rate": 0.002828, "loss": 1.171, "step": 424960 }, { "epoch": 31.89673545966229, "grad_norm": 0.6331884860992432, "learning_rate": 0.002828, "loss": 1.1762, "step": 425024 }, { "epoch": 31.90153846153846, "grad_norm": 0.6494824886322021, "learning_rate": 0.002828, "loss": 1.1682, "step": 425088 }, { "epoch": 31.906341463414634, "grad_norm": 0.5662149786949158, "learning_rate": 0.002828, "loss": 1.1738, "step": 425152 }, { "epoch": 31.911144465290807, "grad_norm": 0.6335403919219971, "learning_rate": 0.002828, "loss": 1.1717, "step": 425216 }, { "epoch": 31.91594746716698, "grad_norm": 0.5693516135215759, "learning_rate": 0.002828, "loss": 1.1689, "step": 425280 }, { "epoch": 31.920750469043153, "grad_norm": 0.7408801317214966, "learning_rate": 0.002828, "loss": 1.1772, "step": 425344 }, { "epoch": 31.925553470919326, "grad_norm": 0.5731872320175171, "learning_rate": 0.002828, "loss": 1.1721, "step": 425408 }, { "epoch": 31.9303564727955, "grad_norm": 0.7666354775428772, "learning_rate": 0.002828, "loss": 1.1711, "step": 425472 }, { "epoch": 31.93515947467167, "grad_norm": 0.5870010256767273, "learning_rate": 0.002828, "loss": 1.1761, "step": 425536 }, { "epoch": 31.93996247654784, "grad_norm": 0.6777177453041077, "learning_rate": 0.002828, "loss": 1.1723, "step": 425600 }, { "epoch": 31.944765478424014, "grad_norm": 0.613329291343689, "learning_rate": 0.002828, "loss": 1.1746, "step": 425664 }, { "epoch": 31.949568480300186, "grad_norm": 0.5409830212593079, "learning_rate": 0.002828, "loss": 1.1727, "step": 425728 }, { "epoch": 31.95437148217636, "grad_norm": 0.5840930938720703, "learning_rate": 0.002828, "loss": 1.1731, "step": 425792 }, { "epoch": 31.959174484052532, "grad_norm": 0.5402800440788269, "learning_rate": 0.002828, "loss": 1.1773, "step": 425856 }, { "epoch": 31.963977485928705, "grad_norm": 0.7048522233963013, "learning_rate": 0.002828, "loss": 1.1682, "step": 425920 }, { "epoch": 31.968780487804878, "grad_norm": 0.6770626306533813, "learning_rate": 0.002828, "loss": 1.1751, "step": 425984 }, { "epoch": 31.97358348968105, "grad_norm": 0.6093770265579224, "learning_rate": 0.002828, "loss": 1.1734, "step": 426048 }, { "epoch": 31.978386491557224, "grad_norm": 0.6553013920783997, "learning_rate": 0.002828, "loss": 1.1693, "step": 426112 }, { "epoch": 31.983189493433397, "grad_norm": 0.6486321091651917, "learning_rate": 0.002828, "loss": 1.1685, "step": 426176 }, { "epoch": 31.98799249530957, "grad_norm": 0.697055459022522, "learning_rate": 0.002828, "loss": 1.1714, "step": 426240 }, { "epoch": 31.992795497185742, "grad_norm": 0.6191198825836182, "learning_rate": 0.002828, "loss": 1.1763, "step": 426304 }, { "epoch": 31.997598499061915, "grad_norm": 0.5218004584312439, "learning_rate": 0.002828, "loss": 1.1689, "step": 426368 }, { "epoch": 32.002401500938085, "grad_norm": 0.5775057077407837, "learning_rate": 0.002828, "loss": 1.1579, "step": 426432 }, { "epoch": 32.00720450281426, "grad_norm": 0.5523258447647095, "learning_rate": 0.002828, "loss": 1.1342, "step": 426496 }, { "epoch": 32.01200750469043, "grad_norm": 0.6225584149360657, "learning_rate": 0.002828, "loss": 1.1318, "step": 426560 }, { "epoch": 32.01681050656661, "grad_norm": 0.5929083824157715, "learning_rate": 0.002828, "loss": 1.1372, "step": 426624 }, { "epoch": 32.021613508442776, "grad_norm": 0.6784092783927917, "learning_rate": 0.002828, "loss": 1.1344, "step": 426688 }, { "epoch": 32.02641651031895, "grad_norm": 0.546075701713562, "learning_rate": 0.002828, "loss": 1.1379, "step": 426752 }, { "epoch": 32.03121951219512, "grad_norm": 0.5539538860321045, "learning_rate": 0.002828, "loss": 1.1418, "step": 426816 }, { "epoch": 32.03602251407129, "grad_norm": 0.6780215501785278, "learning_rate": 0.002828, "loss": 1.1385, "step": 426880 }, { "epoch": 32.04082551594747, "grad_norm": 0.5612590312957764, "learning_rate": 0.002828, "loss": 1.134, "step": 426944 }, { "epoch": 32.04562851782364, "grad_norm": 0.49800795316696167, "learning_rate": 0.002828, "loss": 1.1353, "step": 427008 }, { "epoch": 32.05043151969981, "grad_norm": 0.6973561644554138, "learning_rate": 0.002828, "loss": 1.1387, "step": 427072 }, { "epoch": 32.05523452157598, "grad_norm": 0.8178017735481262, "learning_rate": 0.002828, "loss": 1.1467, "step": 427136 }, { "epoch": 32.06003752345216, "grad_norm": 0.6194310784339905, "learning_rate": 0.002828, "loss": 1.1386, "step": 427200 }, { "epoch": 32.06484052532833, "grad_norm": 0.5829865336418152, "learning_rate": 0.002828, "loss": 1.1411, "step": 427264 }, { "epoch": 32.069643527204505, "grad_norm": 0.493563175201416, "learning_rate": 0.002828, "loss": 1.1388, "step": 427328 }, { "epoch": 32.074446529080674, "grad_norm": 0.5824282765388489, "learning_rate": 0.002828, "loss": 1.1466, "step": 427392 }, { "epoch": 32.07924953095685, "grad_norm": 0.6629090905189514, "learning_rate": 0.002828, "loss": 1.1325, "step": 427456 }, { "epoch": 32.08405253283302, "grad_norm": 0.8915190696716309, "learning_rate": 0.002828, "loss": 1.1488, "step": 427520 }, { "epoch": 32.0888555347092, "grad_norm": 0.7942197918891907, "learning_rate": 0.002828, "loss": 1.1477, "step": 427584 }, { "epoch": 32.093658536585366, "grad_norm": 0.6769029498100281, "learning_rate": 0.002828, "loss": 1.1387, "step": 427648 }, { "epoch": 32.098461538461535, "grad_norm": 0.6665157079696655, "learning_rate": 0.002828, "loss": 1.1466, "step": 427712 }, { "epoch": 32.10326454033771, "grad_norm": 0.6797373294830322, "learning_rate": 0.002828, "loss": 1.1446, "step": 427776 }, { "epoch": 32.10806754221388, "grad_norm": 0.6158092617988586, "learning_rate": 0.002828, "loss": 1.1391, "step": 427840 }, { "epoch": 32.11287054409006, "grad_norm": 0.6532845497131348, "learning_rate": 0.002828, "loss": 1.1387, "step": 427904 }, { "epoch": 32.11767354596623, "grad_norm": 0.5372705459594727, "learning_rate": 0.002828, "loss": 1.1425, "step": 427968 }, { "epoch": 32.1224765478424, "grad_norm": 0.5978798866271973, "learning_rate": 0.002828, "loss": 1.145, "step": 428032 }, { "epoch": 32.12727954971857, "grad_norm": 0.6882296800613403, "learning_rate": 0.002828, "loss": 1.1387, "step": 428096 }, { "epoch": 32.13208255159475, "grad_norm": 0.5775601267814636, "learning_rate": 0.002828, "loss": 1.138, "step": 428160 }, { "epoch": 32.13688555347092, "grad_norm": 0.46795588731765747, "learning_rate": 0.002828, "loss": 1.1444, "step": 428224 }, { "epoch": 32.141688555347095, "grad_norm": 0.5983672142028809, "learning_rate": 0.002828, "loss": 1.1428, "step": 428288 }, { "epoch": 32.146491557223264, "grad_norm": 0.5804497599601746, "learning_rate": 0.002828, "loss": 1.1385, "step": 428352 }, { "epoch": 32.15129455909944, "grad_norm": 0.6478627920150757, "learning_rate": 0.002828, "loss": 1.1418, "step": 428416 }, { "epoch": 32.15609756097561, "grad_norm": 0.6652305722236633, "learning_rate": 0.002828, "loss": 1.1436, "step": 428480 }, { "epoch": 32.16090056285178, "grad_norm": 0.7055870890617371, "learning_rate": 0.002828, "loss": 1.1455, "step": 428544 }, { "epoch": 32.165703564727956, "grad_norm": 0.6123886704444885, "learning_rate": 0.002828, "loss": 1.1439, "step": 428608 }, { "epoch": 32.170506566604125, "grad_norm": 0.7516705393791199, "learning_rate": 0.002828, "loss": 1.1475, "step": 428672 }, { "epoch": 32.1753095684803, "grad_norm": 0.5789108276367188, "learning_rate": 0.002828, "loss": 1.1496, "step": 428736 }, { "epoch": 32.18011257035647, "grad_norm": 0.5870668292045593, "learning_rate": 0.002828, "loss": 1.144, "step": 428800 }, { "epoch": 32.18491557223265, "grad_norm": 0.6455457210540771, "learning_rate": 0.002828, "loss": 1.1413, "step": 428864 }, { "epoch": 32.18971857410882, "grad_norm": 0.6160818934440613, "learning_rate": 0.002828, "loss": 1.141, "step": 428928 }, { "epoch": 32.19452157598499, "grad_norm": 0.559694230556488, "learning_rate": 0.002828, "loss": 1.1412, "step": 428992 }, { "epoch": 32.19932457786116, "grad_norm": 0.587887167930603, "learning_rate": 0.002828, "loss": 1.1478, "step": 429056 }, { "epoch": 32.20412757973734, "grad_norm": 0.6603989005088806, "learning_rate": 0.002828, "loss": 1.1464, "step": 429120 }, { "epoch": 32.20893058161351, "grad_norm": 0.539109468460083, "learning_rate": 0.002828, "loss": 1.1494, "step": 429184 }, { "epoch": 32.213733583489685, "grad_norm": 0.6352860927581787, "learning_rate": 0.002828, "loss": 1.1468, "step": 429248 }, { "epoch": 32.218536585365854, "grad_norm": 0.6434819102287292, "learning_rate": 0.002828, "loss": 1.1466, "step": 429312 }, { "epoch": 32.22333958724202, "grad_norm": 0.5428691506385803, "learning_rate": 0.002828, "loss": 1.1452, "step": 429376 }, { "epoch": 32.2281425891182, "grad_norm": 0.6501672267913818, "learning_rate": 0.002828, "loss": 1.1452, "step": 429440 }, { "epoch": 32.23294559099437, "grad_norm": 0.5786007642745972, "learning_rate": 0.002828, "loss": 1.1456, "step": 429504 }, { "epoch": 32.237748592870545, "grad_norm": 0.5150612592697144, "learning_rate": 0.002828, "loss": 1.1513, "step": 429568 }, { "epoch": 32.242551594746715, "grad_norm": 0.502147912979126, "learning_rate": 0.002828, "loss": 1.1423, "step": 429632 }, { "epoch": 32.24735459662289, "grad_norm": 0.6412995457649231, "learning_rate": 0.002828, "loss": 1.1512, "step": 429696 }, { "epoch": 32.25215759849906, "grad_norm": 0.5578099489212036, "learning_rate": 0.002828, "loss": 1.1474, "step": 429760 }, { "epoch": 32.25696060037524, "grad_norm": 0.7341820597648621, "learning_rate": 0.002828, "loss": 1.1477, "step": 429824 }, { "epoch": 32.261763602251406, "grad_norm": 0.586214005947113, "learning_rate": 0.002828, "loss": 1.1449, "step": 429888 }, { "epoch": 32.26656660412758, "grad_norm": 0.6538716554641724, "learning_rate": 0.002828, "loss": 1.1443, "step": 429952 }, { "epoch": 32.27136960600375, "grad_norm": 0.6261554956436157, "learning_rate": 0.002828, "loss": 1.1498, "step": 430016 }, { "epoch": 32.27617260787993, "grad_norm": 0.561710000038147, "learning_rate": 0.002828, "loss": 1.1503, "step": 430080 }, { "epoch": 32.2809756097561, "grad_norm": 0.5323227643966675, "learning_rate": 0.002828, "loss": 1.148, "step": 430144 }, { "epoch": 32.28577861163227, "grad_norm": 0.6599904298782349, "learning_rate": 0.002828, "loss": 1.1521, "step": 430208 }, { "epoch": 32.290581613508444, "grad_norm": 0.7270208597183228, "learning_rate": 0.002828, "loss": 1.1475, "step": 430272 }, { "epoch": 32.29538461538461, "grad_norm": 0.4618419110774994, "learning_rate": 0.002828, "loss": 1.1474, "step": 430336 }, { "epoch": 32.30018761726079, "grad_norm": 0.640720784664154, "learning_rate": 0.002828, "loss": 1.1492, "step": 430400 }, { "epoch": 32.30499061913696, "grad_norm": 0.6126275658607483, "learning_rate": 0.002828, "loss": 1.1491, "step": 430464 }, { "epoch": 32.309793621013135, "grad_norm": 0.5292582511901855, "learning_rate": 0.002828, "loss": 1.1567, "step": 430528 }, { "epoch": 32.314596622889304, "grad_norm": 0.6568554043769836, "learning_rate": 0.002828, "loss": 1.1491, "step": 430592 }, { "epoch": 32.31939962476548, "grad_norm": 0.6243530511856079, "learning_rate": 0.002828, "loss": 1.1482, "step": 430656 }, { "epoch": 32.32420262664165, "grad_norm": 0.6516066789627075, "learning_rate": 0.002828, "loss": 1.1551, "step": 430720 }, { "epoch": 32.32900562851783, "grad_norm": 0.5971904993057251, "learning_rate": 0.002828, "loss": 1.1523, "step": 430784 }, { "epoch": 32.333808630393996, "grad_norm": 0.606874942779541, "learning_rate": 0.002828, "loss": 1.1477, "step": 430848 }, { "epoch": 32.33861163227017, "grad_norm": 0.7398237586021423, "learning_rate": 0.002828, "loss": 1.1501, "step": 430912 }, { "epoch": 32.34341463414634, "grad_norm": 0.6455931663513184, "learning_rate": 0.002828, "loss": 1.1486, "step": 430976 }, { "epoch": 32.34821763602251, "grad_norm": 0.6093574166297913, "learning_rate": 0.002828, "loss": 1.1534, "step": 431040 }, { "epoch": 32.35302063789869, "grad_norm": 0.5911879539489746, "learning_rate": 0.002828, "loss": 1.1492, "step": 431104 }, { "epoch": 32.35782363977486, "grad_norm": 0.6537988781929016, "learning_rate": 0.002828, "loss": 1.1508, "step": 431168 }, { "epoch": 32.36262664165103, "grad_norm": 0.6023768782615662, "learning_rate": 0.002828, "loss": 1.1543, "step": 431232 }, { "epoch": 32.3674296435272, "grad_norm": 0.5989535450935364, "learning_rate": 0.002828, "loss": 1.1476, "step": 431296 }, { "epoch": 32.37223264540338, "grad_norm": 0.5429506301879883, "learning_rate": 0.002828, "loss": 1.1467, "step": 431360 }, { "epoch": 32.37703564727955, "grad_norm": 0.5513024926185608, "learning_rate": 0.002828, "loss": 1.1446, "step": 431424 }, { "epoch": 32.381838649155725, "grad_norm": 0.6115459203720093, "learning_rate": 0.002828, "loss": 1.1523, "step": 431488 }, { "epoch": 32.386641651031894, "grad_norm": 0.7115570306777954, "learning_rate": 0.002828, "loss": 1.1481, "step": 431552 }, { "epoch": 32.39144465290807, "grad_norm": 0.561923086643219, "learning_rate": 0.002828, "loss": 1.1505, "step": 431616 }, { "epoch": 32.39624765478424, "grad_norm": 0.6152815818786621, "learning_rate": 0.002828, "loss": 1.1485, "step": 431680 }, { "epoch": 32.40105065666041, "grad_norm": 0.5495036840438843, "learning_rate": 0.002828, "loss": 1.1506, "step": 431744 }, { "epoch": 32.405853658536586, "grad_norm": 0.6558094620704651, "learning_rate": 0.002828, "loss": 1.1489, "step": 431808 }, { "epoch": 32.410656660412755, "grad_norm": 0.6343786120414734, "learning_rate": 0.002828, "loss": 1.1564, "step": 431872 }, { "epoch": 32.41545966228893, "grad_norm": 0.5835247039794922, "learning_rate": 0.002828, "loss": 1.1553, "step": 431936 }, { "epoch": 32.4202626641651, "grad_norm": 0.6473934054374695, "learning_rate": 0.002828, "loss": 1.1518, "step": 432000 }, { "epoch": 32.42506566604128, "grad_norm": 0.5743913650512695, "learning_rate": 0.002828, "loss": 1.1523, "step": 432064 }, { "epoch": 32.42986866791745, "grad_norm": 0.9068625569343567, "learning_rate": 0.002828, "loss": 1.1512, "step": 432128 }, { "epoch": 32.43467166979362, "grad_norm": 0.6137439608573914, "learning_rate": 0.002828, "loss": 1.1539, "step": 432192 }, { "epoch": 32.43947467166979, "grad_norm": 0.5453054904937744, "learning_rate": 0.002828, "loss": 1.1545, "step": 432256 }, { "epoch": 32.44427767354597, "grad_norm": 0.8272791504859924, "learning_rate": 0.002828, "loss": 1.1496, "step": 432320 }, { "epoch": 32.44908067542214, "grad_norm": 0.5712248682975769, "learning_rate": 0.002828, "loss": 1.1575, "step": 432384 }, { "epoch": 32.453883677298315, "grad_norm": 0.5890712141990662, "learning_rate": 0.002828, "loss": 1.1478, "step": 432448 }, { "epoch": 32.458686679174484, "grad_norm": 0.6274373531341553, "learning_rate": 0.002828, "loss": 1.1567, "step": 432512 }, { "epoch": 32.46348968105065, "grad_norm": 0.5753583908081055, "learning_rate": 0.002828, "loss": 1.15, "step": 432576 }, { "epoch": 32.46829268292683, "grad_norm": 0.6169336438179016, "learning_rate": 0.002828, "loss": 1.1548, "step": 432640 }, { "epoch": 32.473095684803, "grad_norm": 0.5989509224891663, "learning_rate": 0.002828, "loss": 1.1593, "step": 432704 }, { "epoch": 32.477898686679175, "grad_norm": 0.4917387068271637, "learning_rate": 0.002828, "loss": 1.1532, "step": 432768 }, { "epoch": 32.482701688555345, "grad_norm": 0.7819995284080505, "learning_rate": 0.002828, "loss": 1.1521, "step": 432832 }, { "epoch": 32.48750469043152, "grad_norm": 0.6098030805587769, "learning_rate": 0.002828, "loss": 1.1583, "step": 432896 }, { "epoch": 32.49230769230769, "grad_norm": 0.5828316807746887, "learning_rate": 0.002828, "loss": 1.1552, "step": 432960 }, { "epoch": 32.49711069418387, "grad_norm": 0.6648019552230835, "learning_rate": 0.002828, "loss": 1.1509, "step": 433024 }, { "epoch": 32.501913696060036, "grad_norm": 0.6286805272102356, "learning_rate": 0.002828, "loss": 1.1499, "step": 433088 }, { "epoch": 32.50671669793621, "grad_norm": 0.5933751463890076, "learning_rate": 0.002828, "loss": 1.1579, "step": 433152 }, { "epoch": 32.51151969981238, "grad_norm": 0.6346881985664368, "learning_rate": 0.002828, "loss": 1.1556, "step": 433216 }, { "epoch": 32.51632270168856, "grad_norm": 0.5340132713317871, "learning_rate": 0.002828, "loss": 1.1561, "step": 433280 }, { "epoch": 32.52112570356473, "grad_norm": 0.5881491899490356, "learning_rate": 0.002828, "loss": 1.1546, "step": 433344 }, { "epoch": 32.5259287054409, "grad_norm": 0.7711880207061768, "learning_rate": 0.002828, "loss": 1.1495, "step": 433408 }, { "epoch": 32.530731707317074, "grad_norm": 0.5821669101715088, "learning_rate": 0.002828, "loss": 1.1585, "step": 433472 }, { "epoch": 32.53553470919324, "grad_norm": 0.5389909148216248, "learning_rate": 0.002828, "loss": 1.1549, "step": 433536 }, { "epoch": 32.54033771106942, "grad_norm": 0.6722609996795654, "learning_rate": 0.002828, "loss": 1.1579, "step": 433600 }, { "epoch": 32.54514071294559, "grad_norm": 0.6273888349533081, "learning_rate": 0.002828, "loss": 1.1573, "step": 433664 }, { "epoch": 32.549943714821765, "grad_norm": 0.5908598303794861, "learning_rate": 0.002828, "loss": 1.1499, "step": 433728 }, { "epoch": 32.554746716697935, "grad_norm": 0.5363947153091431, "learning_rate": 0.002828, "loss": 1.1575, "step": 433792 }, { "epoch": 32.55954971857411, "grad_norm": 0.6329306364059448, "learning_rate": 0.002828, "loss": 1.1615, "step": 433856 }, { "epoch": 32.56435272045028, "grad_norm": 0.6444524526596069, "learning_rate": 0.002828, "loss": 1.1544, "step": 433920 }, { "epoch": 32.56915572232646, "grad_norm": 0.606938898563385, "learning_rate": 0.002828, "loss": 1.1586, "step": 433984 }, { "epoch": 32.573958724202626, "grad_norm": 0.6989884376525879, "learning_rate": 0.002828, "loss": 1.1526, "step": 434048 }, { "epoch": 32.5787617260788, "grad_norm": 0.6586000323295593, "learning_rate": 0.002828, "loss": 1.1515, "step": 434112 }, { "epoch": 32.58356472795497, "grad_norm": 0.591772198677063, "learning_rate": 0.002828, "loss": 1.1585, "step": 434176 }, { "epoch": 32.58836772983114, "grad_norm": 0.5981971025466919, "learning_rate": 0.002828, "loss": 1.1605, "step": 434240 }, { "epoch": 32.59317073170732, "grad_norm": 0.6120181679725647, "learning_rate": 0.002828, "loss": 1.1584, "step": 434304 }, { "epoch": 32.59797373358349, "grad_norm": 0.5320422649383545, "learning_rate": 0.002828, "loss": 1.1561, "step": 434368 }, { "epoch": 32.60277673545966, "grad_norm": 0.6150845885276794, "learning_rate": 0.002828, "loss": 1.1532, "step": 434432 }, { "epoch": 32.60757973733583, "grad_norm": 0.4970727860927582, "learning_rate": 0.002828, "loss": 1.1556, "step": 434496 }, { "epoch": 32.61238273921201, "grad_norm": 0.6972767114639282, "learning_rate": 0.002828, "loss": 1.1542, "step": 434560 }, { "epoch": 32.61718574108818, "grad_norm": 0.5277301669120789, "learning_rate": 0.002828, "loss": 1.1609, "step": 434624 }, { "epoch": 32.621988742964355, "grad_norm": 0.5520920157432556, "learning_rate": 0.002828, "loss": 1.155, "step": 434688 }, { "epoch": 32.626791744840524, "grad_norm": 0.645092785358429, "learning_rate": 0.002828, "loss": 1.1557, "step": 434752 }, { "epoch": 32.6315947467167, "grad_norm": 0.5922141075134277, "learning_rate": 0.002828, "loss": 1.1569, "step": 434816 }, { "epoch": 32.63639774859287, "grad_norm": 0.5581287741661072, "learning_rate": 0.002828, "loss": 1.1563, "step": 434880 }, { "epoch": 32.64120075046905, "grad_norm": 0.5025448799133301, "learning_rate": 0.002828, "loss": 1.1583, "step": 434944 }, { "epoch": 32.646003752345216, "grad_norm": 0.8015186786651611, "learning_rate": 0.002828, "loss": 1.1562, "step": 435008 }, { "epoch": 32.650806754221385, "grad_norm": 0.6109132170677185, "learning_rate": 0.002828, "loss": 1.1543, "step": 435072 }, { "epoch": 32.65560975609756, "grad_norm": 0.6086292266845703, "learning_rate": 0.002828, "loss": 1.1568, "step": 435136 }, { "epoch": 32.66041275797373, "grad_norm": 0.637120246887207, "learning_rate": 0.002828, "loss": 1.1556, "step": 435200 }, { "epoch": 32.66521575984991, "grad_norm": 0.6186965107917786, "learning_rate": 0.002828, "loss": 1.1586, "step": 435264 }, { "epoch": 32.67001876172608, "grad_norm": 0.7160683274269104, "learning_rate": 0.002828, "loss": 1.1552, "step": 435328 }, { "epoch": 32.67482176360225, "grad_norm": 0.5873770117759705, "learning_rate": 0.002828, "loss": 1.1558, "step": 435392 }, { "epoch": 32.67962476547842, "grad_norm": 0.6060091257095337, "learning_rate": 0.002828, "loss": 1.1648, "step": 435456 }, { "epoch": 32.6844277673546, "grad_norm": 0.682604193687439, "learning_rate": 0.002828, "loss": 1.163, "step": 435520 }, { "epoch": 32.68923076923077, "grad_norm": 0.5475385189056396, "learning_rate": 0.002828, "loss": 1.1619, "step": 435584 }, { "epoch": 32.694033771106945, "grad_norm": 0.6687578558921814, "learning_rate": 0.002828, "loss": 1.1584, "step": 435648 }, { "epoch": 32.698836772983114, "grad_norm": 0.5442762970924377, "learning_rate": 0.002828, "loss": 1.1594, "step": 435712 }, { "epoch": 32.70363977485929, "grad_norm": 0.6415442824363708, "learning_rate": 0.002828, "loss": 1.1621, "step": 435776 }, { "epoch": 32.70844277673546, "grad_norm": 0.5130046606063843, "learning_rate": 0.002828, "loss": 1.1586, "step": 435840 }, { "epoch": 32.71324577861163, "grad_norm": 0.610876202583313, "learning_rate": 0.002828, "loss": 1.1588, "step": 435904 }, { "epoch": 32.718048780487806, "grad_norm": 0.5902680158615112, "learning_rate": 0.002828, "loss": 1.1635, "step": 435968 }, { "epoch": 32.722851782363975, "grad_norm": 0.5602878332138062, "learning_rate": 0.002828, "loss": 1.1582, "step": 436032 }, { "epoch": 32.72765478424015, "grad_norm": 0.6569159626960754, "learning_rate": 0.002828, "loss": 1.1593, "step": 436096 }, { "epoch": 32.73245778611632, "grad_norm": 0.7306807637214661, "learning_rate": 0.002828, "loss": 1.1594, "step": 436160 }, { "epoch": 32.7372607879925, "grad_norm": 0.6899413466453552, "learning_rate": 0.002828, "loss": 1.1562, "step": 436224 }, { "epoch": 32.742063789868666, "grad_norm": 0.5341850519180298, "learning_rate": 0.002828, "loss": 1.1588, "step": 436288 }, { "epoch": 32.74686679174484, "grad_norm": 0.5809001326560974, "learning_rate": 0.002828, "loss": 1.1602, "step": 436352 }, { "epoch": 32.75166979362101, "grad_norm": 0.5650394558906555, "learning_rate": 0.002828, "loss": 1.158, "step": 436416 }, { "epoch": 32.75647279549719, "grad_norm": 0.5740302801132202, "learning_rate": 0.002828, "loss": 1.1595, "step": 436480 }, { "epoch": 32.76127579737336, "grad_norm": 0.57270747423172, "learning_rate": 0.002828, "loss": 1.1572, "step": 436544 }, { "epoch": 32.766078799249534, "grad_norm": 0.568086564540863, "learning_rate": 0.002828, "loss": 1.1609, "step": 436608 }, { "epoch": 32.770881801125704, "grad_norm": 0.6031185984611511, "learning_rate": 0.002828, "loss": 1.1559, "step": 436672 }, { "epoch": 32.77568480300187, "grad_norm": 0.6868975162506104, "learning_rate": 0.002828, "loss": 1.1593, "step": 436736 }, { "epoch": 32.78048780487805, "grad_norm": 0.6145572662353516, "learning_rate": 0.002828, "loss": 1.166, "step": 436800 }, { "epoch": 32.78529080675422, "grad_norm": 0.5906713604927063, "learning_rate": 0.002828, "loss": 1.1612, "step": 436864 }, { "epoch": 32.790093808630395, "grad_norm": 0.584947943687439, "learning_rate": 0.002828, "loss": 1.1581, "step": 436928 }, { "epoch": 32.794896810506565, "grad_norm": 0.6431328654289246, "learning_rate": 0.002828, "loss": 1.1649, "step": 436992 }, { "epoch": 32.79969981238274, "grad_norm": 0.5539917945861816, "learning_rate": 0.002828, "loss": 1.1606, "step": 437056 }, { "epoch": 32.80450281425891, "grad_norm": 0.4958864450454712, "learning_rate": 0.002828, "loss": 1.1635, "step": 437120 }, { "epoch": 32.80930581613509, "grad_norm": 0.6840506196022034, "learning_rate": 0.002828, "loss": 1.1619, "step": 437184 }, { "epoch": 32.814108818011256, "grad_norm": 0.6359443664550781, "learning_rate": 0.002828, "loss": 1.1603, "step": 437248 }, { "epoch": 32.81891181988743, "grad_norm": 0.578838050365448, "learning_rate": 0.002828, "loss": 1.1568, "step": 437312 }, { "epoch": 32.8237148217636, "grad_norm": 0.5920944809913635, "learning_rate": 0.002828, "loss": 1.1673, "step": 437376 }, { "epoch": 32.82851782363978, "grad_norm": 0.5457953214645386, "learning_rate": 0.002828, "loss": 1.1608, "step": 437440 }, { "epoch": 32.83332082551595, "grad_norm": 0.878715455532074, "learning_rate": 0.002828, "loss": 1.161, "step": 437504 }, { "epoch": 32.83812382739212, "grad_norm": 0.5791822671890259, "learning_rate": 0.002828, "loss": 1.1599, "step": 437568 }, { "epoch": 32.84292682926829, "grad_norm": 0.5815683603286743, "learning_rate": 0.002828, "loss": 1.1599, "step": 437632 }, { "epoch": 32.84772983114446, "grad_norm": 0.5945708751678467, "learning_rate": 0.002828, "loss": 1.1608, "step": 437696 }, { "epoch": 32.85253283302064, "grad_norm": 0.5922000408172607, "learning_rate": 0.002828, "loss": 1.1616, "step": 437760 }, { "epoch": 32.85733583489681, "grad_norm": 0.568234384059906, "learning_rate": 0.002828, "loss": 1.1621, "step": 437824 }, { "epoch": 32.862138836772985, "grad_norm": 0.7399768829345703, "learning_rate": 0.002828, "loss": 1.1614, "step": 437888 }, { "epoch": 32.866941838649154, "grad_norm": 0.4981638193130493, "learning_rate": 0.002828, "loss": 1.1622, "step": 437952 }, { "epoch": 32.87174484052533, "grad_norm": 0.5751581192016602, "learning_rate": 0.002828, "loss": 1.1604, "step": 438016 }, { "epoch": 32.8765478424015, "grad_norm": 0.5195406675338745, "learning_rate": 0.002828, "loss": 1.1633, "step": 438080 }, { "epoch": 32.88135084427768, "grad_norm": 0.49524155259132385, "learning_rate": 0.002828, "loss": 1.1667, "step": 438144 }, { "epoch": 32.886153846153846, "grad_norm": 0.684440553188324, "learning_rate": 0.002828, "loss": 1.16, "step": 438208 }, { "epoch": 32.890956848030015, "grad_norm": 0.6392771005630493, "learning_rate": 0.002828, "loss": 1.1637, "step": 438272 }, { "epoch": 32.89575984990619, "grad_norm": 0.6096252799034119, "learning_rate": 0.002828, "loss": 1.1601, "step": 438336 }, { "epoch": 32.90056285178236, "grad_norm": 0.6157074570655823, "learning_rate": 0.002828, "loss": 1.1674, "step": 438400 }, { "epoch": 32.90536585365854, "grad_norm": 0.5888133645057678, "learning_rate": 0.002828, "loss": 1.1647, "step": 438464 }, { "epoch": 32.91016885553471, "grad_norm": 0.7054190635681152, "learning_rate": 0.002828, "loss": 1.1604, "step": 438528 }, { "epoch": 32.91497185741088, "grad_norm": 0.6485116481781006, "learning_rate": 0.002828, "loss": 1.1671, "step": 438592 }, { "epoch": 32.91977485928705, "grad_norm": 0.5710200071334839, "learning_rate": 0.002828, "loss": 1.1681, "step": 438656 }, { "epoch": 32.92457786116323, "grad_norm": 0.6301112771034241, "learning_rate": 0.002828, "loss": 1.1568, "step": 438720 }, { "epoch": 32.9293808630394, "grad_norm": 0.5653761625289917, "learning_rate": 0.002828, "loss": 1.1616, "step": 438784 }, { "epoch": 32.934183864915575, "grad_norm": 0.6624953150749207, "learning_rate": 0.002828, "loss": 1.1616, "step": 438848 }, { "epoch": 32.938986866791744, "grad_norm": 0.5718125104904175, "learning_rate": 0.002828, "loss": 1.162, "step": 438912 }, { "epoch": 32.94378986866792, "grad_norm": 0.5164399147033691, "learning_rate": 0.002828, "loss": 1.1634, "step": 438976 }, { "epoch": 32.94859287054409, "grad_norm": 0.46567627787590027, "learning_rate": 0.002828, "loss": 1.1591, "step": 439040 }, { "epoch": 32.95339587242026, "grad_norm": 0.5681803226470947, "learning_rate": 0.002828, "loss": 1.1603, "step": 439104 }, { "epoch": 32.958198874296436, "grad_norm": 0.6604852080345154, "learning_rate": 0.002828, "loss": 1.1637, "step": 439168 }, { "epoch": 32.963001876172605, "grad_norm": 0.5455286502838135, "learning_rate": 0.002828, "loss": 1.1566, "step": 439232 }, { "epoch": 32.96780487804878, "grad_norm": 0.555708646774292, "learning_rate": 0.002828, "loss": 1.1659, "step": 439296 }, { "epoch": 32.97260787992495, "grad_norm": 0.698499321937561, "learning_rate": 0.002828, "loss": 1.16, "step": 439360 }, { "epoch": 32.97741088180113, "grad_norm": 0.5202065110206604, "learning_rate": 0.002828, "loss": 1.1676, "step": 439424 }, { "epoch": 32.9822138836773, "grad_norm": 0.5934895873069763, "learning_rate": 0.002828, "loss": 1.1661, "step": 439488 }, { "epoch": 32.98701688555347, "grad_norm": 0.5989176034927368, "learning_rate": 0.002828, "loss": 1.1609, "step": 439552 }, { "epoch": 32.99181988742964, "grad_norm": 0.5574471950531006, "learning_rate": 0.002828, "loss": 1.1624, "step": 439616 }, { "epoch": 32.99662288930582, "grad_norm": 0.6035043597221375, "learning_rate": 0.002828, "loss": 1.1581, "step": 439680 }, { "epoch": 33.00142589118199, "grad_norm": 0.6401668787002563, "learning_rate": 0.002828, "loss": 1.1551, "step": 439744 }, { "epoch": 33.006228893058164, "grad_norm": 0.6571485996246338, "learning_rate": 0.002828, "loss": 1.125, "step": 439808 }, { "epoch": 33.011031894934334, "grad_norm": 0.6424153447151184, "learning_rate": 0.002828, "loss": 1.1318, "step": 439872 }, { "epoch": 33.0158348968105, "grad_norm": 0.5356622338294983, "learning_rate": 0.002828, "loss": 1.1314, "step": 439936 }, { "epoch": 33.02063789868668, "grad_norm": 0.6436302065849304, "learning_rate": 0.002828, "loss": 1.1284, "step": 440000 }, { "epoch": 33.02544090056285, "grad_norm": 0.63233482837677, "learning_rate": 0.002828, "loss": 1.1314, "step": 440064 }, { "epoch": 33.030243902439025, "grad_norm": 0.5510907173156738, "learning_rate": 0.002828, "loss": 1.1284, "step": 440128 }, { "epoch": 33.035046904315195, "grad_norm": 0.590936541557312, "learning_rate": 0.002828, "loss": 1.1246, "step": 440192 }, { "epoch": 33.03984990619137, "grad_norm": 0.5466500520706177, "learning_rate": 0.002828, "loss": 1.1267, "step": 440256 }, { "epoch": 33.04465290806754, "grad_norm": 0.6178146004676819, "learning_rate": 0.002828, "loss": 1.1288, "step": 440320 }, { "epoch": 33.04945590994372, "grad_norm": 0.6533997058868408, "learning_rate": 0.002828, "loss": 1.1275, "step": 440384 }, { "epoch": 33.054258911819886, "grad_norm": 0.7475684285163879, "learning_rate": 0.002828, "loss": 1.132, "step": 440448 }, { "epoch": 33.05906191369606, "grad_norm": 0.6964446902275085, "learning_rate": 0.002828, "loss": 1.1329, "step": 440512 }, { "epoch": 33.06386491557223, "grad_norm": 0.6572120785713196, "learning_rate": 0.002828, "loss": 1.1313, "step": 440576 }, { "epoch": 33.06866791744841, "grad_norm": 0.6430488228797913, "learning_rate": 0.002828, "loss": 1.1312, "step": 440640 }, { "epoch": 33.07347091932458, "grad_norm": 0.5044344663619995, "learning_rate": 0.002828, "loss": 1.1287, "step": 440704 }, { "epoch": 33.07827392120075, "grad_norm": 0.7026501297950745, "learning_rate": 0.002828, "loss": 1.1306, "step": 440768 }, { "epoch": 33.08307692307692, "grad_norm": 0.5623020529747009, "learning_rate": 0.002828, "loss": 1.1241, "step": 440832 }, { "epoch": 33.08787992495309, "grad_norm": 0.7098539471626282, "learning_rate": 0.002828, "loss": 1.1364, "step": 440896 }, { "epoch": 33.09268292682927, "grad_norm": 0.6493317484855652, "learning_rate": 0.002828, "loss": 1.1341, "step": 440960 }, { "epoch": 33.09748592870544, "grad_norm": 0.4738115072250366, "learning_rate": 0.002828, "loss": 1.1348, "step": 441024 }, { "epoch": 33.102288930581615, "grad_norm": 0.6053628325462341, "learning_rate": 0.002828, "loss": 1.1364, "step": 441088 }, { "epoch": 33.107091932457784, "grad_norm": 0.6097379326820374, "learning_rate": 0.002828, "loss": 1.1236, "step": 441152 }, { "epoch": 33.11189493433396, "grad_norm": 0.650699257850647, "learning_rate": 0.002828, "loss": 1.1299, "step": 441216 }, { "epoch": 33.11669793621013, "grad_norm": 0.5362073183059692, "learning_rate": 0.002828, "loss": 1.1331, "step": 441280 }, { "epoch": 33.12150093808631, "grad_norm": 0.6099264621734619, "learning_rate": 0.002828, "loss": 1.1291, "step": 441344 }, { "epoch": 33.126303939962476, "grad_norm": 0.5848670601844788, "learning_rate": 0.002828, "loss": 1.1305, "step": 441408 }, { "epoch": 33.13110694183865, "grad_norm": 0.668308675289154, "learning_rate": 0.002828, "loss": 1.1348, "step": 441472 }, { "epoch": 33.13590994371482, "grad_norm": 0.6490448117256165, "learning_rate": 0.002828, "loss": 1.1398, "step": 441536 }, { "epoch": 33.14071294559099, "grad_norm": 0.5751400589942932, "learning_rate": 0.002828, "loss": 1.1296, "step": 441600 }, { "epoch": 33.14551594746717, "grad_norm": 0.6624541282653809, "learning_rate": 0.002828, "loss": 1.1303, "step": 441664 }, { "epoch": 33.15031894934334, "grad_norm": 0.5531780123710632, "learning_rate": 0.002828, "loss": 1.1377, "step": 441728 }, { "epoch": 33.15512195121951, "grad_norm": 0.5842620730400085, "learning_rate": 0.002828, "loss": 1.1351, "step": 441792 }, { "epoch": 33.15992495309568, "grad_norm": 0.6528531908988953, "learning_rate": 0.002828, "loss": 1.1372, "step": 441856 }, { "epoch": 33.16472795497186, "grad_norm": 0.6758348941802979, "learning_rate": 0.002828, "loss": 1.1381, "step": 441920 }, { "epoch": 33.16953095684803, "grad_norm": 0.7188329100608826, "learning_rate": 0.002828, "loss": 1.1354, "step": 441984 }, { "epoch": 33.174333958724205, "grad_norm": 0.8271748423576355, "learning_rate": 0.002828, "loss": 1.1332, "step": 442048 }, { "epoch": 33.179136960600374, "grad_norm": 0.6577697992324829, "learning_rate": 0.002828, "loss": 1.136, "step": 442112 }, { "epoch": 33.18393996247655, "grad_norm": 0.5746137499809265, "learning_rate": 0.002828, "loss": 1.1386, "step": 442176 }, { "epoch": 33.18874296435272, "grad_norm": 0.6213378310203552, "learning_rate": 0.002828, "loss": 1.1384, "step": 442240 }, { "epoch": 33.193545966228896, "grad_norm": 0.5850825309753418, "learning_rate": 0.002828, "loss": 1.1349, "step": 442304 }, { "epoch": 33.198348968105066, "grad_norm": 0.525640606880188, "learning_rate": 0.002828, "loss": 1.1373, "step": 442368 }, { "epoch": 33.203151969981235, "grad_norm": 0.575154721736908, "learning_rate": 0.002828, "loss": 1.1338, "step": 442432 }, { "epoch": 33.20795497185741, "grad_norm": 0.5524837374687195, "learning_rate": 0.002828, "loss": 1.1409, "step": 442496 }, { "epoch": 33.21275797373358, "grad_norm": 0.568781852722168, "learning_rate": 0.002828, "loss": 1.1369, "step": 442560 }, { "epoch": 33.21756097560976, "grad_norm": 0.7200085520744324, "learning_rate": 0.002828, "loss": 1.1332, "step": 442624 }, { "epoch": 33.22236397748593, "grad_norm": 0.564530611038208, "learning_rate": 0.002828, "loss": 1.142, "step": 442688 }, { "epoch": 33.2271669793621, "grad_norm": 0.7461503744125366, "learning_rate": 0.002828, "loss": 1.1355, "step": 442752 }, { "epoch": 33.23196998123827, "grad_norm": 0.564323365688324, "learning_rate": 0.002828, "loss": 1.1374, "step": 442816 }, { "epoch": 33.23677298311445, "grad_norm": 0.595663845539093, "learning_rate": 0.002828, "loss": 1.1365, "step": 442880 }, { "epoch": 33.24157598499062, "grad_norm": 0.7581208348274231, "learning_rate": 0.002828, "loss": 1.1421, "step": 442944 }, { "epoch": 33.246378986866794, "grad_norm": 0.581870973110199, "learning_rate": 0.002828, "loss": 1.1392, "step": 443008 }, { "epoch": 33.251181988742964, "grad_norm": 0.7423933148384094, "learning_rate": 0.002828, "loss": 1.1411, "step": 443072 }, { "epoch": 33.25598499061914, "grad_norm": 0.496860146522522, "learning_rate": 0.002828, "loss": 1.1322, "step": 443136 }, { "epoch": 33.26078799249531, "grad_norm": 0.5430870056152344, "learning_rate": 0.002828, "loss": 1.1357, "step": 443200 }, { "epoch": 33.26559099437148, "grad_norm": 0.5885310769081116, "learning_rate": 0.002828, "loss": 1.1435, "step": 443264 }, { "epoch": 33.270393996247655, "grad_norm": 0.5474235415458679, "learning_rate": 0.002828, "loss": 1.1409, "step": 443328 }, { "epoch": 33.275196998123825, "grad_norm": 0.5819973349571228, "learning_rate": 0.002828, "loss": 1.1409, "step": 443392 }, { "epoch": 33.28, "grad_norm": 0.5733329653739929, "learning_rate": 0.002828, "loss": 1.1401, "step": 443456 }, { "epoch": 33.28480300187617, "grad_norm": 0.5910017490386963, "learning_rate": 0.002828, "loss": 1.1366, "step": 443520 }, { "epoch": 33.28960600375235, "grad_norm": 0.6527971029281616, "learning_rate": 0.002828, "loss": 1.1372, "step": 443584 }, { "epoch": 33.294409005628516, "grad_norm": 0.5934697389602661, "learning_rate": 0.002828, "loss": 1.1426, "step": 443648 }, { "epoch": 33.29921200750469, "grad_norm": 0.6255312561988831, "learning_rate": 0.002828, "loss": 1.1405, "step": 443712 }, { "epoch": 33.30401500938086, "grad_norm": 0.657773494720459, "learning_rate": 0.002828, "loss": 1.1419, "step": 443776 }, { "epoch": 33.30881801125704, "grad_norm": 0.6253570914268494, "learning_rate": 0.002828, "loss": 1.1394, "step": 443840 }, { "epoch": 33.31362101313321, "grad_norm": 0.7471127510070801, "learning_rate": 0.002828, "loss": 1.1382, "step": 443904 }, { "epoch": 33.318424015009384, "grad_norm": 0.6400924921035767, "learning_rate": 0.002828, "loss": 1.1447, "step": 443968 }, { "epoch": 33.323227016885554, "grad_norm": 0.5876902937889099, "learning_rate": 0.002828, "loss": 1.137, "step": 444032 }, { "epoch": 33.32803001876172, "grad_norm": 0.566333532333374, "learning_rate": 0.002828, "loss": 1.139, "step": 444096 }, { "epoch": 33.3328330206379, "grad_norm": 0.4966079294681549, "learning_rate": 0.002828, "loss": 1.1432, "step": 444160 }, { "epoch": 33.33763602251407, "grad_norm": 0.5877018570899963, "learning_rate": 0.002828, "loss": 1.1398, "step": 444224 }, { "epoch": 33.342439024390245, "grad_norm": 0.5185602307319641, "learning_rate": 0.002828, "loss": 1.139, "step": 444288 }, { "epoch": 33.347242026266414, "grad_norm": 0.8135582804679871, "learning_rate": 0.002828, "loss": 1.1387, "step": 444352 }, { "epoch": 33.35204502814259, "grad_norm": 0.6387743353843689, "learning_rate": 0.002828, "loss": 1.1381, "step": 444416 }, { "epoch": 33.35684803001876, "grad_norm": 0.5668075084686279, "learning_rate": 0.002828, "loss": 1.1435, "step": 444480 }, { "epoch": 33.36165103189494, "grad_norm": 0.7903865575790405, "learning_rate": 0.002828, "loss": 1.14, "step": 444544 }, { "epoch": 33.366454033771106, "grad_norm": 0.6860894560813904, "learning_rate": 0.002828, "loss": 1.1449, "step": 444608 }, { "epoch": 33.37125703564728, "grad_norm": 0.6163083910942078, "learning_rate": 0.002828, "loss": 1.1408, "step": 444672 }, { "epoch": 33.37606003752345, "grad_norm": 0.5252005457878113, "learning_rate": 0.002828, "loss": 1.1445, "step": 444736 }, { "epoch": 33.38086303939963, "grad_norm": 0.6299996972084045, "learning_rate": 0.002828, "loss": 1.1385, "step": 444800 }, { "epoch": 33.3856660412758, "grad_norm": 0.6038787961006165, "learning_rate": 0.002828, "loss": 1.1419, "step": 444864 }, { "epoch": 33.39046904315197, "grad_norm": 0.6690816879272461, "learning_rate": 0.002828, "loss": 1.1429, "step": 444928 }, { "epoch": 33.39527204502814, "grad_norm": 0.4716529846191406, "learning_rate": 0.002828, "loss": 1.1408, "step": 444992 }, { "epoch": 33.40007504690431, "grad_norm": 0.5922883749008179, "learning_rate": 0.002828, "loss": 1.1458, "step": 445056 }, { "epoch": 33.40487804878049, "grad_norm": 0.6195250153541565, "learning_rate": 0.002828, "loss": 1.1429, "step": 445120 }, { "epoch": 33.40968105065666, "grad_norm": 0.6857700943946838, "learning_rate": 0.002828, "loss": 1.1406, "step": 445184 }, { "epoch": 33.414484052532835, "grad_norm": 0.5511137843132019, "learning_rate": 0.002828, "loss": 1.1419, "step": 445248 }, { "epoch": 33.419287054409004, "grad_norm": 0.6851285099983215, "learning_rate": 0.002828, "loss": 1.1423, "step": 445312 }, { "epoch": 33.42409005628518, "grad_norm": 0.7443287372589111, "learning_rate": 0.002828, "loss": 1.1462, "step": 445376 }, { "epoch": 33.42889305816135, "grad_norm": 0.5800443887710571, "learning_rate": 0.002828, "loss": 1.1457, "step": 445440 }, { "epoch": 33.433696060037526, "grad_norm": 0.5672863125801086, "learning_rate": 0.002828, "loss": 1.1464, "step": 445504 }, { "epoch": 33.438499061913696, "grad_norm": 0.5842262506484985, "learning_rate": 0.002828, "loss": 1.1483, "step": 445568 }, { "epoch": 33.44330206378987, "grad_norm": 0.5936047434806824, "learning_rate": 0.002828, "loss": 1.1433, "step": 445632 }, { "epoch": 33.44810506566604, "grad_norm": 0.6675282120704651, "learning_rate": 0.002828, "loss": 1.1482, "step": 445696 }, { "epoch": 33.45290806754221, "grad_norm": 0.6432201266288757, "learning_rate": 0.002828, "loss": 1.1464, "step": 445760 }, { "epoch": 33.45771106941839, "grad_norm": 0.5785965323448181, "learning_rate": 0.002828, "loss": 1.1421, "step": 445824 }, { "epoch": 33.46251407129456, "grad_norm": 0.6529351472854614, "learning_rate": 0.002828, "loss": 1.1452, "step": 445888 }, { "epoch": 33.46731707317073, "grad_norm": 0.6261816620826721, "learning_rate": 0.002828, "loss": 1.1392, "step": 445952 }, { "epoch": 33.4721200750469, "grad_norm": 0.6078612804412842, "learning_rate": 0.002828, "loss": 1.1529, "step": 446016 }, { "epoch": 33.47692307692308, "grad_norm": 0.7155763506889343, "learning_rate": 0.002828, "loss": 1.1392, "step": 446080 }, { "epoch": 33.48172607879925, "grad_norm": 0.6430063843727112, "learning_rate": 0.002828, "loss": 1.1464, "step": 446144 }, { "epoch": 33.486529080675425, "grad_norm": 0.5534988641738892, "learning_rate": 0.002828, "loss": 1.1417, "step": 446208 }, { "epoch": 33.491332082551594, "grad_norm": 0.5888988971710205, "learning_rate": 0.002828, "loss": 1.1525, "step": 446272 }, { "epoch": 33.49613508442777, "grad_norm": 0.5352635383605957, "learning_rate": 0.002828, "loss": 1.145, "step": 446336 }, { "epoch": 33.50093808630394, "grad_norm": 0.4819788336753845, "learning_rate": 0.002828, "loss": 1.1435, "step": 446400 }, { "epoch": 33.505741088180116, "grad_norm": 0.6127516627311707, "learning_rate": 0.002828, "loss": 1.1517, "step": 446464 }, { "epoch": 33.510544090056285, "grad_norm": 0.6050450801849365, "learning_rate": 0.002828, "loss": 1.1435, "step": 446528 }, { "epoch": 33.515347091932455, "grad_norm": 0.4720316231250763, "learning_rate": 0.002828, "loss": 1.1456, "step": 446592 }, { "epoch": 33.52015009380863, "grad_norm": 0.5383008718490601, "learning_rate": 0.002828, "loss": 1.1454, "step": 446656 }, { "epoch": 33.5249530956848, "grad_norm": 0.6708769798278809, "learning_rate": 0.002828, "loss": 1.1396, "step": 446720 }, { "epoch": 33.52975609756098, "grad_norm": 0.5709279775619507, "learning_rate": 0.002828, "loss": 1.1475, "step": 446784 }, { "epoch": 33.534559099437146, "grad_norm": 0.6338633298873901, "learning_rate": 0.002828, "loss": 1.1509, "step": 446848 }, { "epoch": 33.53936210131332, "grad_norm": 0.6444924473762512, "learning_rate": 0.002828, "loss": 1.1445, "step": 446912 }, { "epoch": 33.54416510318949, "grad_norm": 0.5632554888725281, "learning_rate": 0.002828, "loss": 1.1482, "step": 446976 }, { "epoch": 33.54896810506567, "grad_norm": 0.578456461429596, "learning_rate": 0.002828, "loss": 1.1448, "step": 447040 }, { "epoch": 33.55377110694184, "grad_norm": 0.6251580715179443, "learning_rate": 0.002828, "loss": 1.1452, "step": 447104 }, { "epoch": 33.558574108818014, "grad_norm": 0.6657046675682068, "learning_rate": 0.002828, "loss": 1.1459, "step": 447168 }, { "epoch": 33.563377110694184, "grad_norm": 0.7276448011398315, "learning_rate": 0.002828, "loss": 1.1472, "step": 447232 }, { "epoch": 33.56818011257036, "grad_norm": 0.612055778503418, "learning_rate": 0.002828, "loss": 1.1442, "step": 447296 }, { "epoch": 33.57298311444653, "grad_norm": 0.5227575898170471, "learning_rate": 0.002828, "loss": 1.1482, "step": 447360 }, { "epoch": 33.5777861163227, "grad_norm": 0.6529867053031921, "learning_rate": 0.002828, "loss": 1.144, "step": 447424 }, { "epoch": 33.582589118198875, "grad_norm": 0.5356407165527344, "learning_rate": 0.002828, "loss": 1.1446, "step": 447488 }, { "epoch": 33.587392120075044, "grad_norm": 0.5991299748420715, "learning_rate": 0.002828, "loss": 1.1451, "step": 447552 }, { "epoch": 33.59219512195122, "grad_norm": 0.6475511193275452, "learning_rate": 0.002828, "loss": 1.1528, "step": 447616 }, { "epoch": 33.59699812382739, "grad_norm": 0.6805687546730042, "learning_rate": 0.002828, "loss": 1.1452, "step": 447680 }, { "epoch": 33.60180112570357, "grad_norm": 0.5567482709884644, "learning_rate": 0.002828, "loss": 1.151, "step": 447744 }, { "epoch": 33.606604127579736, "grad_norm": 0.7172110080718994, "learning_rate": 0.002828, "loss": 1.1463, "step": 447808 }, { "epoch": 33.61140712945591, "grad_norm": 0.6430109739303589, "learning_rate": 0.002828, "loss": 1.1466, "step": 447872 }, { "epoch": 33.61621013133208, "grad_norm": 0.525716245174408, "learning_rate": 0.002828, "loss": 1.1508, "step": 447936 }, { "epoch": 33.62101313320826, "grad_norm": 0.6650141477584839, "learning_rate": 0.002828, "loss": 1.1528, "step": 448000 }, { "epoch": 33.62581613508443, "grad_norm": 0.5755172371864319, "learning_rate": 0.002828, "loss": 1.1478, "step": 448064 }, { "epoch": 33.6306191369606, "grad_norm": 0.6707451343536377, "learning_rate": 0.002828, "loss": 1.1486, "step": 448128 }, { "epoch": 33.63542213883677, "grad_norm": 0.5326623916625977, "learning_rate": 0.002828, "loss": 1.1545, "step": 448192 }, { "epoch": 33.64022514071294, "grad_norm": 0.7620773911476135, "learning_rate": 0.002828, "loss": 1.1525, "step": 448256 }, { "epoch": 33.64502814258912, "grad_norm": 0.7047601342201233, "learning_rate": 0.002828, "loss": 1.1534, "step": 448320 }, { "epoch": 33.64983114446529, "grad_norm": 0.6242048740386963, "learning_rate": 0.002828, "loss": 1.1518, "step": 448384 }, { "epoch": 33.654634146341465, "grad_norm": 0.6024118065834045, "learning_rate": 0.002828, "loss": 1.1533, "step": 448448 }, { "epoch": 33.659437148217634, "grad_norm": 0.593969464302063, "learning_rate": 0.002828, "loss": 1.1434, "step": 448512 }, { "epoch": 33.66424015009381, "grad_norm": 0.6917033195495605, "learning_rate": 0.002828, "loss": 1.1489, "step": 448576 }, { "epoch": 33.66904315196998, "grad_norm": 0.6661319136619568, "learning_rate": 0.002828, "loss": 1.1468, "step": 448640 }, { "epoch": 33.673846153846156, "grad_norm": 0.5408964157104492, "learning_rate": 0.002828, "loss": 1.1471, "step": 448704 }, { "epoch": 33.678649155722326, "grad_norm": 0.7085400819778442, "learning_rate": 0.002828, "loss": 1.1569, "step": 448768 }, { "epoch": 33.6834521575985, "grad_norm": 0.6100766658782959, "learning_rate": 0.002828, "loss": 1.1529, "step": 448832 }, { "epoch": 33.68825515947467, "grad_norm": 0.7409677505493164, "learning_rate": 0.002828, "loss": 1.1514, "step": 448896 }, { "epoch": 33.69305816135084, "grad_norm": 0.5764803886413574, "learning_rate": 0.002828, "loss": 1.1519, "step": 448960 }, { "epoch": 33.69786116322702, "grad_norm": 0.6267707943916321, "learning_rate": 0.002828, "loss": 1.1465, "step": 449024 }, { "epoch": 33.70266416510319, "grad_norm": 0.6436747312545776, "learning_rate": 0.002828, "loss": 1.154, "step": 449088 }, { "epoch": 33.70746716697936, "grad_norm": 0.5536535382270813, "learning_rate": 0.002828, "loss": 1.1471, "step": 449152 }, { "epoch": 33.71227016885553, "grad_norm": 0.554177463054657, "learning_rate": 0.002828, "loss": 1.1482, "step": 449216 }, { "epoch": 33.71707317073171, "grad_norm": 0.5661305785179138, "learning_rate": 0.002828, "loss": 1.1524, "step": 449280 }, { "epoch": 33.72187617260788, "grad_norm": 0.6556863188743591, "learning_rate": 0.002828, "loss": 1.1497, "step": 449344 }, { "epoch": 33.726679174484055, "grad_norm": 0.6361242532730103, "learning_rate": 0.002828, "loss": 1.1536, "step": 449408 }, { "epoch": 33.731482176360224, "grad_norm": 0.5205981731414795, "learning_rate": 0.002828, "loss": 1.1532, "step": 449472 }, { "epoch": 33.7362851782364, "grad_norm": 0.6734727025032043, "learning_rate": 0.002828, "loss": 1.1533, "step": 449536 }, { "epoch": 33.74108818011257, "grad_norm": 0.6392199397087097, "learning_rate": 0.002828, "loss": 1.1498, "step": 449600 }, { "epoch": 33.745891181988746, "grad_norm": 0.6905595660209656, "learning_rate": 0.002828, "loss": 1.147, "step": 449664 }, { "epoch": 33.750694183864915, "grad_norm": 0.6617002487182617, "learning_rate": 0.002828, "loss": 1.1502, "step": 449728 }, { "epoch": 33.755497185741085, "grad_norm": 0.5721489191055298, "learning_rate": 0.002828, "loss": 1.152, "step": 449792 }, { "epoch": 33.76030018761726, "grad_norm": 0.6114367842674255, "learning_rate": 0.002828, "loss": 1.1549, "step": 449856 }, { "epoch": 33.76510318949343, "grad_norm": 0.5258558988571167, "learning_rate": 0.002828, "loss": 1.1524, "step": 449920 }, { "epoch": 33.76990619136961, "grad_norm": 0.5574025511741638, "learning_rate": 0.002828, "loss": 1.1475, "step": 449984 }, { "epoch": 33.774709193245776, "grad_norm": 0.544082760810852, "learning_rate": 0.002828, "loss": 1.1556, "step": 450048 }, { "epoch": 33.77951219512195, "grad_norm": 0.6477503776550293, "learning_rate": 0.002828, "loss": 1.1496, "step": 450112 }, { "epoch": 33.78431519699812, "grad_norm": 0.5774228572845459, "learning_rate": 0.002828, "loss": 1.1572, "step": 450176 }, { "epoch": 33.7891181988743, "grad_norm": 0.633441686630249, "learning_rate": 0.002828, "loss": 1.1545, "step": 450240 }, { "epoch": 33.79392120075047, "grad_norm": 0.7636280655860901, "learning_rate": 0.002828, "loss": 1.1517, "step": 450304 }, { "epoch": 33.798724202626644, "grad_norm": 0.5494087934494019, "learning_rate": 0.002828, "loss": 1.1514, "step": 450368 }, { "epoch": 33.803527204502814, "grad_norm": 0.654360294342041, "learning_rate": 0.002828, "loss": 1.1582, "step": 450432 }, { "epoch": 33.80833020637899, "grad_norm": 0.6439893245697021, "learning_rate": 0.002828, "loss": 1.156, "step": 450496 }, { "epoch": 33.81313320825516, "grad_norm": 0.6434905529022217, "learning_rate": 0.002828, "loss": 1.1461, "step": 450560 }, { "epoch": 33.81793621013133, "grad_norm": 0.5695158839225769, "learning_rate": 0.002828, "loss": 1.1493, "step": 450624 }, { "epoch": 33.822739212007505, "grad_norm": 0.6693202257156372, "learning_rate": 0.002828, "loss": 1.1543, "step": 450688 }, { "epoch": 33.827542213883675, "grad_norm": 0.5362828969955444, "learning_rate": 0.002828, "loss": 1.1537, "step": 450752 }, { "epoch": 33.83234521575985, "grad_norm": 0.5842569470405579, "learning_rate": 0.002828, "loss": 1.1573, "step": 450816 }, { "epoch": 33.83714821763602, "grad_norm": 0.8023243546485901, "learning_rate": 0.002828, "loss": 1.1573, "step": 450880 }, { "epoch": 33.8419512195122, "grad_norm": 0.6493667364120483, "learning_rate": 0.002828, "loss": 1.1456, "step": 450944 }, { "epoch": 33.846754221388366, "grad_norm": 0.6712051630020142, "learning_rate": 0.002828, "loss": 1.1491, "step": 451008 }, { "epoch": 33.85155722326454, "grad_norm": 0.7749685645103455, "learning_rate": 0.002828, "loss": 1.1523, "step": 451072 }, { "epoch": 33.85636022514071, "grad_norm": 0.5475408434867859, "learning_rate": 0.002828, "loss": 1.1497, "step": 451136 }, { "epoch": 33.86116322701689, "grad_norm": 0.570763885974884, "learning_rate": 0.002828, "loss": 1.1498, "step": 451200 }, { "epoch": 33.86596622889306, "grad_norm": 0.6396635174751282, "learning_rate": 0.002828, "loss": 1.1529, "step": 451264 }, { "epoch": 33.870769230769234, "grad_norm": 0.6525161862373352, "learning_rate": 0.002828, "loss": 1.1569, "step": 451328 }, { "epoch": 33.8755722326454, "grad_norm": 0.6440078020095825, "learning_rate": 0.002828, "loss": 1.1531, "step": 451392 }, { "epoch": 33.88037523452157, "grad_norm": 0.557655930519104, "learning_rate": 0.002828, "loss": 1.1514, "step": 451456 }, { "epoch": 33.88517823639775, "grad_norm": 0.6325395107269287, "learning_rate": 0.002828, "loss": 1.1484, "step": 451520 }, { "epoch": 33.88998123827392, "grad_norm": 0.6961609721183777, "learning_rate": 0.002828, "loss": 1.1562, "step": 451584 }, { "epoch": 33.894784240150095, "grad_norm": 0.6224822402000427, "learning_rate": 0.002828, "loss": 1.1533, "step": 451648 }, { "epoch": 33.899587242026264, "grad_norm": 0.5128984451293945, "learning_rate": 0.002828, "loss": 1.1521, "step": 451712 }, { "epoch": 33.90439024390244, "grad_norm": 0.5620936155319214, "learning_rate": 0.002828, "loss": 1.7302, "step": 451776 }, { "epoch": 33.90919324577861, "grad_norm": 0.5801410675048828, "learning_rate": 0.002828, "loss": 3.5526, "step": 451840 }, { "epoch": 33.91399624765479, "grad_norm": 0.5933526158332825, "learning_rate": 0.002828, "loss": 3.1783, "step": 451904 }, { "epoch": 33.918799249530956, "grad_norm": 0.6869750618934631, "learning_rate": 0.002828, "loss": 3.4, "step": 451968 }, { "epoch": 33.92360225140713, "grad_norm": 0.5913759469985962, "learning_rate": 0.002828, "loss": 3.3694, "step": 452032 }, { "epoch": 33.9284052532833, "grad_norm": 0.5191683173179626, "learning_rate": 0.002828, "loss": 3.4183, "step": 452096 }, { "epoch": 33.93320825515948, "grad_norm": 0.5402680039405823, "learning_rate": 0.002828, "loss": 3.5793, "step": 452160 }, { "epoch": 33.93801125703565, "grad_norm": 0.5815765857696533, "learning_rate": 0.002828, "loss": 3.2655, "step": 452224 }, { "epoch": 33.94281425891182, "grad_norm": 0.5683310627937317, "learning_rate": 0.002828, "loss": 3.0861, "step": 452288 }, { "epoch": 33.94761726078799, "grad_norm": 0.6940186619758606, "learning_rate": 0.002828, "loss": 2.9187, "step": 452352 }, { "epoch": 33.95242026266416, "grad_norm": 0.5648021697998047, "learning_rate": 0.002828, "loss": 2.7537, "step": 452416 }, { "epoch": 33.95722326454034, "grad_norm": 0.6573771834373474, "learning_rate": 0.002828, "loss": 2.71, "step": 452480 }, { "epoch": 33.96202626641651, "grad_norm": 0.645450234413147, "learning_rate": 0.002828, "loss": 2.6809, "step": 452544 }, { "epoch": 33.966829268292685, "grad_norm": 0.5694723725318909, "learning_rate": 0.002828, "loss": 2.8897, "step": 452608 }, { "epoch": 33.971632270168854, "grad_norm": 0.6184071898460388, "learning_rate": 0.002828, "loss": 2.9641, "step": 452672 }, { "epoch": 33.97643527204503, "grad_norm": 0.7220840454101562, "learning_rate": 0.002828, "loss": 2.8128, "step": 452736 }, { "epoch": 33.9812382739212, "grad_norm": 0.7192930579185486, "learning_rate": 0.002828, "loss": 2.7682, "step": 452800 }, { "epoch": 33.986041275797376, "grad_norm": 0.7508559226989746, "learning_rate": 0.002828, "loss": 2.768, "step": 452864 }, { "epoch": 33.990844277673546, "grad_norm": 0.5012671947479248, "learning_rate": 0.002828, "loss": 2.6946, "step": 452928 }, { "epoch": 33.99564727954972, "grad_norm": 0.5871996283531189, "learning_rate": 0.002828, "loss": 2.6686, "step": 452992 }, { "epoch": 34.00045028142589, "grad_norm": 0.6014786958694458, "learning_rate": 0.002828, "loss": 2.6525, "step": 453056 }, { "epoch": 34.00525328330206, "grad_norm": 0.6529600024223328, "learning_rate": 0.002828, "loss": 2.618, "step": 453120 }, { "epoch": 34.01005628517824, "grad_norm": 0.6394699215888977, "learning_rate": 0.002828, "loss": 2.6139, "step": 453184 }, { "epoch": 34.014859287054406, "grad_norm": 0.6919693350791931, "learning_rate": 0.002828, "loss": 2.5977, "step": 453248 }, { "epoch": 34.01966228893058, "grad_norm": 0.5869269967079163, "learning_rate": 0.002828, "loss": 2.579, "step": 453312 }, { "epoch": 34.02446529080675, "grad_norm": 0.6233799457550049, "learning_rate": 0.002828, "loss": 2.575, "step": 453376 }, { "epoch": 34.02926829268293, "grad_norm": 0.7056044340133667, "learning_rate": 0.002828, "loss": 2.568, "step": 453440 }, { "epoch": 34.0340712945591, "grad_norm": 0.6083732843399048, "learning_rate": 0.002828, "loss": 2.5443, "step": 453504 }, { "epoch": 34.038874296435274, "grad_norm": 0.6978457570075989, "learning_rate": 0.002828, "loss": 2.5126, "step": 453568 }, { "epoch": 34.043677298311444, "grad_norm": 0.6388948559761047, "learning_rate": 0.002828, "loss": 2.5081, "step": 453632 }, { "epoch": 34.04848030018762, "grad_norm": 0.5888407230377197, "learning_rate": 0.002828, "loss": 2.4963, "step": 453696 }, { "epoch": 34.05328330206379, "grad_norm": 0.5932326912879944, "learning_rate": 0.002828, "loss": 2.4559, "step": 453760 }, { "epoch": 34.058086303939966, "grad_norm": 0.7458741664886475, "learning_rate": 0.002828, "loss": 2.4359, "step": 453824 }, { "epoch": 34.062889305816135, "grad_norm": 0.5495181679725647, "learning_rate": 0.002828, "loss": 2.3964, "step": 453888 }, { "epoch": 34.067692307692305, "grad_norm": 0.5372458696365356, "learning_rate": 0.002828, "loss": 2.4007, "step": 453952 }, { "epoch": 34.07249530956848, "grad_norm": 0.525921106338501, "learning_rate": 0.002828, "loss": 2.3913, "step": 454016 }, { "epoch": 34.07729831144465, "grad_norm": 0.6002963781356812, "learning_rate": 0.002828, "loss": 2.2537, "step": 454080 }, { "epoch": 34.08210131332083, "grad_norm": 0.6559569239616394, "learning_rate": 0.002828, "loss": 2.1257, "step": 454144 }, { "epoch": 34.086904315196996, "grad_norm": 0.6380961537361145, "learning_rate": 0.002828, "loss": 2.0642, "step": 454208 }, { "epoch": 34.09170731707317, "grad_norm": 0.6877055168151855, "learning_rate": 0.002828, "loss": 2.0349, "step": 454272 }, { "epoch": 34.09651031894934, "grad_norm": 0.6221262216567993, "learning_rate": 0.002828, "loss": 1.9857, "step": 454336 }, { "epoch": 34.10131332082552, "grad_norm": 0.7388865351676941, "learning_rate": 0.002828, "loss": 1.9721, "step": 454400 }, { "epoch": 34.10611632270169, "grad_norm": 0.5388085246086121, "learning_rate": 0.002828, "loss": 1.9403, "step": 454464 }, { "epoch": 34.110919324577864, "grad_norm": 0.4879409074783325, "learning_rate": 0.002828, "loss": 1.9614, "step": 454528 }, { "epoch": 34.11572232645403, "grad_norm": 0.5843081474304199, "learning_rate": 0.002828, "loss": 1.9817, "step": 454592 }, { "epoch": 34.12052532833021, "grad_norm": 0.671530544757843, "learning_rate": 0.002828, "loss": 2.1895, "step": 454656 }, { "epoch": 34.12532833020638, "grad_norm": 0.6688728928565979, "learning_rate": 0.002828, "loss": 2.1792, "step": 454720 }, { "epoch": 34.13013133208255, "grad_norm": 0.524191677570343, "learning_rate": 0.002828, "loss": 2.1088, "step": 454784 }, { "epoch": 34.134934333958725, "grad_norm": 0.5124364495277405, "learning_rate": 0.002828, "loss": 2.0359, "step": 454848 }, { "epoch": 34.139737335834894, "grad_norm": 0.5780380964279175, "learning_rate": 0.002828, "loss": 1.9583, "step": 454912 }, { "epoch": 34.14454033771107, "grad_norm": 0.5620104074478149, "learning_rate": 0.002828, "loss": 1.9113, "step": 454976 }, { "epoch": 34.14934333958724, "grad_norm": 0.7543662786483765, "learning_rate": 0.002828, "loss": 1.9022, "step": 455040 }, { "epoch": 34.15414634146342, "grad_norm": 0.6978630423545837, "learning_rate": 0.002828, "loss": 1.8686, "step": 455104 }, { "epoch": 34.158949343339586, "grad_norm": 0.6158016920089722, "learning_rate": 0.002828, "loss": 1.833, "step": 455168 }, { "epoch": 34.16375234521576, "grad_norm": 0.5737127065658569, "learning_rate": 0.002828, "loss": 1.8432, "step": 455232 }, { "epoch": 34.16855534709193, "grad_norm": 0.5505557060241699, "learning_rate": 0.002828, "loss": 1.7996, "step": 455296 }, { "epoch": 34.17335834896811, "grad_norm": 0.7005407214164734, "learning_rate": 0.002828, "loss": 1.7848, "step": 455360 }, { "epoch": 34.17816135084428, "grad_norm": 0.6658907532691956, "learning_rate": 0.002828, "loss": 1.7505, "step": 455424 }, { "epoch": 34.18296435272045, "grad_norm": 0.6440961360931396, "learning_rate": 0.002828, "loss": 1.7453, "step": 455488 }, { "epoch": 34.18776735459662, "grad_norm": 0.5199069380760193, "learning_rate": 0.002828, "loss": 1.721, "step": 455552 }, { "epoch": 34.19257035647279, "grad_norm": 0.6788700222969055, "learning_rate": 0.002828, "loss": 1.7196, "step": 455616 }, { "epoch": 34.19737335834897, "grad_norm": 0.6521267294883728, "learning_rate": 0.002828, "loss": 1.676, "step": 455680 }, { "epoch": 34.20217636022514, "grad_norm": 0.5314558148384094, "learning_rate": 0.002828, "loss": 1.678, "step": 455744 }, { "epoch": 34.206979362101315, "grad_norm": 0.8381668925285339, "learning_rate": 0.002828, "loss": 1.6474, "step": 455808 }, { "epoch": 34.211782363977484, "grad_norm": 0.6929973363876343, "learning_rate": 0.002828, "loss": 1.6476, "step": 455872 }, { "epoch": 34.21658536585366, "grad_norm": 0.594032347202301, "learning_rate": 0.002828, "loss": 1.6283, "step": 455936 }, { "epoch": 34.22138836772983, "grad_norm": 0.7464234828948975, "learning_rate": 0.002828, "loss": 1.6262, "step": 456000 }, { "epoch": 34.226191369606006, "grad_norm": 0.6788607239723206, "learning_rate": 0.002828, "loss": 1.6083, "step": 456064 }, { "epoch": 34.230994371482176, "grad_norm": 0.6504068970680237, "learning_rate": 0.002828, "loss": 1.6146, "step": 456128 }, { "epoch": 34.23579737335835, "grad_norm": 0.6420795917510986, "learning_rate": 0.002828, "loss": 1.6126, "step": 456192 }, { "epoch": 34.24060037523452, "grad_norm": 0.6302408576011658, "learning_rate": 0.002828, "loss": 1.5947, "step": 456256 }, { "epoch": 34.24540337711069, "grad_norm": 0.7099950909614563, "learning_rate": 0.002828, "loss": 1.5792, "step": 456320 }, { "epoch": 34.25020637898687, "grad_norm": 0.6178498268127441, "learning_rate": 0.002828, "loss": 1.5584, "step": 456384 }, { "epoch": 34.25500938086304, "grad_norm": 0.6391167640686035, "learning_rate": 0.002828, "loss": 1.5621, "step": 456448 }, { "epoch": 34.25981238273921, "grad_norm": 0.48477670550346375, "learning_rate": 0.002828, "loss": 1.547, "step": 456512 }, { "epoch": 34.26461538461538, "grad_norm": 0.5507023334503174, "learning_rate": 0.002828, "loss": 1.5239, "step": 456576 }, { "epoch": 34.26941838649156, "grad_norm": 0.6492827534675598, "learning_rate": 0.002828, "loss": 1.5009, "step": 456640 }, { "epoch": 34.27422138836773, "grad_norm": 0.5808483362197876, "learning_rate": 0.002828, "loss": 1.747, "step": 456704 }, { "epoch": 34.279024390243904, "grad_norm": 0.7006205916404724, "learning_rate": 0.002828, "loss": 1.5044, "step": 456768 }, { "epoch": 34.283827392120074, "grad_norm": 0.5767326951026917, "learning_rate": 0.002828, "loss": 1.4884, "step": 456832 }, { "epoch": 34.28863039399625, "grad_norm": 0.5823490619659424, "learning_rate": 0.002828, "loss": 1.5015, "step": 456896 }, { "epoch": 34.29343339587242, "grad_norm": 0.6484927535057068, "learning_rate": 0.002828, "loss": 1.4816, "step": 456960 }, { "epoch": 34.298236397748596, "grad_norm": 0.5902293920516968, "learning_rate": 0.002828, "loss": 1.4473, "step": 457024 }, { "epoch": 34.303039399624765, "grad_norm": 0.6143577694892883, "learning_rate": 0.002828, "loss": 1.4276, "step": 457088 }, { "epoch": 34.307842401500935, "grad_norm": 0.6154422760009766, "learning_rate": 0.002828, "loss": 1.642, "step": 457152 }, { "epoch": 34.31264540337711, "grad_norm": 0.5513522624969482, "learning_rate": 0.002828, "loss": 1.4159, "step": 457216 }, { "epoch": 34.31744840525328, "grad_norm": 0.6686850190162659, "learning_rate": 0.002828, "loss": 1.4497, "step": 457280 }, { "epoch": 34.32225140712946, "grad_norm": 0.5761899948120117, "learning_rate": 0.002828, "loss": 1.3841, "step": 457344 }, { "epoch": 34.327054409005626, "grad_norm": 0.6581642031669617, "learning_rate": 0.002828, "loss": 1.3786, "step": 457408 }, { "epoch": 34.3318574108818, "grad_norm": 0.6536948680877686, "learning_rate": 0.002828, "loss": 1.4748, "step": 457472 }, { "epoch": 34.33666041275797, "grad_norm": 0.6962592601776123, "learning_rate": 0.002828, "loss": 1.5362, "step": 457536 }, { "epoch": 34.34146341463415, "grad_norm": 0.6023150086402893, "learning_rate": 0.002828, "loss": 1.3787, "step": 457600 }, { "epoch": 34.34626641651032, "grad_norm": 0.654251754283905, "learning_rate": 0.002828, "loss": 1.4322, "step": 457664 }, { "epoch": 34.351069418386494, "grad_norm": 0.5367180109024048, "learning_rate": 0.002828, "loss": 1.6064, "step": 457728 }, { "epoch": 34.35587242026266, "grad_norm": 0.5423946976661682, "learning_rate": 0.002828, "loss": 1.3929, "step": 457792 }, { "epoch": 34.36067542213884, "grad_norm": 0.5561156272888184, "learning_rate": 0.002828, "loss": 1.33, "step": 457856 }, { "epoch": 34.36547842401501, "grad_norm": 0.5509253740310669, "learning_rate": 0.002828, "loss": 1.3037, "step": 457920 }, { "epoch": 34.37028142589118, "grad_norm": 0.6430239677429199, "learning_rate": 0.002828, "loss": 1.2854, "step": 457984 }, { "epoch": 34.375084427767355, "grad_norm": 0.5822476148605347, "learning_rate": 0.002828, "loss": 1.2722, "step": 458048 }, { "epoch": 34.379887429643524, "grad_norm": 0.6652078032493591, "learning_rate": 0.002828, "loss": 1.2584, "step": 458112 }, { "epoch": 34.3846904315197, "grad_norm": 0.5696972608566284, "learning_rate": 0.002828, "loss": 1.2443, "step": 458176 }, { "epoch": 34.38949343339587, "grad_norm": 0.5622748732566833, "learning_rate": 0.002828, "loss": 1.237, "step": 458240 }, { "epoch": 34.39429643527205, "grad_norm": 0.5725105404853821, "learning_rate": 0.002828, "loss": 1.2314, "step": 458304 }, { "epoch": 34.399099437148216, "grad_norm": 0.6276898980140686, "learning_rate": 0.002828, "loss": 1.2195, "step": 458368 }, { "epoch": 34.40390243902439, "grad_norm": 0.6529742479324341, "learning_rate": 0.002828, "loss": 1.2157, "step": 458432 }, { "epoch": 34.40870544090056, "grad_norm": 0.5537835955619812, "learning_rate": 0.002828, "loss": 1.2123, "step": 458496 }, { "epoch": 34.41350844277674, "grad_norm": 0.4857408404350281, "learning_rate": 0.002828, "loss": 1.1988, "step": 458560 }, { "epoch": 34.41831144465291, "grad_norm": 0.6616053581237793, "learning_rate": 0.002828, "loss": 1.2019, "step": 458624 }, { "epoch": 34.423114446529084, "grad_norm": 0.6578753590583801, "learning_rate": 0.002828, "loss": 1.1991, "step": 458688 }, { "epoch": 34.42791744840525, "grad_norm": 0.5510697364807129, "learning_rate": 0.002828, "loss": 1.196, "step": 458752 }, { "epoch": 34.43272045028142, "grad_norm": 0.7690936326980591, "learning_rate": 0.002828, "loss": 1.1898, "step": 458816 }, { "epoch": 34.4375234521576, "grad_norm": 0.6071969270706177, "learning_rate": 0.002828, "loss": 1.1845, "step": 458880 }, { "epoch": 34.44232645403377, "grad_norm": 0.773906409740448, "learning_rate": 0.002828, "loss": 1.1851, "step": 458944 }, { "epoch": 34.447129455909945, "grad_norm": 0.5726880431175232, "learning_rate": 0.002828, "loss": 1.184, "step": 459008 }, { "epoch": 34.451932457786114, "grad_norm": 0.5598574876785278, "learning_rate": 0.002828, "loss": 1.1842, "step": 459072 }, { "epoch": 34.45673545966229, "grad_norm": 0.6979678273200989, "learning_rate": 0.002828, "loss": 1.1765, "step": 459136 }, { "epoch": 34.46153846153846, "grad_norm": 0.6438438296318054, "learning_rate": 0.002828, "loss": 1.1765, "step": 459200 }, { "epoch": 34.466341463414636, "grad_norm": 0.6009986400604248, "learning_rate": 0.002828, "loss": 1.1695, "step": 459264 }, { "epoch": 34.471144465290806, "grad_norm": 0.8207782506942749, "learning_rate": 0.002828, "loss": 1.1736, "step": 459328 }, { "epoch": 34.47594746716698, "grad_norm": 0.5096902251243591, "learning_rate": 0.002828, "loss": 1.1669, "step": 459392 }, { "epoch": 34.48075046904315, "grad_norm": 0.6237781643867493, "learning_rate": 0.002828, "loss": 1.1659, "step": 459456 }, { "epoch": 34.48555347091933, "grad_norm": 0.66024249792099, "learning_rate": 0.002828, "loss": 1.1657, "step": 459520 }, { "epoch": 34.4903564727955, "grad_norm": 0.5944492816925049, "learning_rate": 0.002828, "loss": 1.1671, "step": 459584 }, { "epoch": 34.49515947467167, "grad_norm": 0.5822668075561523, "learning_rate": 0.002828, "loss": 1.1607, "step": 459648 }, { "epoch": 34.49996247654784, "grad_norm": 0.6067987680435181, "learning_rate": 0.002828, "loss": 1.1577, "step": 459712 }, { "epoch": 34.50476547842401, "grad_norm": 0.5925935506820679, "learning_rate": 0.002828, "loss": 1.1623, "step": 459776 }, { "epoch": 34.50956848030019, "grad_norm": 0.6033923029899597, "learning_rate": 0.002828, "loss": 1.1598, "step": 459840 }, { "epoch": 34.51437148217636, "grad_norm": 0.5351348519325256, "learning_rate": 0.002828, "loss": 1.16, "step": 459904 }, { "epoch": 34.519174484052535, "grad_norm": 0.5987980365753174, "learning_rate": 0.002828, "loss": 1.156, "step": 459968 }, { "epoch": 34.523977485928704, "grad_norm": 0.5701244473457336, "learning_rate": 0.002828, "loss": 1.1553, "step": 460032 }, { "epoch": 34.52878048780488, "grad_norm": 0.5841987729072571, "learning_rate": 0.002828, "loss": 1.1597, "step": 460096 }, { "epoch": 34.53358348968105, "grad_norm": 0.5855494737625122, "learning_rate": 0.002828, "loss": 1.1575, "step": 460160 }, { "epoch": 34.538386491557226, "grad_norm": 0.6749581694602966, "learning_rate": 0.002828, "loss": 1.1601, "step": 460224 }, { "epoch": 34.543189493433395, "grad_norm": 0.6047836542129517, "learning_rate": 0.002828, "loss": 1.1576, "step": 460288 }, { "epoch": 34.54799249530957, "grad_norm": 0.6513249278068542, "learning_rate": 0.002828, "loss": 1.1546, "step": 460352 }, { "epoch": 34.55279549718574, "grad_norm": 0.7231667637825012, "learning_rate": 0.002828, "loss": 1.1619, "step": 460416 }, { "epoch": 34.55759849906191, "grad_norm": 0.7058590054512024, "learning_rate": 0.002828, "loss": 1.1547, "step": 460480 }, { "epoch": 34.56240150093809, "grad_norm": 0.6364291310310364, "learning_rate": 0.002828, "loss": 1.1552, "step": 460544 }, { "epoch": 34.567204502814256, "grad_norm": 0.6043879389762878, "learning_rate": 0.002828, "loss": 1.159, "step": 460608 }, { "epoch": 34.57200750469043, "grad_norm": 0.5241459012031555, "learning_rate": 0.002828, "loss": 1.1589, "step": 460672 }, { "epoch": 34.5768105065666, "grad_norm": 0.5184655785560608, "learning_rate": 0.002828, "loss": 1.1575, "step": 460736 }, { "epoch": 34.58161350844278, "grad_norm": 0.6597776412963867, "learning_rate": 0.002828, "loss": 1.154, "step": 460800 }, { "epoch": 34.58641651031895, "grad_norm": 0.5717831254005432, "learning_rate": 0.002828, "loss": 1.1548, "step": 460864 }, { "epoch": 34.591219512195124, "grad_norm": 0.7088619470596313, "learning_rate": 0.002828, "loss": 1.1565, "step": 460928 }, { "epoch": 34.596022514071294, "grad_norm": 0.6347498893737793, "learning_rate": 0.002828, "loss": 1.1573, "step": 460992 }, { "epoch": 34.60082551594747, "grad_norm": 0.5776677131652832, "learning_rate": 0.002828, "loss": 1.1565, "step": 461056 }, { "epoch": 34.60562851782364, "grad_norm": 0.5810860395431519, "learning_rate": 0.002828, "loss": 1.1603, "step": 461120 }, { "epoch": 34.610431519699816, "grad_norm": 0.6792919039726257, "learning_rate": 0.002828, "loss": 1.1483, "step": 461184 }, { "epoch": 34.615234521575985, "grad_norm": 0.6665756702423096, "learning_rate": 0.002828, "loss": 1.1556, "step": 461248 }, { "epoch": 34.620037523452154, "grad_norm": 0.5993114709854126, "learning_rate": 0.002828, "loss": 1.1533, "step": 461312 }, { "epoch": 34.62484052532833, "grad_norm": 0.7352228164672852, "learning_rate": 0.002828, "loss": 1.1542, "step": 461376 }, { "epoch": 34.6296435272045, "grad_norm": 0.5764060020446777, "learning_rate": 0.002828, "loss": 1.1563, "step": 461440 }, { "epoch": 34.63444652908068, "grad_norm": 0.5434015989303589, "learning_rate": 0.002828, "loss": 1.1488, "step": 461504 }, { "epoch": 34.639249530956846, "grad_norm": 0.5354040265083313, "learning_rate": 0.002828, "loss": 1.1533, "step": 461568 }, { "epoch": 34.64405253283302, "grad_norm": 0.618698000907898, "learning_rate": 0.002828, "loss": 1.1528, "step": 461632 }, { "epoch": 34.64885553470919, "grad_norm": 0.6807321906089783, "learning_rate": 0.002828, "loss": 1.1542, "step": 461696 }, { "epoch": 34.65365853658537, "grad_norm": 0.5973524451255798, "learning_rate": 0.002828, "loss": 1.1537, "step": 461760 }, { "epoch": 34.65846153846154, "grad_norm": 0.6482577919960022, "learning_rate": 0.002828, "loss": 1.1475, "step": 461824 }, { "epoch": 34.663264540337714, "grad_norm": 0.694492518901825, "learning_rate": 0.002828, "loss": 1.1541, "step": 461888 }, { "epoch": 34.66806754221388, "grad_norm": 0.5939033031463623, "learning_rate": 0.002828, "loss": 1.1472, "step": 461952 }, { "epoch": 34.67287054409006, "grad_norm": 0.8614955544471741, "learning_rate": 0.002828, "loss": 1.1524, "step": 462016 }, { "epoch": 34.67767354596623, "grad_norm": 0.6040512919425964, "learning_rate": 0.002828, "loss": 1.152, "step": 462080 }, { "epoch": 34.6824765478424, "grad_norm": 0.5883114337921143, "learning_rate": 0.002828, "loss": 1.1536, "step": 462144 }, { "epoch": 34.687279549718575, "grad_norm": 0.5902653336524963, "learning_rate": 0.002828, "loss": 1.1511, "step": 462208 }, { "epoch": 34.692082551594744, "grad_norm": 0.6126294136047363, "learning_rate": 0.002828, "loss": 1.1528, "step": 462272 }, { "epoch": 34.69688555347092, "grad_norm": 0.5560796856880188, "learning_rate": 0.002828, "loss": 1.1508, "step": 462336 }, { "epoch": 34.70168855534709, "grad_norm": 0.6218116879463196, "learning_rate": 0.002828, "loss": 1.1464, "step": 462400 }, { "epoch": 34.706491557223266, "grad_norm": 0.5556285381317139, "learning_rate": 0.002828, "loss": 1.1526, "step": 462464 }, { "epoch": 34.711294559099436, "grad_norm": 0.7265997529029846, "learning_rate": 0.002828, "loss": 1.1567, "step": 462528 }, { "epoch": 34.71609756097561, "grad_norm": 0.5715520977973938, "learning_rate": 0.002828, "loss": 1.1498, "step": 462592 }, { "epoch": 34.72090056285178, "grad_norm": 0.5536825656890869, "learning_rate": 0.002828, "loss": 1.1602, "step": 462656 }, { "epoch": 34.72570356472796, "grad_norm": 0.552040696144104, "learning_rate": 0.002828, "loss": 1.1536, "step": 462720 }, { "epoch": 34.73050656660413, "grad_norm": 0.6238213181495667, "learning_rate": 0.002828, "loss": 1.153, "step": 462784 }, { "epoch": 34.735309568480304, "grad_norm": 0.6593725085258484, "learning_rate": 0.002828, "loss": 1.1496, "step": 462848 }, { "epoch": 34.74011257035647, "grad_norm": 0.7645888924598694, "learning_rate": 0.002828, "loss": 1.156, "step": 462912 }, { "epoch": 34.74491557223264, "grad_norm": 0.5748801827430725, "learning_rate": 0.002828, "loss": 1.1562, "step": 462976 }, { "epoch": 34.74971857410882, "grad_norm": 0.7016138434410095, "learning_rate": 0.002828, "loss": 1.1582, "step": 463040 }, { "epoch": 34.75452157598499, "grad_norm": 0.6343497037887573, "learning_rate": 0.002828, "loss": 1.1568, "step": 463104 }, { "epoch": 34.759324577861165, "grad_norm": 0.5537910461425781, "learning_rate": 0.002828, "loss": 1.1516, "step": 463168 }, { "epoch": 34.764127579737334, "grad_norm": 0.6629467606544495, "learning_rate": 0.002828, "loss": 1.1533, "step": 463232 }, { "epoch": 34.76893058161351, "grad_norm": 0.6417297124862671, "learning_rate": 0.002828, "loss": 1.1521, "step": 463296 }, { "epoch": 34.77373358348968, "grad_norm": 0.5607069134712219, "learning_rate": 0.002828, "loss": 1.1536, "step": 463360 }, { "epoch": 34.778536585365856, "grad_norm": 0.5720988512039185, "learning_rate": 0.002828, "loss": 1.151, "step": 463424 }, { "epoch": 34.783339587242025, "grad_norm": 0.6062473058700562, "learning_rate": 0.002828, "loss": 1.151, "step": 463488 }, { "epoch": 34.7881425891182, "grad_norm": 0.6105201244354248, "learning_rate": 0.002828, "loss": 1.151, "step": 463552 }, { "epoch": 34.79294559099437, "grad_norm": 0.6517950892448425, "learning_rate": 0.002828, "loss": 1.1578, "step": 463616 }, { "epoch": 34.79774859287055, "grad_norm": 0.5077842473983765, "learning_rate": 0.002828, "loss": 1.1524, "step": 463680 }, { "epoch": 34.80255159474672, "grad_norm": 0.4883643686771393, "learning_rate": 0.002828, "loss": 1.15, "step": 463744 }, { "epoch": 34.807354596622886, "grad_norm": 0.5117125511169434, "learning_rate": 0.002828, "loss": 1.1525, "step": 463808 }, { "epoch": 34.81215759849906, "grad_norm": 0.510983943939209, "learning_rate": 0.002828, "loss": 1.1483, "step": 463872 }, { "epoch": 34.81696060037523, "grad_norm": 0.5725462436676025, "learning_rate": 0.002828, "loss": 1.1537, "step": 463936 }, { "epoch": 34.82176360225141, "grad_norm": 0.7000669836997986, "learning_rate": 0.002828, "loss": 1.1543, "step": 464000 }, { "epoch": 34.82656660412758, "grad_norm": 0.597078800201416, "learning_rate": 0.002828, "loss": 1.153, "step": 464064 }, { "epoch": 34.831369606003754, "grad_norm": 0.5912140607833862, "learning_rate": 0.002828, "loss": 1.1547, "step": 464128 }, { "epoch": 34.836172607879924, "grad_norm": 0.6521306037902832, "learning_rate": 0.002828, "loss": 1.1561, "step": 464192 }, { "epoch": 34.8409756097561, "grad_norm": 0.5406016707420349, "learning_rate": 0.002828, "loss": 1.1598, "step": 464256 }, { "epoch": 34.84577861163227, "grad_norm": 0.6859225034713745, "learning_rate": 0.002828, "loss": 1.1541, "step": 464320 }, { "epoch": 34.850581613508446, "grad_norm": 0.5705737471580505, "learning_rate": 0.002828, "loss": 1.1581, "step": 464384 }, { "epoch": 34.855384615384615, "grad_norm": 0.6631976366043091, "learning_rate": 0.002828, "loss": 1.1534, "step": 464448 }, { "epoch": 34.860187617260785, "grad_norm": 0.6074179410934448, "learning_rate": 0.002828, "loss": 1.1521, "step": 464512 }, { "epoch": 34.86499061913696, "grad_norm": 0.5356570482254028, "learning_rate": 0.002828, "loss": 1.1497, "step": 464576 }, { "epoch": 34.86979362101313, "grad_norm": 0.760819137096405, "learning_rate": 0.002828, "loss": 1.1546, "step": 464640 }, { "epoch": 34.87459662288931, "grad_norm": 0.6953458189964294, "learning_rate": 0.002828, "loss": 1.146, "step": 464704 }, { "epoch": 34.879399624765476, "grad_norm": 0.5345589518547058, "learning_rate": 0.002828, "loss": 1.1503, "step": 464768 }, { "epoch": 34.88420262664165, "grad_norm": 0.6950819492340088, "learning_rate": 0.002828, "loss": 1.1518, "step": 464832 }, { "epoch": 34.88900562851782, "grad_norm": 0.6367828249931335, "learning_rate": 0.002828, "loss": 1.1519, "step": 464896 }, { "epoch": 34.893808630394, "grad_norm": 0.5749104619026184, "learning_rate": 0.002828, "loss": 1.1461, "step": 464960 }, { "epoch": 34.89861163227017, "grad_norm": 0.5483949184417725, "learning_rate": 0.002828, "loss": 1.1538, "step": 465024 }, { "epoch": 34.903414634146344, "grad_norm": 0.6119317412376404, "learning_rate": 0.002828, "loss": 1.1482, "step": 465088 }, { "epoch": 34.90821763602251, "grad_norm": 0.6510235071182251, "learning_rate": 0.002828, "loss": 1.1559, "step": 465152 }, { "epoch": 34.91302063789869, "grad_norm": 0.5535789728164673, "learning_rate": 0.002828, "loss": 1.1517, "step": 465216 }, { "epoch": 34.91782363977486, "grad_norm": 0.5926808714866638, "learning_rate": 0.002828, "loss": 1.1458, "step": 465280 }, { "epoch": 34.92262664165103, "grad_norm": 0.6453589200973511, "learning_rate": 0.002828, "loss": 1.1538, "step": 465344 }, { "epoch": 34.927429643527205, "grad_norm": 0.5200554132461548, "learning_rate": 0.002828, "loss": 1.1465, "step": 465408 }, { "epoch": 34.932232645403374, "grad_norm": 0.572009801864624, "learning_rate": 0.002828, "loss": 1.1535, "step": 465472 }, { "epoch": 34.93703564727955, "grad_norm": 0.5859612822532654, "learning_rate": 0.002828, "loss": 1.1583, "step": 465536 }, { "epoch": 34.94183864915572, "grad_norm": 0.8858081698417664, "learning_rate": 0.002828, "loss": 1.1491, "step": 465600 }, { "epoch": 34.9466416510319, "grad_norm": 0.5900371074676514, "learning_rate": 0.002828, "loss": 1.152, "step": 465664 }, { "epoch": 34.951444652908066, "grad_norm": 0.6436994075775146, "learning_rate": 0.002828, "loss": 1.1503, "step": 465728 }, { "epoch": 34.95624765478424, "grad_norm": 0.5482416152954102, "learning_rate": 0.002828, "loss": 1.1544, "step": 465792 }, { "epoch": 34.96105065666041, "grad_norm": 0.6615215539932251, "learning_rate": 0.002828, "loss": 1.1507, "step": 465856 }, { "epoch": 34.96585365853659, "grad_norm": 0.5764489769935608, "learning_rate": 0.002828, "loss": 1.1527, "step": 465920 }, { "epoch": 34.97065666041276, "grad_norm": 0.7269230484962463, "learning_rate": 0.002828, "loss": 1.1546, "step": 465984 }, { "epoch": 34.975459662288934, "grad_norm": 0.5440137982368469, "learning_rate": 0.002828, "loss": 1.1511, "step": 466048 }, { "epoch": 34.9802626641651, "grad_norm": 0.5844058990478516, "learning_rate": 0.002828, "loss": 1.1552, "step": 466112 }, { "epoch": 34.98506566604127, "grad_norm": 0.5198141932487488, "learning_rate": 0.002828, "loss": 1.1506, "step": 466176 }, { "epoch": 34.98986866791745, "grad_norm": 0.620516300201416, "learning_rate": 0.002828, "loss": 1.1557, "step": 466240 }, { "epoch": 34.99467166979362, "grad_norm": 0.6901420950889587, "learning_rate": 0.002828, "loss": 1.1532, "step": 466304 }, { "epoch": 34.999474671669795, "grad_norm": 0.6293230056762695, "learning_rate": 0.002828, "loss": 1.1526, "step": 466368 }, { "epoch": 35.004277673545964, "grad_norm": 0.6638535261154175, "learning_rate": 0.002828, "loss": 1.1266, "step": 466432 }, { "epoch": 35.00908067542214, "grad_norm": 0.5445776581764221, "learning_rate": 0.002828, "loss": 1.123, "step": 466496 }, { "epoch": 35.01388367729831, "grad_norm": 0.808910071849823, "learning_rate": 0.002828, "loss": 1.1231, "step": 466560 }, { "epoch": 35.018686679174486, "grad_norm": 0.6956142783164978, "learning_rate": 0.002828, "loss": 1.1235, "step": 466624 }, { "epoch": 35.023489681050656, "grad_norm": 0.6124756932258606, "learning_rate": 0.002828, "loss": 1.1242, "step": 466688 }, { "epoch": 35.02829268292683, "grad_norm": 0.5752196311950684, "learning_rate": 0.002828, "loss": 1.1175, "step": 466752 }, { "epoch": 35.033095684803, "grad_norm": 0.5530253052711487, "learning_rate": 0.002828, "loss": 1.1306, "step": 466816 }, { "epoch": 35.03789868667918, "grad_norm": 0.6303296685218811, "learning_rate": 0.002828, "loss": 1.1255, "step": 466880 }, { "epoch": 35.04270168855535, "grad_norm": 0.6001397967338562, "learning_rate": 0.002828, "loss": 1.117, "step": 466944 }, { "epoch": 35.047504690431516, "grad_norm": 0.5856416821479797, "learning_rate": 0.002828, "loss": 1.122, "step": 467008 }, { "epoch": 35.05230769230769, "grad_norm": 0.6043058633804321, "learning_rate": 0.002828, "loss": 1.1188, "step": 467072 }, { "epoch": 35.05711069418386, "grad_norm": 0.5569366812705994, "learning_rate": 0.002828, "loss": 1.1222, "step": 467136 }, { "epoch": 35.06191369606004, "grad_norm": 0.5817255973815918, "learning_rate": 0.002828, "loss": 1.1191, "step": 467200 }, { "epoch": 35.06671669793621, "grad_norm": 0.5985368490219116, "learning_rate": 0.002828, "loss": 1.1223, "step": 467264 }, { "epoch": 35.071519699812384, "grad_norm": 0.5214247107505798, "learning_rate": 0.002828, "loss": 1.1193, "step": 467328 }, { "epoch": 35.076322701688554, "grad_norm": 0.5910423994064331, "learning_rate": 0.002828, "loss": 1.1285, "step": 467392 }, { "epoch": 35.08112570356473, "grad_norm": 0.5970675945281982, "learning_rate": 0.002828, "loss": 1.1254, "step": 467456 }, { "epoch": 35.0859287054409, "grad_norm": 0.6935176849365234, "learning_rate": 0.002828, "loss": 1.1224, "step": 467520 }, { "epoch": 35.090731707317076, "grad_norm": 0.5811248421669006, "learning_rate": 0.002828, "loss": 1.1273, "step": 467584 }, { "epoch": 35.095534709193245, "grad_norm": 0.606397807598114, "learning_rate": 0.002828, "loss": 1.13, "step": 467648 }, { "epoch": 35.10033771106942, "grad_norm": 0.6233459711074829, "learning_rate": 0.002828, "loss": 1.1203, "step": 467712 }, { "epoch": 35.10514071294559, "grad_norm": 0.4995225965976715, "learning_rate": 0.002828, "loss": 1.1293, "step": 467776 }, { "epoch": 35.10994371482176, "grad_norm": 0.7771652340888977, "learning_rate": 0.002828, "loss": 1.1224, "step": 467840 }, { "epoch": 35.11474671669794, "grad_norm": 0.5408387780189514, "learning_rate": 0.002828, "loss": 1.1228, "step": 467904 }, { "epoch": 35.119549718574106, "grad_norm": 0.6495393514633179, "learning_rate": 0.002828, "loss": 1.1244, "step": 467968 }, { "epoch": 35.12435272045028, "grad_norm": 0.5671504139900208, "learning_rate": 0.002828, "loss": 1.1217, "step": 468032 }, { "epoch": 35.12915572232645, "grad_norm": 0.6291579008102417, "learning_rate": 0.002828, "loss": 1.123, "step": 468096 }, { "epoch": 35.13395872420263, "grad_norm": 0.5787367820739746, "learning_rate": 0.002828, "loss": 1.1198, "step": 468160 }, { "epoch": 35.1387617260788, "grad_norm": 0.6570515036582947, "learning_rate": 0.002828, "loss": 1.1217, "step": 468224 }, { "epoch": 35.143564727954974, "grad_norm": 0.6233621835708618, "learning_rate": 0.002828, "loss": 1.1312, "step": 468288 }, { "epoch": 35.14836772983114, "grad_norm": 0.6841063499450684, "learning_rate": 0.002828, "loss": 1.1202, "step": 468352 }, { "epoch": 35.15317073170732, "grad_norm": 0.5697184801101685, "learning_rate": 0.002828, "loss": 1.1258, "step": 468416 }, { "epoch": 35.15797373358349, "grad_norm": 0.6447716355323792, "learning_rate": 0.002828, "loss": 1.124, "step": 468480 }, { "epoch": 35.162776735459666, "grad_norm": 0.530463457107544, "learning_rate": 0.002828, "loss": 1.1245, "step": 468544 }, { "epoch": 35.167579737335835, "grad_norm": 0.6837313175201416, "learning_rate": 0.002828, "loss": 1.1258, "step": 468608 }, { "epoch": 35.172382739212004, "grad_norm": 0.7450433373451233, "learning_rate": 0.002828, "loss": 1.1297, "step": 468672 }, { "epoch": 35.17718574108818, "grad_norm": 0.6392331719398499, "learning_rate": 0.002828, "loss": 1.1263, "step": 468736 }, { "epoch": 35.18198874296435, "grad_norm": 0.5583149194717407, "learning_rate": 0.002828, "loss": 1.1284, "step": 468800 }, { "epoch": 35.18679174484053, "grad_norm": 0.43744319677352905, "learning_rate": 0.002828, "loss": 1.1268, "step": 468864 }, { "epoch": 35.191594746716696, "grad_norm": 0.5928747057914734, "learning_rate": 0.002828, "loss": 1.1272, "step": 468928 }, { "epoch": 35.19639774859287, "grad_norm": 0.5616432428359985, "learning_rate": 0.002828, "loss": 1.1273, "step": 468992 }, { "epoch": 35.20120075046904, "grad_norm": 0.5244531631469727, "learning_rate": 0.002828, "loss": 1.1274, "step": 469056 }, { "epoch": 35.20600375234522, "grad_norm": 0.5104504823684692, "learning_rate": 0.002828, "loss": 1.1269, "step": 469120 }, { "epoch": 35.21080675422139, "grad_norm": 0.5875263214111328, "learning_rate": 0.002828, "loss": 1.1352, "step": 469184 }, { "epoch": 35.215609756097564, "grad_norm": 0.6020212769508362, "learning_rate": 0.002828, "loss": 1.1231, "step": 469248 }, { "epoch": 35.22041275797373, "grad_norm": 0.7060278058052063, "learning_rate": 0.002828, "loss": 1.1249, "step": 469312 }, { "epoch": 35.22521575984991, "grad_norm": 0.5829773545265198, "learning_rate": 0.002828, "loss": 1.1286, "step": 469376 }, { "epoch": 35.23001876172608, "grad_norm": 0.6745613813400269, "learning_rate": 0.002828, "loss": 1.1309, "step": 469440 }, { "epoch": 35.23482176360225, "grad_norm": 0.6295201778411865, "learning_rate": 0.002828, "loss": 1.1232, "step": 469504 }, { "epoch": 35.239624765478425, "grad_norm": 0.6248756647109985, "learning_rate": 0.002828, "loss": 1.126, "step": 469568 }, { "epoch": 35.244427767354594, "grad_norm": 0.5959972739219666, "learning_rate": 0.002828, "loss": 1.1299, "step": 469632 }, { "epoch": 35.24923076923077, "grad_norm": 0.6190743446350098, "learning_rate": 0.002828, "loss": 1.1289, "step": 469696 }, { "epoch": 35.25403377110694, "grad_norm": 0.5316118001937866, "learning_rate": 0.002828, "loss": 1.1294, "step": 469760 }, { "epoch": 35.258836772983116, "grad_norm": 0.582175076007843, "learning_rate": 0.002828, "loss": 1.1314, "step": 469824 }, { "epoch": 35.263639774859286, "grad_norm": 0.6277863383293152, "learning_rate": 0.002828, "loss": 1.1315, "step": 469888 }, { "epoch": 35.26844277673546, "grad_norm": 0.5997483730316162, "learning_rate": 0.002828, "loss": 1.1332, "step": 469952 }, { "epoch": 35.27324577861163, "grad_norm": 0.6677803993225098, "learning_rate": 0.002828, "loss": 1.1248, "step": 470016 }, { "epoch": 35.27804878048781, "grad_norm": 0.5580044984817505, "learning_rate": 0.002828, "loss": 1.1291, "step": 470080 }, { "epoch": 35.28285178236398, "grad_norm": 0.65473872423172, "learning_rate": 0.002828, "loss": 1.1255, "step": 470144 }, { "epoch": 35.287654784240154, "grad_norm": 0.5278262495994568, "learning_rate": 0.002828, "loss": 1.1294, "step": 470208 }, { "epoch": 35.29245778611632, "grad_norm": 0.6099348664283752, "learning_rate": 0.002828, "loss": 1.133, "step": 470272 }, { "epoch": 35.29726078799249, "grad_norm": 0.5697486400604248, "learning_rate": 0.002828, "loss": 1.1358, "step": 470336 }, { "epoch": 35.30206378986867, "grad_norm": 0.6079903841018677, "learning_rate": 0.002828, "loss": 1.1344, "step": 470400 }, { "epoch": 35.30686679174484, "grad_norm": 0.6247881650924683, "learning_rate": 0.002828, "loss": 1.1288, "step": 470464 }, { "epoch": 35.311669793621014, "grad_norm": 0.5848674774169922, "learning_rate": 0.002828, "loss": 1.137, "step": 470528 }, { "epoch": 35.316472795497184, "grad_norm": 0.6638665199279785, "learning_rate": 0.002828, "loss": 1.1321, "step": 470592 }, { "epoch": 35.32127579737336, "grad_norm": 0.5754868388175964, "learning_rate": 0.002828, "loss": 1.1298, "step": 470656 }, { "epoch": 35.32607879924953, "grad_norm": 0.646364688873291, "learning_rate": 0.002828, "loss": 1.1319, "step": 470720 }, { "epoch": 35.330881801125706, "grad_norm": 0.6823537349700928, "learning_rate": 0.002828, "loss": 1.1314, "step": 470784 }, { "epoch": 35.335684803001875, "grad_norm": 0.5892389416694641, "learning_rate": 0.002828, "loss": 1.1346, "step": 470848 }, { "epoch": 35.34048780487805, "grad_norm": 0.5694969892501831, "learning_rate": 0.002828, "loss": 1.138, "step": 470912 }, { "epoch": 35.34529080675422, "grad_norm": 0.552528977394104, "learning_rate": 0.002828, "loss": 1.1331, "step": 470976 }, { "epoch": 35.3500938086304, "grad_norm": 0.6115056872367859, "learning_rate": 0.002828, "loss": 1.1331, "step": 471040 }, { "epoch": 35.35489681050657, "grad_norm": 0.5536540150642395, "learning_rate": 0.002828, "loss": 1.1308, "step": 471104 }, { "epoch": 35.359699812382736, "grad_norm": 0.570509135723114, "learning_rate": 0.002828, "loss": 1.1326, "step": 471168 }, { "epoch": 35.36450281425891, "grad_norm": 0.5459985733032227, "learning_rate": 0.002828, "loss": 1.1352, "step": 471232 }, { "epoch": 35.36930581613508, "grad_norm": 0.5294592380523682, "learning_rate": 0.002828, "loss": 1.1307, "step": 471296 }, { "epoch": 35.37410881801126, "grad_norm": 0.5800413489341736, "learning_rate": 0.002828, "loss": 1.1371, "step": 471360 }, { "epoch": 35.37891181988743, "grad_norm": 0.568485677242279, "learning_rate": 0.002828, "loss": 1.1393, "step": 471424 }, { "epoch": 35.383714821763604, "grad_norm": 0.7035385370254517, "learning_rate": 0.002828, "loss": 1.1338, "step": 471488 }, { "epoch": 35.38851782363977, "grad_norm": 0.738166868686676, "learning_rate": 0.002828, "loss": 1.1348, "step": 471552 }, { "epoch": 35.39332082551595, "grad_norm": 0.6015270352363586, "learning_rate": 0.002828, "loss": 1.1301, "step": 471616 }, { "epoch": 35.39812382739212, "grad_norm": 0.6614118218421936, "learning_rate": 0.002828, "loss": 1.1314, "step": 471680 }, { "epoch": 35.402926829268296, "grad_norm": 0.5099043846130371, "learning_rate": 0.002828, "loss": 1.1349, "step": 471744 }, { "epoch": 35.407729831144465, "grad_norm": 0.5569406747817993, "learning_rate": 0.002828, "loss": 1.1332, "step": 471808 }, { "epoch": 35.412532833020634, "grad_norm": 0.6680443286895752, "learning_rate": 0.002828, "loss": 1.1324, "step": 471872 }, { "epoch": 35.41733583489681, "grad_norm": 0.5892511010169983, "learning_rate": 0.002828, "loss": 1.134, "step": 471936 }, { "epoch": 35.42213883677298, "grad_norm": 0.6710988879203796, "learning_rate": 0.002828, "loss": 1.131, "step": 472000 }, { "epoch": 35.42694183864916, "grad_norm": 0.5715301036834717, "learning_rate": 0.002828, "loss": 1.1377, "step": 472064 }, { "epoch": 35.431744840525326, "grad_norm": 0.5255868434906006, "learning_rate": 0.002828, "loss": 1.1351, "step": 472128 }, { "epoch": 35.4365478424015, "grad_norm": 0.6189907789230347, "learning_rate": 0.002828, "loss": 1.1353, "step": 472192 }, { "epoch": 35.44135084427767, "grad_norm": 0.6305795311927795, "learning_rate": 0.002828, "loss": 1.1371, "step": 472256 }, { "epoch": 35.44615384615385, "grad_norm": 0.576610267162323, "learning_rate": 0.002828, "loss": 1.129, "step": 472320 }, { "epoch": 35.45095684803002, "grad_norm": 0.5561624765396118, "learning_rate": 0.002828, "loss": 1.129, "step": 472384 }, { "epoch": 35.455759849906194, "grad_norm": 0.6723029613494873, "learning_rate": 0.002828, "loss": 1.14, "step": 472448 }, { "epoch": 35.46056285178236, "grad_norm": 0.6434258222579956, "learning_rate": 0.002828, "loss": 1.1314, "step": 472512 }, { "epoch": 35.46536585365854, "grad_norm": 0.5116338729858398, "learning_rate": 0.002828, "loss": 1.1372, "step": 472576 }, { "epoch": 35.47016885553471, "grad_norm": 0.700920820236206, "learning_rate": 0.002828, "loss": 1.1396, "step": 472640 }, { "epoch": 35.47497185741088, "grad_norm": 0.5516610741615295, "learning_rate": 0.002828, "loss": 1.1342, "step": 472704 }, { "epoch": 35.479774859287055, "grad_norm": 0.6950984597206116, "learning_rate": 0.002828, "loss": 1.1338, "step": 472768 }, { "epoch": 35.484577861163224, "grad_norm": 0.6530833840370178, "learning_rate": 0.002828, "loss": 1.1363, "step": 472832 }, { "epoch": 35.4893808630394, "grad_norm": 0.6566064357757568, "learning_rate": 0.002828, "loss": 1.1324, "step": 472896 }, { "epoch": 35.49418386491557, "grad_norm": 0.5705472230911255, "learning_rate": 0.002828, "loss": 1.1372, "step": 472960 }, { "epoch": 35.498986866791746, "grad_norm": 0.6241821646690369, "learning_rate": 0.002828, "loss": 1.1318, "step": 473024 }, { "epoch": 35.503789868667916, "grad_norm": 0.5050911903381348, "learning_rate": 0.002828, "loss": 1.1373, "step": 473088 }, { "epoch": 35.50859287054409, "grad_norm": 0.6013885140419006, "learning_rate": 0.002828, "loss": 1.1356, "step": 473152 }, { "epoch": 35.51339587242026, "grad_norm": 0.6184156537055969, "learning_rate": 0.002828, "loss": 1.1333, "step": 473216 }, { "epoch": 35.51819887429644, "grad_norm": 0.5862095355987549, "learning_rate": 0.002828, "loss": 1.1352, "step": 473280 }, { "epoch": 35.52300187617261, "grad_norm": 0.6102173328399658, "learning_rate": 0.002828, "loss": 1.1343, "step": 473344 }, { "epoch": 35.527804878048784, "grad_norm": 0.6712579131126404, "learning_rate": 0.002828, "loss": 1.1341, "step": 473408 }, { "epoch": 35.53260787992495, "grad_norm": 0.7177531123161316, "learning_rate": 0.002828, "loss": 1.1356, "step": 473472 }, { "epoch": 35.53741088180112, "grad_norm": 0.6322616934776306, "learning_rate": 0.002828, "loss": 1.137, "step": 473536 }, { "epoch": 35.5422138836773, "grad_norm": 0.6838127374649048, "learning_rate": 0.002828, "loss": 1.1364, "step": 473600 }, { "epoch": 35.54701688555347, "grad_norm": 0.637467086315155, "learning_rate": 0.002828, "loss": 1.133, "step": 473664 }, { "epoch": 35.551819887429644, "grad_norm": 0.7239699363708496, "learning_rate": 0.002828, "loss": 1.1338, "step": 473728 }, { "epoch": 35.556622889305814, "grad_norm": 0.7126774787902832, "learning_rate": 0.002828, "loss": 1.1416, "step": 473792 }, { "epoch": 35.56142589118199, "grad_norm": 0.5230181813240051, "learning_rate": 0.002828, "loss": 1.1319, "step": 473856 }, { "epoch": 35.56622889305816, "grad_norm": 0.5874216556549072, "learning_rate": 0.002828, "loss": 1.1354, "step": 473920 }, { "epoch": 35.571031894934336, "grad_norm": 0.5958366990089417, "learning_rate": 0.002828, "loss": 1.1398, "step": 473984 }, { "epoch": 35.575834896810505, "grad_norm": 0.5132750868797302, "learning_rate": 0.002828, "loss": 1.1373, "step": 474048 }, { "epoch": 35.58063789868668, "grad_norm": 0.6392728686332703, "learning_rate": 0.002828, "loss": 1.1335, "step": 474112 }, { "epoch": 35.58544090056285, "grad_norm": 0.5683247447013855, "learning_rate": 0.002828, "loss": 1.1366, "step": 474176 }, { "epoch": 35.59024390243903, "grad_norm": 0.6130415797233582, "learning_rate": 0.002828, "loss": 1.1404, "step": 474240 }, { "epoch": 35.5950469043152, "grad_norm": 0.668921947479248, "learning_rate": 0.002828, "loss": 1.1421, "step": 474304 }, { "epoch": 35.599849906191366, "grad_norm": 0.6637597680091858, "learning_rate": 0.002828, "loss": 1.1379, "step": 474368 }, { "epoch": 35.60465290806754, "grad_norm": 0.5993471145629883, "learning_rate": 0.002828, "loss": 1.1381, "step": 474432 }, { "epoch": 35.60945590994371, "grad_norm": 0.6369923949241638, "learning_rate": 0.002828, "loss": 1.1388, "step": 474496 }, { "epoch": 35.61425891181989, "grad_norm": 0.5497013330459595, "learning_rate": 0.002828, "loss": 1.1306, "step": 474560 }, { "epoch": 35.61906191369606, "grad_norm": 0.617692768573761, "learning_rate": 0.002828, "loss": 1.1387, "step": 474624 }, { "epoch": 35.623864915572234, "grad_norm": 0.6321608424186707, "learning_rate": 0.002828, "loss": 1.1352, "step": 474688 }, { "epoch": 35.628667917448404, "grad_norm": 0.6047719717025757, "learning_rate": 0.002828, "loss": 1.1346, "step": 474752 }, { "epoch": 35.63347091932458, "grad_norm": 0.5528061985969543, "learning_rate": 0.002828, "loss": 1.1356, "step": 474816 }, { "epoch": 35.63827392120075, "grad_norm": 0.5263925790786743, "learning_rate": 0.002828, "loss": 1.1321, "step": 474880 }, { "epoch": 35.643076923076926, "grad_norm": 0.639483630657196, "learning_rate": 0.002828, "loss": 1.1418, "step": 474944 }, { "epoch": 35.647879924953095, "grad_norm": 0.605707049369812, "learning_rate": 0.002828, "loss": 1.1416, "step": 475008 }, { "epoch": 35.65268292682927, "grad_norm": 0.6229245662689209, "learning_rate": 0.002828, "loss": 1.1386, "step": 475072 }, { "epoch": 35.65748592870544, "grad_norm": 0.552807092666626, "learning_rate": 0.002828, "loss": 1.1362, "step": 475136 }, { "epoch": 35.66228893058161, "grad_norm": 0.5757431983947754, "learning_rate": 0.002828, "loss": 1.1323, "step": 475200 }, { "epoch": 35.66709193245779, "grad_norm": 0.7068957686424255, "learning_rate": 0.002828, "loss": 1.133, "step": 475264 }, { "epoch": 35.671894934333956, "grad_norm": 0.7497088313102722, "learning_rate": 0.002828, "loss": 1.1407, "step": 475328 }, { "epoch": 35.67669793621013, "grad_norm": 0.6466912627220154, "learning_rate": 0.002828, "loss": 1.1357, "step": 475392 }, { "epoch": 35.6815009380863, "grad_norm": 0.5262551307678223, "learning_rate": 0.002828, "loss": 1.1395, "step": 475456 }, { "epoch": 35.68630393996248, "grad_norm": 0.6165720820426941, "learning_rate": 0.002828, "loss": 1.1347, "step": 475520 }, { "epoch": 35.69110694183865, "grad_norm": 0.703349232673645, "learning_rate": 0.002828, "loss": 1.1536, "step": 475584 }, { "epoch": 35.695909943714824, "grad_norm": 47066.40234375, "learning_rate": 0.002828, "loss": 3.8045, "step": 475648 }, { "epoch": 35.70071294559099, "grad_norm": 21974.30859375, "learning_rate": 0.002828, "loss": 4.3187, "step": 475712 }, { "epoch": 35.70551594746717, "grad_norm": 2190.40478515625, "learning_rate": 0.002828, "loss": 4.4774, "step": 475776 }, { "epoch": 35.71031894934334, "grad_norm": 4.141016483306885, "learning_rate": 0.002828, "loss": 3.0715, "step": 475840 }, { "epoch": 35.715121951219515, "grad_norm": 3.2081074714660645, "learning_rate": 0.002828, "loss": 2.208, "step": 475904 }, { "epoch": 35.719924953095685, "grad_norm": 918.6317138671875, "learning_rate": 0.002828, "loss": 1.388, "step": 475968 }, { "epoch": 35.724727954971854, "grad_norm": 1.1493455171585083, "learning_rate": 0.002828, "loss": 1.2846, "step": 476032 }, { "epoch": 35.72953095684803, "grad_norm": 0.8047353625297546, "learning_rate": 0.002828, "loss": 1.1635, "step": 476096 }, { "epoch": 35.7343339587242, "grad_norm": 0.9038922190666199, "learning_rate": 0.002828, "loss": 1.1629, "step": 476160 }, { "epoch": 35.739136960600376, "grad_norm": 0.8342848420143127, "learning_rate": 0.002828, "loss": 1.1686, "step": 476224 }, { "epoch": 35.743939962476546, "grad_norm": 1.1942050457000732, "learning_rate": 0.002828, "loss": 1.1753, "step": 476288 }, { "epoch": 35.74874296435272, "grad_norm": 0.6982195377349854, "learning_rate": 0.002828, "loss": 1.1598, "step": 476352 }, { "epoch": 35.75354596622889, "grad_norm": 0.6133076548576355, "learning_rate": 0.002828, "loss": 1.1525, "step": 476416 }, { "epoch": 35.75834896810507, "grad_norm": 0.6019357442855835, "learning_rate": 0.002828, "loss": 1.1508, "step": 476480 }, { "epoch": 35.76315196998124, "grad_norm": 0.6552913188934326, "learning_rate": 0.002828, "loss": 1.1447, "step": 476544 }, { "epoch": 35.767954971857414, "grad_norm": 0.6798120737075806, "learning_rate": 0.002828, "loss": 1.1493, "step": 476608 }, { "epoch": 35.77275797373358, "grad_norm": 0.6229259967803955, "learning_rate": 0.002828, "loss": 1.1458, "step": 476672 }, { "epoch": 35.77756097560976, "grad_norm": 0.5518473386764526, "learning_rate": 0.002828, "loss": 1.1417, "step": 476736 }, { "epoch": 35.78236397748593, "grad_norm": 0.581292986869812, "learning_rate": 0.002828, "loss": 1.1444, "step": 476800 }, { "epoch": 35.7871669793621, "grad_norm": 0.6113260388374329, "learning_rate": 0.002828, "loss": 1.141, "step": 476864 }, { "epoch": 35.791969981238275, "grad_norm": 0.7037771940231323, "learning_rate": 0.002828, "loss": 1.1447, "step": 476928 }, { "epoch": 35.796772983114444, "grad_norm": 0.6758065223693848, "learning_rate": 0.002828, "loss": 1.1448, "step": 476992 }, { "epoch": 35.80157598499062, "grad_norm": 0.5714001655578613, "learning_rate": 0.002828, "loss": 1.1448, "step": 477056 }, { "epoch": 35.80637898686679, "grad_norm": 0.5781056880950928, "learning_rate": 0.002828, "loss": 1.1459, "step": 477120 }, { "epoch": 35.811181988742966, "grad_norm": 0.6759575605392456, "learning_rate": 0.002828, "loss": 1.1475, "step": 477184 }, { "epoch": 35.815984990619135, "grad_norm": 0.5613266825675964, "learning_rate": 0.002828, "loss": 1.1441, "step": 477248 }, { "epoch": 35.82078799249531, "grad_norm": 0.7072948813438416, "learning_rate": 0.002828, "loss": 1.1454, "step": 477312 }, { "epoch": 35.82559099437148, "grad_norm": 0.6761579513549805, "learning_rate": 0.002828, "loss": 1.1443, "step": 477376 }, { "epoch": 35.83039399624766, "grad_norm": 0.6502017974853516, "learning_rate": 0.002828, "loss": 1.1464, "step": 477440 }, { "epoch": 35.83519699812383, "grad_norm": 0.6976175904273987, "learning_rate": 0.002828, "loss": 1.1447, "step": 477504 }, { "epoch": 35.84, "grad_norm": 0.6039485335350037, "learning_rate": 0.002828, "loss": 1.1422, "step": 477568 }, { "epoch": 35.84480300187617, "grad_norm": 0.670085608959198, "learning_rate": 0.002828, "loss": 1.1412, "step": 477632 }, { "epoch": 35.84960600375234, "grad_norm": 0.6866872906684875, "learning_rate": 0.002828, "loss": 1.1491, "step": 477696 }, { "epoch": 35.85440900562852, "grad_norm": 0.6738069653511047, "learning_rate": 0.002828, "loss": 1.1477, "step": 477760 }, { "epoch": 35.85921200750469, "grad_norm": 0.597515344619751, "learning_rate": 0.002828, "loss": 1.1473, "step": 477824 }, { "epoch": 35.864015009380864, "grad_norm": 0.6964823603630066, "learning_rate": 0.002828, "loss": 1.1397, "step": 477888 }, { "epoch": 35.868818011257034, "grad_norm": 0.5846383571624756, "learning_rate": 0.002828, "loss": 1.1452, "step": 477952 }, { "epoch": 35.87362101313321, "grad_norm": 0.7641522884368896, "learning_rate": 0.002828, "loss": 1.1456, "step": 478016 }, { "epoch": 35.87842401500938, "grad_norm": 0.6246045231819153, "learning_rate": 0.002828, "loss": 1.1456, "step": 478080 }, { "epoch": 35.883227016885556, "grad_norm": 0.5942494869232178, "learning_rate": 0.002828, "loss": 1.1473, "step": 478144 }, { "epoch": 35.888030018761725, "grad_norm": 0.6786660552024841, "learning_rate": 0.002828, "loss": 1.1421, "step": 478208 }, { "epoch": 35.8928330206379, "grad_norm": 0.612209677696228, "learning_rate": 0.002828, "loss": 1.1488, "step": 478272 }, { "epoch": 35.89763602251407, "grad_norm": 0.7059167623519897, "learning_rate": 0.002828, "loss": 1.1428, "step": 478336 }, { "epoch": 35.90243902439025, "grad_norm": 0.48968270421028137, "learning_rate": 0.002828, "loss": 1.1393, "step": 478400 }, { "epoch": 35.90724202626642, "grad_norm": 0.5701474547386169, "learning_rate": 0.002828, "loss": 1.1466, "step": 478464 }, { "epoch": 35.912045028142586, "grad_norm": 0.6382841467857361, "learning_rate": 0.002828, "loss": 1.1427, "step": 478528 }, { "epoch": 35.91684803001876, "grad_norm": 0.7465910911560059, "learning_rate": 0.002828, "loss": 1.1348, "step": 478592 }, { "epoch": 35.92165103189493, "grad_norm": 0.6528146266937256, "learning_rate": 0.002828, "loss": 1.1522, "step": 478656 }, { "epoch": 35.92645403377111, "grad_norm": 0.577727198600769, "learning_rate": 0.002828, "loss": 1.1452, "step": 478720 }, { "epoch": 35.93125703564728, "grad_norm": 0.5171624422073364, "learning_rate": 0.002828, "loss": 1.1414, "step": 478784 }, { "epoch": 35.936060037523454, "grad_norm": 0.6009478569030762, "learning_rate": 0.002828, "loss": 1.1455, "step": 478848 }, { "epoch": 35.94086303939962, "grad_norm": 0.7087094783782959, "learning_rate": 0.002828, "loss": 1.142, "step": 478912 }, { "epoch": 35.9456660412758, "grad_norm": 0.5902978777885437, "learning_rate": 0.002828, "loss": 1.1488, "step": 478976 }, { "epoch": 35.95046904315197, "grad_norm": 0.6435022950172424, "learning_rate": 0.002828, "loss": 1.1403, "step": 479040 }, { "epoch": 35.955272045028146, "grad_norm": 0.5205735564231873, "learning_rate": 0.002828, "loss": 1.1398, "step": 479104 }, { "epoch": 35.960075046904315, "grad_norm": 0.5627501010894775, "learning_rate": 0.002828, "loss": 1.1452, "step": 479168 }, { "epoch": 35.96487804878049, "grad_norm": 0.5993406176567078, "learning_rate": 0.002828, "loss": 1.1441, "step": 479232 }, { "epoch": 35.96968105065666, "grad_norm": 0.6258234977722168, "learning_rate": 0.002828, "loss": 1.1438, "step": 479296 }, { "epoch": 35.97448405253283, "grad_norm": 0.7097495794296265, "learning_rate": 0.002828, "loss": 1.1469, "step": 479360 }, { "epoch": 35.979287054409006, "grad_norm": 0.5994864106178284, "learning_rate": 0.002828, "loss": 1.1457, "step": 479424 }, { "epoch": 35.984090056285176, "grad_norm": 0.7201527953147888, "learning_rate": 0.002828, "loss": 1.1439, "step": 479488 }, { "epoch": 35.98889305816135, "grad_norm": 0.5710475444793701, "learning_rate": 0.002828, "loss": 1.1458, "step": 479552 }, { "epoch": 35.99369606003752, "grad_norm": 0.7653760313987732, "learning_rate": 0.002828, "loss": 1.1467, "step": 479616 }, { "epoch": 35.9984990619137, "grad_norm": 0.7502647638320923, "learning_rate": 0.002828, "loss": 1.1445, "step": 479680 }, { "epoch": 36.00330206378987, "grad_norm": 0.6153002977371216, "learning_rate": 0.002828, "loss": 1.1176, "step": 479744 }, { "epoch": 36.008105065666044, "grad_norm": 0.733729362487793, "learning_rate": 0.002828, "loss": 1.1042, "step": 479808 }, { "epoch": 36.01290806754221, "grad_norm": 0.5874631404876709, "learning_rate": 0.002828, "loss": 1.1063, "step": 479872 }, { "epoch": 36.01771106941839, "grad_norm": 0.6294484734535217, "learning_rate": 0.002828, "loss": 1.1132, "step": 479936 }, { "epoch": 36.02251407129456, "grad_norm": 0.8180893063545227, "learning_rate": 0.002828, "loss": 1.108, "step": 480000 }, { "epoch": 36.02731707317073, "grad_norm": 0.6071234941482544, "learning_rate": 0.002828, "loss": 1.1157, "step": 480064 }, { "epoch": 36.032120075046905, "grad_norm": 0.8069936633110046, "learning_rate": 0.002828, "loss": 1.1082, "step": 480128 }, { "epoch": 36.036923076923074, "grad_norm": 0.6223371028900146, "learning_rate": 0.002828, "loss": 1.109, "step": 480192 }, { "epoch": 36.04172607879925, "grad_norm": 0.6613013744354248, "learning_rate": 0.002828, "loss": 1.1019, "step": 480256 }, { "epoch": 36.04652908067542, "grad_norm": 0.5982505083084106, "learning_rate": 0.002828, "loss": 1.1134, "step": 480320 }, { "epoch": 36.051332082551596, "grad_norm": 0.5876198410987854, "learning_rate": 0.002828, "loss": 1.1139, "step": 480384 }, { "epoch": 36.056135084427765, "grad_norm": 0.5883331894874573, "learning_rate": 0.002828, "loss": 1.1104, "step": 480448 }, { "epoch": 36.06093808630394, "grad_norm": 0.6238200664520264, "learning_rate": 0.002828, "loss": 1.1122, "step": 480512 }, { "epoch": 36.06574108818011, "grad_norm": 0.6554744839668274, "learning_rate": 0.002828, "loss": 1.11, "step": 480576 }, { "epoch": 36.07054409005629, "grad_norm": 0.7005370855331421, "learning_rate": 0.002828, "loss": 1.1063, "step": 480640 }, { "epoch": 36.07534709193246, "grad_norm": 0.5803971886634827, "learning_rate": 0.002828, "loss": 1.1154, "step": 480704 }, { "epoch": 36.08015009380863, "grad_norm": 0.7155123353004456, "learning_rate": 0.002828, "loss": 1.112, "step": 480768 }, { "epoch": 36.0849530956848, "grad_norm": 0.605573832988739, "learning_rate": 0.002828, "loss": 1.1157, "step": 480832 }, { "epoch": 36.08975609756097, "grad_norm": 0.587078332901001, "learning_rate": 0.002828, "loss": 1.1125, "step": 480896 }, { "epoch": 36.09455909943715, "grad_norm": 0.6320798993110657, "learning_rate": 0.002828, "loss": 1.1114, "step": 480960 }, { "epoch": 36.09936210131332, "grad_norm": 0.6711087822914124, "learning_rate": 0.002828, "loss": 1.1164, "step": 481024 }, { "epoch": 36.104165103189494, "grad_norm": 0.721170961856842, "learning_rate": 0.002828, "loss": 1.112, "step": 481088 }, { "epoch": 36.108968105065664, "grad_norm": 0.5629286170005798, "learning_rate": 0.002828, "loss": 1.1185, "step": 481152 }, { "epoch": 36.11377110694184, "grad_norm": 0.5569593906402588, "learning_rate": 0.002828, "loss": 1.1147, "step": 481216 }, { "epoch": 36.11857410881801, "grad_norm": 0.636684000492096, "learning_rate": 0.002828, "loss": 1.1149, "step": 481280 }, { "epoch": 36.123377110694186, "grad_norm": 0.5977029204368591, "learning_rate": 0.002828, "loss": 1.1142, "step": 481344 }, { "epoch": 36.128180112570355, "grad_norm": 0.6242080330848694, "learning_rate": 0.002828, "loss": 1.1133, "step": 481408 }, { "epoch": 36.13298311444653, "grad_norm": 0.6300401091575623, "learning_rate": 0.002828, "loss": 1.1175, "step": 481472 }, { "epoch": 36.1377861163227, "grad_norm": 0.6650278568267822, "learning_rate": 0.002828, "loss": 1.1174, "step": 481536 }, { "epoch": 36.14258911819888, "grad_norm": 0.5920589566230774, "learning_rate": 0.002828, "loss": 1.118, "step": 481600 }, { "epoch": 36.14739212007505, "grad_norm": 0.623343288898468, "learning_rate": 0.002828, "loss": 1.1124, "step": 481664 }, { "epoch": 36.152195121951216, "grad_norm": 0.7111536860466003, "learning_rate": 0.002828, "loss": 1.1205, "step": 481728 }, { "epoch": 36.15699812382739, "grad_norm": 0.6605375409126282, "learning_rate": 0.002828, "loss": 1.1181, "step": 481792 }, { "epoch": 36.16180112570356, "grad_norm": 0.6241599321365356, "learning_rate": 0.002828, "loss": 1.1124, "step": 481856 }, { "epoch": 36.16660412757974, "grad_norm": 0.5739508271217346, "learning_rate": 0.002828, "loss": 1.1213, "step": 481920 }, { "epoch": 36.17140712945591, "grad_norm": 0.5860132575035095, "learning_rate": 0.002828, "loss": 1.114, "step": 481984 }, { "epoch": 36.176210131332084, "grad_norm": 0.6558253169059753, "learning_rate": 0.002828, "loss": 1.1196, "step": 482048 }, { "epoch": 36.18101313320825, "grad_norm": 0.6566668152809143, "learning_rate": 0.002828, "loss": 1.1229, "step": 482112 }, { "epoch": 36.18581613508443, "grad_norm": 0.6323477625846863, "learning_rate": 0.002828, "loss": 1.113, "step": 482176 }, { "epoch": 36.1906191369606, "grad_norm": 0.6525058746337891, "learning_rate": 0.002828, "loss": 1.1159, "step": 482240 }, { "epoch": 36.195422138836776, "grad_norm": 0.6623163223266602, "learning_rate": 0.002828, "loss": 1.1177, "step": 482304 }, { "epoch": 36.200225140712945, "grad_norm": 0.683592677116394, "learning_rate": 0.002828, "loss": 1.1155, "step": 482368 }, { "epoch": 36.20502814258912, "grad_norm": 0.7265550494194031, "learning_rate": 0.002828, "loss": 1.1182, "step": 482432 }, { "epoch": 36.20983114446529, "grad_norm": 0.5851932168006897, "learning_rate": 0.002828, "loss": 1.1199, "step": 482496 }, { "epoch": 36.21463414634146, "grad_norm": 0.6824109554290771, "learning_rate": 0.002828, "loss": 1.1244, "step": 482560 }, { "epoch": 36.21943714821764, "grad_norm": 0.6103503704071045, "learning_rate": 0.002828, "loss": 1.1169, "step": 482624 }, { "epoch": 36.224240150093806, "grad_norm": 0.5993927717208862, "learning_rate": 0.002828, "loss": 1.1143, "step": 482688 }, { "epoch": 36.22904315196998, "grad_norm": 0.601505696773529, "learning_rate": 0.002828, "loss": 1.1209, "step": 482752 }, { "epoch": 36.23384615384615, "grad_norm": 0.6602485179901123, "learning_rate": 0.002828, "loss": 1.1153, "step": 482816 }, { "epoch": 36.23864915572233, "grad_norm": 0.7924669981002808, "learning_rate": 0.002828, "loss": 1.1218, "step": 482880 }, { "epoch": 36.2434521575985, "grad_norm": 0.6222769021987915, "learning_rate": 0.002828, "loss": 1.1201, "step": 482944 }, { "epoch": 36.248255159474674, "grad_norm": 0.5969858765602112, "learning_rate": 0.002828, "loss": 1.1201, "step": 483008 }, { "epoch": 36.25305816135084, "grad_norm": 0.6420055031776428, "learning_rate": 0.002828, "loss": 1.1219, "step": 483072 }, { "epoch": 36.25786116322702, "grad_norm": 0.6040505170822144, "learning_rate": 0.002828, "loss": 1.1151, "step": 483136 }, { "epoch": 36.26266416510319, "grad_norm": 0.5828750133514404, "learning_rate": 0.002828, "loss": 1.1249, "step": 483200 }, { "epoch": 36.267467166979365, "grad_norm": 0.5604116320610046, "learning_rate": 0.002828, "loss": 1.1217, "step": 483264 }, { "epoch": 36.272270168855535, "grad_norm": 0.7253416776657104, "learning_rate": 0.002828, "loss": 1.1243, "step": 483328 }, { "epoch": 36.277073170731704, "grad_norm": 0.538339376449585, "learning_rate": 0.002828, "loss": 1.1237, "step": 483392 }, { "epoch": 36.28187617260788, "grad_norm": 0.6550213098526001, "learning_rate": 0.002828, "loss": 1.1175, "step": 483456 }, { "epoch": 36.28667917448405, "grad_norm": 0.6307083964347839, "learning_rate": 0.002828, "loss": 1.1153, "step": 483520 }, { "epoch": 36.291482176360226, "grad_norm": 0.7109634280204773, "learning_rate": 0.002828, "loss": 1.1255, "step": 483584 }, { "epoch": 36.296285178236396, "grad_norm": 0.560472309589386, "learning_rate": 0.002828, "loss": 1.1171, "step": 483648 }, { "epoch": 36.30108818011257, "grad_norm": 0.5119628310203552, "learning_rate": 0.002828, "loss": 1.1189, "step": 483712 }, { "epoch": 36.30589118198874, "grad_norm": 0.66200852394104, "learning_rate": 0.002828, "loss": 1.1222, "step": 483776 }, { "epoch": 36.31069418386492, "grad_norm": 0.6191007494926453, "learning_rate": 0.002828, "loss": 1.1173, "step": 483840 }, { "epoch": 36.31549718574109, "grad_norm": 0.7778931260108948, "learning_rate": 0.002828, "loss": 1.1213, "step": 483904 }, { "epoch": 36.32030018761726, "grad_norm": 0.6234067678451538, "learning_rate": 0.002828, "loss": 1.1219, "step": 483968 }, { "epoch": 36.32510318949343, "grad_norm": 0.5811113119125366, "learning_rate": 0.002828, "loss": 1.1212, "step": 484032 }, { "epoch": 36.32990619136961, "grad_norm": 0.5138593912124634, "learning_rate": 0.002828, "loss": 1.125, "step": 484096 }, { "epoch": 36.33470919324578, "grad_norm": 0.5733650922775269, "learning_rate": 0.002828, "loss": 1.127, "step": 484160 }, { "epoch": 36.33951219512195, "grad_norm": 0.634680449962616, "learning_rate": 0.002828, "loss": 1.1238, "step": 484224 }, { "epoch": 36.344315196998124, "grad_norm": 0.6485024690628052, "learning_rate": 0.002828, "loss": 1.1217, "step": 484288 }, { "epoch": 36.349118198874294, "grad_norm": 0.6225195527076721, "learning_rate": 0.002828, "loss": 1.1254, "step": 484352 }, { "epoch": 36.35392120075047, "grad_norm": 0.6214507222175598, "learning_rate": 0.002828, "loss": 1.123, "step": 484416 }, { "epoch": 36.35872420262664, "grad_norm": 0.49921342730522156, "learning_rate": 0.002828, "loss": 1.1211, "step": 484480 }, { "epoch": 36.363527204502816, "grad_norm": 0.6985149383544922, "learning_rate": 0.002828, "loss": 1.1162, "step": 484544 }, { "epoch": 36.368330206378985, "grad_norm": 0.7828275561332703, "learning_rate": 0.002828, "loss": 1.132, "step": 484608 }, { "epoch": 36.37313320825516, "grad_norm": 0.6335474252700806, "learning_rate": 0.002828, "loss": 1.1208, "step": 484672 }, { "epoch": 36.37793621013133, "grad_norm": 0.6620512008666992, "learning_rate": 0.002828, "loss": 1.1188, "step": 484736 }, { "epoch": 36.38273921200751, "grad_norm": 0.48815280199050903, "learning_rate": 0.002828, "loss": 1.1223, "step": 484800 }, { "epoch": 36.38754221388368, "grad_norm": 0.6472222208976746, "learning_rate": 0.002828, "loss": 1.1208, "step": 484864 }, { "epoch": 36.39234521575985, "grad_norm": 0.5891759991645813, "learning_rate": 0.002828, "loss": 1.1215, "step": 484928 }, { "epoch": 36.39714821763602, "grad_norm": 0.7245331406593323, "learning_rate": 0.002828, "loss": 1.123, "step": 484992 }, { "epoch": 36.40195121951219, "grad_norm": 0.6620111465454102, "learning_rate": 0.002828, "loss": 1.1267, "step": 485056 }, { "epoch": 36.40675422138837, "grad_norm": 0.7744014263153076, "learning_rate": 0.002828, "loss": 1.1178, "step": 485120 }, { "epoch": 36.41155722326454, "grad_norm": 0.5176191329956055, "learning_rate": 0.002828, "loss": 1.1264, "step": 485184 }, { "epoch": 36.416360225140714, "grad_norm": 0.7624377608299255, "learning_rate": 0.002828, "loss": 1.1319, "step": 485248 }, { "epoch": 36.42116322701688, "grad_norm": 0.5986029505729675, "learning_rate": 0.002828, "loss": 1.118, "step": 485312 }, { "epoch": 36.42596622889306, "grad_norm": 0.5113912224769592, "learning_rate": 0.002828, "loss": 1.1189, "step": 485376 }, { "epoch": 36.43076923076923, "grad_norm": 0.6814943552017212, "learning_rate": 0.002828, "loss": 1.1253, "step": 485440 }, { "epoch": 36.435572232645406, "grad_norm": 0.682895839214325, "learning_rate": 0.002828, "loss": 1.125, "step": 485504 }, { "epoch": 36.440375234521575, "grad_norm": 0.5333254337310791, "learning_rate": 0.002828, "loss": 1.13, "step": 485568 }, { "epoch": 36.44517823639775, "grad_norm": 0.6200302839279175, "learning_rate": 0.002828, "loss": 1.1251, "step": 485632 }, { "epoch": 36.44998123827392, "grad_norm": 0.5972156524658203, "learning_rate": 0.002828, "loss": 1.1321, "step": 485696 }, { "epoch": 36.4547842401501, "grad_norm": 0.60141521692276, "learning_rate": 0.002828, "loss": 1.1285, "step": 485760 }, { "epoch": 36.45958724202627, "grad_norm": 0.47839000821113586, "learning_rate": 0.002828, "loss": 1.1252, "step": 485824 }, { "epoch": 36.464390243902436, "grad_norm": 0.6346657276153564, "learning_rate": 0.002828, "loss": 1.1298, "step": 485888 }, { "epoch": 36.46919324577861, "grad_norm": 0.6371756792068481, "learning_rate": 0.002828, "loss": 1.1202, "step": 485952 }, { "epoch": 36.47399624765478, "grad_norm": 0.5818215608596802, "learning_rate": 0.002828, "loss": 1.122, "step": 486016 }, { "epoch": 36.47879924953096, "grad_norm": 0.6121278405189514, "learning_rate": 0.002828, "loss": 1.1273, "step": 486080 }, { "epoch": 36.48360225140713, "grad_norm": 0.5700222849845886, "learning_rate": 0.002828, "loss": 1.1233, "step": 486144 }, { "epoch": 36.488405253283304, "grad_norm": 0.7344767451286316, "learning_rate": 0.002828, "loss": 1.1247, "step": 486208 }, { "epoch": 36.49320825515947, "grad_norm": 0.657575786113739, "learning_rate": 0.002828, "loss": 1.1269, "step": 486272 }, { "epoch": 36.49801125703565, "grad_norm": 0.574347972869873, "learning_rate": 0.002828, "loss": 1.1265, "step": 486336 }, { "epoch": 36.50281425891182, "grad_norm": 0.6713154911994934, "learning_rate": 0.002828, "loss": 1.1197, "step": 486400 }, { "epoch": 36.507617260787995, "grad_norm": 0.5219811201095581, "learning_rate": 0.002828, "loss": 1.1242, "step": 486464 }, { "epoch": 36.512420262664165, "grad_norm": 0.6195260286331177, "learning_rate": 0.002828, "loss": 1.1247, "step": 486528 }, { "epoch": 36.51722326454034, "grad_norm": 0.560436487197876, "learning_rate": 0.002828, "loss": 1.1284, "step": 486592 }, { "epoch": 36.52202626641651, "grad_norm": 0.6903839111328125, "learning_rate": 0.002828, "loss": 1.1195, "step": 486656 }, { "epoch": 36.52682926829268, "grad_norm": 0.606224536895752, "learning_rate": 0.002828, "loss": 1.1243, "step": 486720 }, { "epoch": 36.531632270168856, "grad_norm": 0.5438897609710693, "learning_rate": 0.002828, "loss": 1.1307, "step": 486784 }, { "epoch": 36.536435272045026, "grad_norm": 0.6882777214050293, "learning_rate": 0.002828, "loss": 1.1291, "step": 486848 }, { "epoch": 36.5412382739212, "grad_norm": 0.6011608839035034, "learning_rate": 0.002828, "loss": 1.1219, "step": 486912 }, { "epoch": 36.54604127579737, "grad_norm": 0.6009193658828735, "learning_rate": 0.002828, "loss": 1.1277, "step": 486976 }, { "epoch": 36.55084427767355, "grad_norm": 0.6801005601882935, "learning_rate": 0.002828, "loss": 1.1304, "step": 487040 }, { "epoch": 36.55564727954972, "grad_norm": 0.6301913261413574, "learning_rate": 0.002828, "loss": 1.1281, "step": 487104 }, { "epoch": 36.560450281425894, "grad_norm": 0.5682470798492432, "learning_rate": 0.002828, "loss": 1.1217, "step": 487168 }, { "epoch": 36.56525328330206, "grad_norm": 0.6990990042686462, "learning_rate": 0.002828, "loss": 1.1291, "step": 487232 }, { "epoch": 36.57005628517824, "grad_norm": 0.6065164804458618, "learning_rate": 0.002828, "loss": 1.1249, "step": 487296 }, { "epoch": 36.57485928705441, "grad_norm": 0.6101195216178894, "learning_rate": 0.002828, "loss": 1.128, "step": 487360 }, { "epoch": 36.57966228893058, "grad_norm": 0.6691819429397583, "learning_rate": 0.002828, "loss": 1.1252, "step": 487424 }, { "epoch": 36.584465290806754, "grad_norm": 0.7639368772506714, "learning_rate": 0.002828, "loss": 1.1233, "step": 487488 }, { "epoch": 36.589268292682924, "grad_norm": 0.8708202242851257, "learning_rate": 0.002828, "loss": 1.1313, "step": 487552 }, { "epoch": 36.5940712945591, "grad_norm": 0.5969586968421936, "learning_rate": 0.002828, "loss": 1.1267, "step": 487616 }, { "epoch": 36.59887429643527, "grad_norm": 0.542630672454834, "learning_rate": 0.002828, "loss": 1.1269, "step": 487680 }, { "epoch": 36.603677298311446, "grad_norm": 0.5667511224746704, "learning_rate": 0.002828, "loss": 1.1309, "step": 487744 }, { "epoch": 36.608480300187615, "grad_norm": 0.6424358487129211, "learning_rate": 0.002828, "loss": 1.1264, "step": 487808 }, { "epoch": 36.61328330206379, "grad_norm": 0.5762342810630798, "learning_rate": 0.002828, "loss": 1.1292, "step": 487872 }, { "epoch": 36.61808630393996, "grad_norm": 0.5913277268409729, "learning_rate": 0.002828, "loss": 1.1322, "step": 487936 }, { "epoch": 36.62288930581614, "grad_norm": 0.7270256280899048, "learning_rate": 0.002828, "loss": 1.1356, "step": 488000 }, { "epoch": 36.62769230769231, "grad_norm": 0.7120741605758667, "learning_rate": 0.002828, "loss": 1.1286, "step": 488064 }, { "epoch": 36.63249530956848, "grad_norm": 0.5873661637306213, "learning_rate": 0.002828, "loss": 1.1281, "step": 488128 }, { "epoch": 36.63729831144465, "grad_norm": 0.5965089797973633, "learning_rate": 0.002828, "loss": 1.1288, "step": 488192 }, { "epoch": 36.64210131332082, "grad_norm": 0.5685919523239136, "learning_rate": 0.002828, "loss": 1.1266, "step": 488256 }, { "epoch": 36.646904315197, "grad_norm": 0.5000462532043457, "learning_rate": 0.002828, "loss": 1.1342, "step": 488320 }, { "epoch": 36.65170731707317, "grad_norm": 0.6275484561920166, "learning_rate": 0.002828, "loss": 1.13, "step": 488384 }, { "epoch": 36.656510318949344, "grad_norm": 0.6414892077445984, "learning_rate": 0.002828, "loss": 1.1309, "step": 488448 }, { "epoch": 36.66131332082551, "grad_norm": 0.632480800151825, "learning_rate": 0.002828, "loss": 1.1315, "step": 488512 }, { "epoch": 36.66611632270169, "grad_norm": 0.6146697998046875, "learning_rate": 0.002828, "loss": 1.1311, "step": 488576 }, { "epoch": 36.67091932457786, "grad_norm": 0.6127843856811523, "learning_rate": 0.002828, "loss": 1.1307, "step": 488640 }, { "epoch": 36.675722326454036, "grad_norm": 0.6762889623641968, "learning_rate": 0.002828, "loss": 1.128, "step": 488704 }, { "epoch": 36.680525328330205, "grad_norm": 0.5495359301567078, "learning_rate": 0.002828, "loss": 1.1334, "step": 488768 }, { "epoch": 36.68532833020638, "grad_norm": 0.5311427116394043, "learning_rate": 0.002828, "loss": 1.1291, "step": 488832 }, { "epoch": 36.69013133208255, "grad_norm": 0.6519775986671448, "learning_rate": 0.002828, "loss": 1.1324, "step": 488896 }, { "epoch": 36.69493433395873, "grad_norm": 0.5537697076797485, "learning_rate": 0.002828, "loss": 1.1287, "step": 488960 }, { "epoch": 36.6997373358349, "grad_norm": 0.6321004629135132, "learning_rate": 0.002828, "loss": 1.1281, "step": 489024 }, { "epoch": 36.704540337711066, "grad_norm": 0.583900511264801, "learning_rate": 0.002828, "loss": 1.123, "step": 489088 }, { "epoch": 36.70934333958724, "grad_norm": 0.6706740856170654, "learning_rate": 0.002828, "loss": 1.1321, "step": 489152 }, { "epoch": 36.71414634146341, "grad_norm": 0.5570067763328552, "learning_rate": 0.002828, "loss": 1.132, "step": 489216 }, { "epoch": 36.71894934333959, "grad_norm": 0.747728705406189, "learning_rate": 0.002828, "loss": 1.1271, "step": 489280 }, { "epoch": 36.72375234521576, "grad_norm": 0.6183828115463257, "learning_rate": 0.002828, "loss": 1.1283, "step": 489344 }, { "epoch": 36.728555347091934, "grad_norm": 0.7345849871635437, "learning_rate": 0.002828, "loss": 1.1314, "step": 489408 }, { "epoch": 36.7333583489681, "grad_norm": 0.6629622578620911, "learning_rate": 0.002828, "loss": 1.1324, "step": 489472 }, { "epoch": 36.73816135084428, "grad_norm": 0.5575123429298401, "learning_rate": 0.002828, "loss": 1.131, "step": 489536 }, { "epoch": 36.74296435272045, "grad_norm": 0.5553168654441833, "learning_rate": 0.002828, "loss": 1.1322, "step": 489600 }, { "epoch": 36.747767354596625, "grad_norm": 0.7154178023338318, "learning_rate": 0.002828, "loss": 1.1329, "step": 489664 }, { "epoch": 36.752570356472795, "grad_norm": 0.6160262823104858, "learning_rate": 0.002828, "loss": 1.1305, "step": 489728 }, { "epoch": 36.75737335834897, "grad_norm": 0.577056348323822, "learning_rate": 0.002828, "loss": 1.1311, "step": 489792 }, { "epoch": 36.76217636022514, "grad_norm": 0.5922368168830872, "learning_rate": 0.002828, "loss": 1.1301, "step": 489856 }, { "epoch": 36.76697936210131, "grad_norm": 0.603999674320221, "learning_rate": 0.002828, "loss": 1.1282, "step": 489920 }, { "epoch": 36.771782363977486, "grad_norm": 0.5781303644180298, "learning_rate": 0.002828, "loss": 1.13, "step": 489984 }, { "epoch": 36.776585365853656, "grad_norm": 0.6862877607345581, "learning_rate": 0.002828, "loss": 1.1338, "step": 490048 }, { "epoch": 36.78138836772983, "grad_norm": 0.5813214182853699, "learning_rate": 0.002828, "loss": 1.1285, "step": 490112 }, { "epoch": 36.786191369606, "grad_norm": 0.6811426877975464, "learning_rate": 0.002828, "loss": 1.132, "step": 490176 }, { "epoch": 36.79099437148218, "grad_norm": 0.5649698376655579, "learning_rate": 0.002828, "loss": 1.1298, "step": 490240 }, { "epoch": 36.79579737335835, "grad_norm": 0.49407100677490234, "learning_rate": 0.002828, "loss": 1.1315, "step": 490304 }, { "epoch": 36.800600375234524, "grad_norm": 0.6909792423248291, "learning_rate": 0.002828, "loss": 1.1343, "step": 490368 }, { "epoch": 36.80540337711069, "grad_norm": 0.5602936744689941, "learning_rate": 0.002828, "loss": 1.1293, "step": 490432 }, { "epoch": 36.81020637898687, "grad_norm": 0.7587369680404663, "learning_rate": 0.002828, "loss": 1.1348, "step": 490496 }, { "epoch": 36.81500938086304, "grad_norm": 0.5359601378440857, "learning_rate": 0.002828, "loss": 1.1338, "step": 490560 }, { "epoch": 36.819812382739215, "grad_norm": 0.671832263469696, "learning_rate": 0.002828, "loss": 1.1335, "step": 490624 }, { "epoch": 36.824615384615385, "grad_norm": 0.5710498690605164, "learning_rate": 0.002828, "loss": 1.1339, "step": 490688 }, { "epoch": 36.829418386491554, "grad_norm": 0.6783512234687805, "learning_rate": 0.002828, "loss": 1.132, "step": 490752 }, { "epoch": 36.83422138836773, "grad_norm": 0.5253806114196777, "learning_rate": 0.002828, "loss": 1.1286, "step": 490816 }, { "epoch": 36.8390243902439, "grad_norm": 0.6593382954597473, "learning_rate": 0.002828, "loss": 1.1347, "step": 490880 }, { "epoch": 36.843827392120076, "grad_norm": 0.6432192325592041, "learning_rate": 0.002828, "loss": 1.1328, "step": 490944 }, { "epoch": 36.848630393996245, "grad_norm": 0.5609253644943237, "learning_rate": 0.002828, "loss": 1.1332, "step": 491008 }, { "epoch": 36.85343339587242, "grad_norm": 0.5702568292617798, "learning_rate": 0.002828, "loss": 1.1332, "step": 491072 }, { "epoch": 36.85823639774859, "grad_norm": 0.5661723613739014, "learning_rate": 0.002828, "loss": 1.1316, "step": 491136 }, { "epoch": 36.86303939962477, "grad_norm": 0.48676881194114685, "learning_rate": 0.002828, "loss": 1.1342, "step": 491200 }, { "epoch": 36.86784240150094, "grad_norm": 0.5914056897163391, "learning_rate": 0.002828, "loss": 1.1321, "step": 491264 }, { "epoch": 36.87264540337711, "grad_norm": 0.6446478962898254, "learning_rate": 0.002828, "loss": 1.128, "step": 491328 }, { "epoch": 36.87744840525328, "grad_norm": 0.6042805314064026, "learning_rate": 0.002828, "loss": 1.1299, "step": 491392 }, { "epoch": 36.88225140712946, "grad_norm": 0.5406030416488647, "learning_rate": 0.002828, "loss": 1.1354, "step": 491456 }, { "epoch": 36.88705440900563, "grad_norm": 0.5631481409072876, "learning_rate": 0.002828, "loss": 1.1374, "step": 491520 }, { "epoch": 36.8918574108818, "grad_norm": 0.6143375039100647, "learning_rate": 0.002828, "loss": 1.1303, "step": 491584 }, { "epoch": 36.896660412757974, "grad_norm": 0.5791195631027222, "learning_rate": 0.002828, "loss": 1.1346, "step": 491648 }, { "epoch": 36.901463414634144, "grad_norm": 0.6553552746772766, "learning_rate": 0.002828, "loss": 1.134, "step": 491712 }, { "epoch": 36.90626641651032, "grad_norm": 0.8238469958305359, "learning_rate": 0.002828, "loss": 1.1335, "step": 491776 }, { "epoch": 36.91106941838649, "grad_norm": 0.7293280959129333, "learning_rate": 0.002828, "loss": 1.1338, "step": 491840 }, { "epoch": 36.915872420262666, "grad_norm": 0.5596233010292053, "learning_rate": 0.002828, "loss": 1.1318, "step": 491904 }, { "epoch": 36.920675422138835, "grad_norm": 0.7237329483032227, "learning_rate": 0.002828, "loss": 1.1327, "step": 491968 }, { "epoch": 36.92547842401501, "grad_norm": 0.5710912346839905, "learning_rate": 0.002828, "loss": 1.1365, "step": 492032 }, { "epoch": 36.93028142589118, "grad_norm": 0.7024117708206177, "learning_rate": 0.002828, "loss": 1.1295, "step": 492096 }, { "epoch": 36.93508442776736, "grad_norm": 0.629672110080719, "learning_rate": 0.002828, "loss": 1.1367, "step": 492160 }, { "epoch": 36.93988742964353, "grad_norm": 0.7030770182609558, "learning_rate": 0.002828, "loss": 1.1313, "step": 492224 }, { "epoch": 36.9446904315197, "grad_norm": 0.6092023253440857, "learning_rate": 0.002828, "loss": 1.1352, "step": 492288 }, { "epoch": 36.94949343339587, "grad_norm": 0.5727062225341797, "learning_rate": 0.002828, "loss": 1.1333, "step": 492352 }, { "epoch": 36.95429643527204, "grad_norm": 0.9285120368003845, "learning_rate": 0.002828, "loss": 1.13, "step": 492416 }, { "epoch": 36.95909943714822, "grad_norm": 0.5199771523475647, "learning_rate": 0.002828, "loss": 1.1327, "step": 492480 }, { "epoch": 36.96390243902439, "grad_norm": 0.6424311995506287, "learning_rate": 0.002828, "loss": 1.1323, "step": 492544 }, { "epoch": 36.968705440900564, "grad_norm": 0.6531059145927429, "learning_rate": 0.002828, "loss": 1.1371, "step": 492608 }, { "epoch": 36.97350844277673, "grad_norm": 0.6560933589935303, "learning_rate": 0.002828, "loss": 1.1343, "step": 492672 }, { "epoch": 36.97831144465291, "grad_norm": 0.6605238914489746, "learning_rate": 0.002828, "loss": 1.1349, "step": 492736 }, { "epoch": 36.98311444652908, "grad_norm": 0.6459187865257263, "learning_rate": 0.002828, "loss": 1.1414, "step": 492800 }, { "epoch": 36.987917448405256, "grad_norm": 0.7478291392326355, "learning_rate": 0.002828, "loss": 1.1371, "step": 492864 }, { "epoch": 36.992720450281425, "grad_norm": 0.5982804894447327, "learning_rate": 0.002828, "loss": 1.141, "step": 492928 }, { "epoch": 36.9975234521576, "grad_norm": 0.6685630083084106, "learning_rate": 0.002828, "loss": 1.1354, "step": 492992 }, { "epoch": 37.00232645403377, "grad_norm": 0.6969371438026428, "learning_rate": 0.002828, "loss": 1.1187, "step": 493056 }, { "epoch": 37.00712945590995, "grad_norm": 0.6465162634849548, "learning_rate": 0.002828, "loss": 1.0988, "step": 493120 }, { "epoch": 37.011932457786116, "grad_norm": 0.5774773955345154, "learning_rate": 0.002828, "loss": 1.0939, "step": 493184 }, { "epoch": 37.016735459662286, "grad_norm": 0.6074837446212769, "learning_rate": 0.002828, "loss": 1.1, "step": 493248 }, { "epoch": 37.02153846153846, "grad_norm": 0.5828660726547241, "learning_rate": 0.002828, "loss": 1.0984, "step": 493312 }, { "epoch": 37.02634146341463, "grad_norm": 0.5609144568443298, "learning_rate": 0.002828, "loss": 1.0944, "step": 493376 }, { "epoch": 37.03114446529081, "grad_norm": 0.6178881525993347, "learning_rate": 0.002828, "loss": 1.0958, "step": 493440 }, { "epoch": 37.03594746716698, "grad_norm": 0.661352276802063, "learning_rate": 0.002828, "loss": 1.1001, "step": 493504 }, { "epoch": 37.040750469043154, "grad_norm": 0.5430305004119873, "learning_rate": 0.002828, "loss": 1.105, "step": 493568 }, { "epoch": 37.04555347091932, "grad_norm": 0.5815340280532837, "learning_rate": 0.002828, "loss": 1.1003, "step": 493632 }, { "epoch": 37.0503564727955, "grad_norm": 0.6391292214393616, "learning_rate": 0.002828, "loss": 1.0955, "step": 493696 }, { "epoch": 37.05515947467167, "grad_norm": 0.5859202742576599, "learning_rate": 0.002828, "loss": 1.0967, "step": 493760 }, { "epoch": 37.059962476547845, "grad_norm": 0.6187096238136292, "learning_rate": 0.002828, "loss": 1.101, "step": 493824 }, { "epoch": 37.064765478424015, "grad_norm": 0.659162163734436, "learning_rate": 0.002828, "loss": 1.1019, "step": 493888 }, { "epoch": 37.06956848030019, "grad_norm": 0.6119021773338318, "learning_rate": 0.002828, "loss": 1.1004, "step": 493952 }, { "epoch": 37.07437148217636, "grad_norm": 0.7687475681304932, "learning_rate": 0.002828, "loss": 1.1025, "step": 494016 }, { "epoch": 37.07917448405253, "grad_norm": 0.622445821762085, "learning_rate": 0.002828, "loss": 1.1039, "step": 494080 }, { "epoch": 37.083977485928706, "grad_norm": 0.6039100885391235, "learning_rate": 0.002828, "loss": 1.0992, "step": 494144 }, { "epoch": 37.088780487804875, "grad_norm": 0.6412420272827148, "learning_rate": 0.002828, "loss": 1.1027, "step": 494208 }, { "epoch": 37.09358348968105, "grad_norm": 0.8018775582313538, "learning_rate": 0.002828, "loss": 1.1036, "step": 494272 }, { "epoch": 37.09838649155722, "grad_norm": 0.5880022644996643, "learning_rate": 0.002828, "loss": 1.1047, "step": 494336 }, { "epoch": 37.1031894934334, "grad_norm": 0.6250966191291809, "learning_rate": 0.002828, "loss": 1.0999, "step": 494400 }, { "epoch": 37.10799249530957, "grad_norm": 0.5100603103637695, "learning_rate": 0.002828, "loss": 1.095, "step": 494464 }, { "epoch": 37.11279549718574, "grad_norm": 0.494398832321167, "learning_rate": 0.002828, "loss": 1.1086, "step": 494528 }, { "epoch": 37.11759849906191, "grad_norm": 0.5320671200752258, "learning_rate": 0.002828, "loss": 1.1065, "step": 494592 }, { "epoch": 37.12240150093809, "grad_norm": 0.5626203417778015, "learning_rate": 0.002828, "loss": 1.1079, "step": 494656 }, { "epoch": 37.12720450281426, "grad_norm": 0.6252749562263489, "learning_rate": 0.002828, "loss": 1.1043, "step": 494720 }, { "epoch": 37.132007504690435, "grad_norm": 0.6196590662002563, "learning_rate": 0.002828, "loss": 1.1061, "step": 494784 }, { "epoch": 37.136810506566604, "grad_norm": 0.5838425159454346, "learning_rate": 0.002828, "loss": 1.1026, "step": 494848 }, { "epoch": 37.141613508442774, "grad_norm": 0.6245355606079102, "learning_rate": 0.002828, "loss": 1.1104, "step": 494912 }, { "epoch": 37.14641651031895, "grad_norm": 0.5560353994369507, "learning_rate": 0.002828, "loss": 1.106, "step": 494976 }, { "epoch": 37.15121951219512, "grad_norm": 0.6064783930778503, "learning_rate": 0.002828, "loss": 1.1084, "step": 495040 }, { "epoch": 37.156022514071296, "grad_norm": 0.6612809300422668, "learning_rate": 0.002828, "loss": 1.1043, "step": 495104 }, { "epoch": 37.160825515947465, "grad_norm": 0.6238330006599426, "learning_rate": 0.002828, "loss": 1.1094, "step": 495168 }, { "epoch": 37.16562851782364, "grad_norm": 0.6410529613494873, "learning_rate": 0.002828, "loss": 1.0994, "step": 495232 }, { "epoch": 37.17043151969981, "grad_norm": 0.6090975403785706, "learning_rate": 0.002828, "loss": 1.1082, "step": 495296 }, { "epoch": 37.17523452157599, "grad_norm": 0.6249683499336243, "learning_rate": 0.002828, "loss": 1.1113, "step": 495360 }, { "epoch": 37.18003752345216, "grad_norm": 0.6652005314826965, "learning_rate": 0.002828, "loss": 1.1086, "step": 495424 }, { "epoch": 37.18484052532833, "grad_norm": 0.73992520570755, "learning_rate": 0.002828, "loss": 1.1009, "step": 495488 }, { "epoch": 37.1896435272045, "grad_norm": 0.7139239311218262, "learning_rate": 0.002828, "loss": 1.1062, "step": 495552 }, { "epoch": 37.19444652908068, "grad_norm": 0.7656526565551758, "learning_rate": 0.002828, "loss": 1.1103, "step": 495616 }, { "epoch": 37.19924953095685, "grad_norm": 0.6346558928489685, "learning_rate": 0.002828, "loss": 1.1107, "step": 495680 }, { "epoch": 37.20405253283302, "grad_norm": 0.5977003574371338, "learning_rate": 0.002828, "loss": 1.1101, "step": 495744 }, { "epoch": 37.208855534709194, "grad_norm": 0.6296799182891846, "learning_rate": 0.002828, "loss": 1.107, "step": 495808 }, { "epoch": 37.21365853658536, "grad_norm": 0.6340168118476868, "learning_rate": 0.002828, "loss": 1.1033, "step": 495872 }, { "epoch": 37.21846153846154, "grad_norm": 0.7288328409194946, "learning_rate": 0.002828, "loss": 1.1083, "step": 495936 }, { "epoch": 37.22326454033771, "grad_norm": 0.6049259901046753, "learning_rate": 0.002828, "loss": 1.1039, "step": 496000 }, { "epoch": 37.228067542213886, "grad_norm": 0.592760443687439, "learning_rate": 0.002828, "loss": 1.1101, "step": 496064 }, { "epoch": 37.232870544090055, "grad_norm": 0.5616378784179688, "learning_rate": 0.002828, "loss": 1.1076, "step": 496128 }, { "epoch": 37.23767354596623, "grad_norm": 0.45006880164146423, "learning_rate": 0.002828, "loss": 1.1095, "step": 496192 }, { "epoch": 37.2424765478424, "grad_norm": 0.6839893460273743, "learning_rate": 0.002828, "loss": 1.1145, "step": 496256 }, { "epoch": 37.24727954971858, "grad_norm": 0.591373085975647, "learning_rate": 0.002828, "loss": 1.1079, "step": 496320 }, { "epoch": 37.25208255159475, "grad_norm": 0.7545192837715149, "learning_rate": 0.002828, "loss": 1.1057, "step": 496384 }, { "epoch": 37.25688555347092, "grad_norm": 0.5493077635765076, "learning_rate": 0.002828, "loss": 1.1125, "step": 496448 }, { "epoch": 37.26168855534709, "grad_norm": 0.8801015615463257, "learning_rate": 0.002828, "loss": 1.108, "step": 496512 }, { "epoch": 37.26649155722326, "grad_norm": 0.5462446808815002, "learning_rate": 0.002828, "loss": 1.1068, "step": 496576 }, { "epoch": 37.27129455909944, "grad_norm": 0.5896430611610413, "learning_rate": 0.002828, "loss": 1.1084, "step": 496640 }, { "epoch": 37.27609756097561, "grad_norm": 0.591148853302002, "learning_rate": 0.002828, "loss": 1.1141, "step": 496704 }, { "epoch": 37.280900562851784, "grad_norm": 0.6122753620147705, "learning_rate": 0.002828, "loss": 1.1117, "step": 496768 }, { "epoch": 37.28570356472795, "grad_norm": 0.7818361520767212, "learning_rate": 0.002828, "loss": 1.1154, "step": 496832 }, { "epoch": 37.29050656660413, "grad_norm": 0.5743238925933838, "learning_rate": 0.002828, "loss": 1.1104, "step": 496896 }, { "epoch": 37.2953095684803, "grad_norm": 0.5294697284698486, "learning_rate": 0.002828, "loss": 1.1169, "step": 496960 }, { "epoch": 37.300112570356475, "grad_norm": 0.7010504603385925, "learning_rate": 0.002828, "loss": 1.1158, "step": 497024 }, { "epoch": 37.304915572232645, "grad_norm": 0.6668693423271179, "learning_rate": 0.002828, "loss": 1.1098, "step": 497088 }, { "epoch": 37.30971857410882, "grad_norm": 0.6958891153335571, "learning_rate": 0.002828, "loss": 1.1093, "step": 497152 }, { "epoch": 37.31452157598499, "grad_norm": 0.5669229030609131, "learning_rate": 0.002828, "loss": 1.111, "step": 497216 }, { "epoch": 37.31932457786116, "grad_norm": 0.6313174962997437, "learning_rate": 0.002828, "loss": 1.1121, "step": 497280 }, { "epoch": 37.324127579737336, "grad_norm": 0.5094839930534363, "learning_rate": 0.002828, "loss": 1.1148, "step": 497344 }, { "epoch": 37.328930581613506, "grad_norm": 0.7712111473083496, "learning_rate": 0.002828, "loss": 1.1102, "step": 497408 }, { "epoch": 37.33373358348968, "grad_norm": 0.7262586355209351, "learning_rate": 0.002828, "loss": 1.1141, "step": 497472 }, { "epoch": 37.33853658536585, "grad_norm": 0.619162917137146, "learning_rate": 0.002828, "loss": 1.1117, "step": 497536 }, { "epoch": 37.34333958724203, "grad_norm": 0.634042501449585, "learning_rate": 0.002828, "loss": 1.1135, "step": 497600 }, { "epoch": 37.3481425891182, "grad_norm": 0.6961790919303894, "learning_rate": 0.002828, "loss": 1.1155, "step": 497664 }, { "epoch": 37.35294559099437, "grad_norm": 0.724677324295044, "learning_rate": 0.002828, "loss": 1.1122, "step": 497728 }, { "epoch": 37.35774859287054, "grad_norm": 0.6303979158401489, "learning_rate": 0.002828, "loss": 1.1105, "step": 497792 }, { "epoch": 37.36255159474672, "grad_norm": 0.5575435161590576, "learning_rate": 0.002828, "loss": 1.1127, "step": 497856 }, { "epoch": 37.36735459662289, "grad_norm": 0.6775777339935303, "learning_rate": 0.002828, "loss": 1.1157, "step": 497920 }, { "epoch": 37.372157598499065, "grad_norm": 0.4811544120311737, "learning_rate": 0.002828, "loss": 1.1143, "step": 497984 }, { "epoch": 37.376960600375234, "grad_norm": 0.6227823495864868, "learning_rate": 0.002828, "loss": 1.1153, "step": 498048 }, { "epoch": 37.381763602251404, "grad_norm": 0.6934810876846313, "learning_rate": 0.002828, "loss": 1.1181, "step": 498112 }, { "epoch": 37.38656660412758, "grad_norm": 0.600058913230896, "learning_rate": 0.002828, "loss": 1.1118, "step": 498176 }, { "epoch": 37.39136960600375, "grad_norm": 0.5795884132385254, "learning_rate": 0.002828, "loss": 1.1218, "step": 498240 }, { "epoch": 37.396172607879926, "grad_norm": 0.5940358638763428, "learning_rate": 0.002828, "loss": 1.1086, "step": 498304 }, { "epoch": 37.400975609756095, "grad_norm": 0.5886350870132446, "learning_rate": 0.002828, "loss": 1.1114, "step": 498368 }, { "epoch": 37.40577861163227, "grad_norm": 0.6366506814956665, "learning_rate": 0.002828, "loss": 1.1171, "step": 498432 }, { "epoch": 37.41058161350844, "grad_norm": 0.6446359753608704, "learning_rate": 0.002828, "loss": 1.1209, "step": 498496 }, { "epoch": 37.41538461538462, "grad_norm": 0.6479003429412842, "learning_rate": 0.002828, "loss": 1.1141, "step": 498560 }, { "epoch": 37.42018761726079, "grad_norm": 0.5944030284881592, "learning_rate": 0.002828, "loss": 1.1162, "step": 498624 }, { "epoch": 37.42499061913696, "grad_norm": 0.6356111168861389, "learning_rate": 0.002828, "loss": 1.1199, "step": 498688 }, { "epoch": 37.42979362101313, "grad_norm": 0.7332292795181274, "learning_rate": 0.002828, "loss": 1.1138, "step": 498752 }, { "epoch": 37.43459662288931, "grad_norm": 0.6613455414772034, "learning_rate": 0.002828, "loss": 1.1253, "step": 498816 }, { "epoch": 37.43939962476548, "grad_norm": 0.5517838001251221, "learning_rate": 0.002828, "loss": 1.1151, "step": 498880 }, { "epoch": 37.44420262664165, "grad_norm": 0.5878711342811584, "learning_rate": 0.002828, "loss": 1.1155, "step": 498944 }, { "epoch": 37.449005628517824, "grad_norm": 0.6955589652061462, "learning_rate": 0.002828, "loss": 1.1114, "step": 499008 }, { "epoch": 37.45380863039399, "grad_norm": 0.7319133281707764, "learning_rate": 0.002828, "loss": 1.1204, "step": 499072 }, { "epoch": 37.45861163227017, "grad_norm": 0.5313762426376343, "learning_rate": 0.002828, "loss": 1.1194, "step": 499136 }, { "epoch": 37.46341463414634, "grad_norm": 0.5796071290969849, "learning_rate": 0.002828, "loss": 1.121, "step": 499200 }, { "epoch": 37.468217636022516, "grad_norm": 0.632054328918457, "learning_rate": 0.002828, "loss": 1.1115, "step": 499264 }, { "epoch": 37.473020637898685, "grad_norm": 0.6562007069587708, "learning_rate": 0.002828, "loss": 1.1174, "step": 499328 }, { "epoch": 37.47782363977486, "grad_norm": 0.7263187170028687, "learning_rate": 0.002828, "loss": 1.124, "step": 499392 }, { "epoch": 37.48262664165103, "grad_norm": 0.7176118493080139, "learning_rate": 0.002828, "loss": 1.115, "step": 499456 }, { "epoch": 37.48742964352721, "grad_norm": 0.7256899476051331, "learning_rate": 0.002828, "loss": 1.1184, "step": 499520 }, { "epoch": 37.49223264540338, "grad_norm": 0.6485072374343872, "learning_rate": 0.002828, "loss": 1.119, "step": 499584 }, { "epoch": 37.49703564727955, "grad_norm": 0.589574933052063, "learning_rate": 0.002828, "loss": 1.1155, "step": 499648 }, { "epoch": 37.50183864915572, "grad_norm": 0.5531770586967468, "learning_rate": 0.002828, "loss": 1.1154, "step": 499712 }, { "epoch": 37.50664165103189, "grad_norm": 0.7608128786087036, "learning_rate": 0.002828, "loss": 1.1173, "step": 499776 }, { "epoch": 37.51144465290807, "grad_norm": 0.7428147196769714, "learning_rate": 0.002828, "loss": 1.1147, "step": 499840 }, { "epoch": 37.51624765478424, "grad_norm": 0.6598548889160156, "learning_rate": 0.002828, "loss": 1.1154, "step": 499904 }, { "epoch": 37.521050656660414, "grad_norm": 0.6032675504684448, "learning_rate": 0.002828, "loss": 1.1154, "step": 499968 }, { "epoch": 37.52585365853658, "grad_norm": 0.6908695101737976, "learning_rate": 0.002828, "loss": 1.1208, "step": 500032 }, { "epoch": 37.53065666041276, "grad_norm": 0.5637995600700378, "learning_rate": 0.002828, "loss": 1.1177, "step": 500096 }, { "epoch": 37.53545966228893, "grad_norm": 0.6846232414245605, "learning_rate": 0.002828, "loss": 1.1136, "step": 500160 }, { "epoch": 37.540262664165105, "grad_norm": 0.6805240511894226, "learning_rate": 0.002828, "loss": 1.1135, "step": 500224 }, { "epoch": 37.545065666041275, "grad_norm": 0.6433038711547852, "learning_rate": 0.002828, "loss": 1.1211, "step": 500288 }, { "epoch": 37.54986866791745, "grad_norm": 0.6407705545425415, "learning_rate": 0.002828, "loss": 1.1197, "step": 500352 }, { "epoch": 37.55467166979362, "grad_norm": 0.5954273343086243, "learning_rate": 0.002828, "loss": 1.1181, "step": 500416 }, { "epoch": 37.5594746716698, "grad_norm": 0.5801562666893005, "learning_rate": 0.002828, "loss": 1.1257, "step": 500480 }, { "epoch": 37.564277673545966, "grad_norm": 0.7970672249794006, "learning_rate": 0.002828, "loss": 1.1191, "step": 500544 }, { "epoch": 37.569080675422136, "grad_norm": 0.5519067049026489, "learning_rate": 0.002828, "loss": 1.1225, "step": 500608 }, { "epoch": 37.57388367729831, "grad_norm": 0.623494029045105, "learning_rate": 0.002828, "loss": 1.1178, "step": 500672 }, { "epoch": 37.57868667917448, "grad_norm": 0.5677382349967957, "learning_rate": 0.002828, "loss": 1.1205, "step": 500736 }, { "epoch": 37.58348968105066, "grad_norm": 0.6610050797462463, "learning_rate": 0.002828, "loss": 1.119, "step": 500800 }, { "epoch": 37.58829268292683, "grad_norm": 0.6180018782615662, "learning_rate": 0.002828, "loss": 1.1178, "step": 500864 }, { "epoch": 37.593095684803004, "grad_norm": 0.6084687113761902, "learning_rate": 0.002828, "loss": 1.1167, "step": 500928 }, { "epoch": 37.59789868667917, "grad_norm": 0.6760867834091187, "learning_rate": 0.002828, "loss": 1.1169, "step": 500992 }, { "epoch": 37.60270168855535, "grad_norm": 0.7423652410507202, "learning_rate": 0.002828, "loss": 1.1234, "step": 501056 }, { "epoch": 37.60750469043152, "grad_norm": 0.6940181255340576, "learning_rate": 0.002828, "loss": 1.1214, "step": 501120 }, { "epoch": 37.612307692307695, "grad_norm": 0.5058683156967163, "learning_rate": 0.002828, "loss": 1.1197, "step": 501184 }, { "epoch": 37.617110694183864, "grad_norm": 0.6302307844161987, "learning_rate": 0.002828, "loss": 1.1183, "step": 501248 }, { "epoch": 37.62191369606004, "grad_norm": 0.6181077361106873, "learning_rate": 0.002828, "loss": 1.1226, "step": 501312 }, { "epoch": 37.62671669793621, "grad_norm": 0.527346134185791, "learning_rate": 0.002828, "loss": 1.1219, "step": 501376 }, { "epoch": 37.63151969981238, "grad_norm": 0.5519901514053345, "learning_rate": 0.002828, "loss": 1.1215, "step": 501440 }, { "epoch": 37.636322701688556, "grad_norm": 0.6316769123077393, "learning_rate": 0.002828, "loss": 1.122, "step": 501504 }, { "epoch": 37.641125703564725, "grad_norm": 0.6549479961395264, "learning_rate": 0.002828, "loss": 1.1235, "step": 501568 }, { "epoch": 37.6459287054409, "grad_norm": 0.579494297504425, "learning_rate": 0.002828, "loss": 1.1224, "step": 501632 }, { "epoch": 37.65073170731707, "grad_norm": 0.6767232418060303, "learning_rate": 0.002828, "loss": 1.1196, "step": 501696 }, { "epoch": 37.65553470919325, "grad_norm": 0.5806661248207092, "learning_rate": 0.002828, "loss": 1.1173, "step": 501760 }, { "epoch": 37.66033771106942, "grad_norm": 0.6679486036300659, "learning_rate": 0.002828, "loss": 1.1177, "step": 501824 }, { "epoch": 37.66514071294559, "grad_norm": 0.7422885298728943, "learning_rate": 0.002828, "loss": 1.1167, "step": 501888 }, { "epoch": 37.66994371482176, "grad_norm": 0.663256824016571, "learning_rate": 0.002828, "loss": 1.1221, "step": 501952 }, { "epoch": 37.67474671669794, "grad_norm": 0.5884062647819519, "learning_rate": 0.002828, "loss": 1.1179, "step": 502016 }, { "epoch": 37.67954971857411, "grad_norm": 0.5877790451049805, "learning_rate": 0.002828, "loss": 1.116, "step": 502080 }, { "epoch": 37.684352720450285, "grad_norm": 0.5684241652488708, "learning_rate": 0.002828, "loss": 1.1154, "step": 502144 }, { "epoch": 37.689155722326454, "grad_norm": 0.46695685386657715, "learning_rate": 0.002828, "loss": 1.121, "step": 502208 }, { "epoch": 37.69395872420262, "grad_norm": 0.6337296962738037, "learning_rate": 0.002828, "loss": 1.122, "step": 502272 }, { "epoch": 37.6987617260788, "grad_norm": 0.6555072665214539, "learning_rate": 0.002828, "loss": 1.1239, "step": 502336 }, { "epoch": 37.70356472795497, "grad_norm": 0.6246955394744873, "learning_rate": 0.002828, "loss": 1.1192, "step": 502400 }, { "epoch": 37.708367729831146, "grad_norm": 0.6092773079872131, "learning_rate": 0.002828, "loss": 1.125, "step": 502464 }, { "epoch": 37.713170731707315, "grad_norm": 0.5604978203773499, "learning_rate": 0.002828, "loss": 1.1228, "step": 502528 }, { "epoch": 37.71797373358349, "grad_norm": 0.6719847321510315, "learning_rate": 0.002828, "loss": 1.1253, "step": 502592 }, { "epoch": 37.72277673545966, "grad_norm": 0.5437619686126709, "learning_rate": 0.002828, "loss": 1.1236, "step": 502656 }, { "epoch": 37.72757973733584, "grad_norm": 0.6136580109596252, "learning_rate": 0.002828, "loss": 1.1218, "step": 502720 }, { "epoch": 37.73238273921201, "grad_norm": 0.5170011520385742, "learning_rate": 0.002828, "loss": 1.1196, "step": 502784 }, { "epoch": 37.73718574108818, "grad_norm": 0.619965136051178, "learning_rate": 0.002828, "loss": 1.1244, "step": 502848 }, { "epoch": 37.74198874296435, "grad_norm": 0.6624310612678528, "learning_rate": 0.002828, "loss": 1.1222, "step": 502912 }, { "epoch": 37.74679174484053, "grad_norm": 0.651011049747467, "learning_rate": 0.002828, "loss": 1.1227, "step": 502976 }, { "epoch": 37.7515947467167, "grad_norm": 0.7418103218078613, "learning_rate": 0.002828, "loss": 1.1168, "step": 503040 }, { "epoch": 37.75639774859287, "grad_norm": 0.5328953266143799, "learning_rate": 0.002828, "loss": 1.125, "step": 503104 }, { "epoch": 37.761200750469044, "grad_norm": 0.5455359816551208, "learning_rate": 0.002828, "loss": 1.1166, "step": 503168 }, { "epoch": 37.76600375234521, "grad_norm": 0.5440381765365601, "learning_rate": 0.002828, "loss": 1.1239, "step": 503232 }, { "epoch": 37.77080675422139, "grad_norm": 0.7047117948532104, "learning_rate": 0.002828, "loss": 1.1265, "step": 503296 }, { "epoch": 37.77560975609756, "grad_norm": 0.6114978790283203, "learning_rate": 0.002828, "loss": 1.126, "step": 503360 }, { "epoch": 37.780412757973735, "grad_norm": 0.5973011255264282, "learning_rate": 0.002828, "loss": 1.1196, "step": 503424 }, { "epoch": 37.785215759849905, "grad_norm": 0.7089464664459229, "learning_rate": 0.002828, "loss": 1.1223, "step": 503488 }, { "epoch": 37.79001876172608, "grad_norm": 0.5899783372879028, "learning_rate": 0.002828, "loss": 1.1222, "step": 503552 }, { "epoch": 37.79482176360225, "grad_norm": 0.5558510422706604, "learning_rate": 0.002828, "loss": 1.1183, "step": 503616 }, { "epoch": 37.79962476547843, "grad_norm": 0.5553831458091736, "learning_rate": 0.002828, "loss": 1.1214, "step": 503680 }, { "epoch": 37.804427767354596, "grad_norm": 0.58292555809021, "learning_rate": 0.002828, "loss": 1.1272, "step": 503744 }, { "epoch": 37.809230769230766, "grad_norm": 0.6653010249137878, "learning_rate": 0.002828, "loss": 1.1235, "step": 503808 }, { "epoch": 37.81403377110694, "grad_norm": 0.6157090067863464, "learning_rate": 0.002828, "loss": 1.1226, "step": 503872 }, { "epoch": 37.81883677298311, "grad_norm": 0.7392660975456238, "learning_rate": 0.002828, "loss": 1.1201, "step": 503936 }, { "epoch": 37.82363977485929, "grad_norm": 0.5767449140548706, "learning_rate": 0.002828, "loss": 1.123, "step": 504000 }, { "epoch": 37.82844277673546, "grad_norm": 0.7190343141555786, "learning_rate": 0.002828, "loss": 1.1232, "step": 504064 }, { "epoch": 37.833245778611634, "grad_norm": 0.5598681569099426, "learning_rate": 0.002828, "loss": 1.1228, "step": 504128 }, { "epoch": 37.8380487804878, "grad_norm": 0.5747976899147034, "learning_rate": 0.002828, "loss": 1.1229, "step": 504192 }, { "epoch": 37.84285178236398, "grad_norm": 0.542398989200592, "learning_rate": 0.002828, "loss": 1.1238, "step": 504256 }, { "epoch": 37.84765478424015, "grad_norm": 0.5907443165779114, "learning_rate": 0.002828, "loss": 1.1252, "step": 504320 }, { "epoch": 37.852457786116325, "grad_norm": 0.5543791055679321, "learning_rate": 0.002828, "loss": 1.1284, "step": 504384 }, { "epoch": 37.857260787992494, "grad_norm": 0.6768468618392944, "learning_rate": 0.002828, "loss": 1.1181, "step": 504448 }, { "epoch": 37.86206378986867, "grad_norm": 0.7407353520393372, "learning_rate": 0.002828, "loss": 1.1261, "step": 504512 }, { "epoch": 37.86686679174484, "grad_norm": 0.5630684494972229, "learning_rate": 0.002828, "loss": 1.1292, "step": 504576 }, { "epoch": 37.87166979362101, "grad_norm": 0.5335458517074585, "learning_rate": 0.002828, "loss": 1.1274, "step": 504640 }, { "epoch": 37.876472795497186, "grad_norm": 0.6631928086280823, "learning_rate": 0.002828, "loss": 1.1204, "step": 504704 }, { "epoch": 37.881275797373355, "grad_norm": 0.7281734943389893, "learning_rate": 0.002828, "loss": 1.122, "step": 504768 }, { "epoch": 37.88607879924953, "grad_norm": 0.5659152269363403, "learning_rate": 0.002828, "loss": 1.1308, "step": 504832 }, { "epoch": 37.8908818011257, "grad_norm": 0.4987253248691559, "learning_rate": 0.002828, "loss": 1.1241, "step": 504896 }, { "epoch": 37.89568480300188, "grad_norm": 0.6153334379196167, "learning_rate": 0.002828, "loss": 1.124, "step": 504960 }, { "epoch": 37.90048780487805, "grad_norm": 0.5674315094947815, "learning_rate": 0.002828, "loss": 1.1288, "step": 505024 }, { "epoch": 37.90529080675422, "grad_norm": 0.5308176875114441, "learning_rate": 0.002828, "loss": 1.131, "step": 505088 }, { "epoch": 37.91009380863039, "grad_norm": 0.6265832781791687, "learning_rate": 0.002828, "loss": 1.1251, "step": 505152 }, { "epoch": 37.91489681050657, "grad_norm": 0.6296497583389282, "learning_rate": 0.002828, "loss": 1.1206, "step": 505216 }, { "epoch": 37.91969981238274, "grad_norm": 0.6319650411605835, "learning_rate": 0.002828, "loss": 1.1263, "step": 505280 }, { "epoch": 37.924502814258915, "grad_norm": 0.6511676907539368, "learning_rate": 0.002828, "loss": 1.1226, "step": 505344 }, { "epoch": 37.929305816135084, "grad_norm": 0.6702365279197693, "learning_rate": 0.002828, "loss": 1.1238, "step": 505408 }, { "epoch": 37.934108818011254, "grad_norm": 0.558114767074585, "learning_rate": 0.002828, "loss": 1.1272, "step": 505472 }, { "epoch": 37.93891181988743, "grad_norm": 0.6984198689460754, "learning_rate": 0.002828, "loss": 1.123, "step": 505536 }, { "epoch": 37.9437148217636, "grad_norm": 0.6288992166519165, "learning_rate": 0.002828, "loss": 1.125, "step": 505600 }, { "epoch": 37.948517823639776, "grad_norm": 0.5294339060783386, "learning_rate": 0.002828, "loss": 1.1236, "step": 505664 }, { "epoch": 37.953320825515945, "grad_norm": 0.5702909827232361, "learning_rate": 0.002828, "loss": 1.1221, "step": 505728 }, { "epoch": 37.95812382739212, "grad_norm": 0.6646183133125305, "learning_rate": 0.002828, "loss": 1.1326, "step": 505792 }, { "epoch": 37.96292682926829, "grad_norm": 0.5951704978942871, "learning_rate": 0.002828, "loss": 1.1261, "step": 505856 }, { "epoch": 37.96772983114447, "grad_norm": 0.8041133284568787, "learning_rate": 0.002828, "loss": 1.1215, "step": 505920 }, { "epoch": 37.97253283302064, "grad_norm": 0.7284034490585327, "learning_rate": 0.002828, "loss": 1.1254, "step": 505984 }, { "epoch": 37.97733583489681, "grad_norm": 0.6413600444793701, "learning_rate": 0.002828, "loss": 1.1193, "step": 506048 }, { "epoch": 37.98213883677298, "grad_norm": 0.6052785515785217, "learning_rate": 0.002828, "loss": 1.1267, "step": 506112 }, { "epoch": 37.98694183864916, "grad_norm": 0.7194735407829285, "learning_rate": 0.002828, "loss": 1.1246, "step": 506176 }, { "epoch": 37.99174484052533, "grad_norm": 0.5225487351417542, "learning_rate": 0.002828, "loss": 1.1266, "step": 506240 }, { "epoch": 37.9965478424015, "grad_norm": 0.6789017915725708, "learning_rate": 0.002828, "loss": 1.1258, "step": 506304 }, { "epoch": 38.001350844277674, "grad_norm": 0.6713529229164124, "learning_rate": 0.002828, "loss": 1.1187, "step": 506368 }, { "epoch": 38.00615384615384, "grad_norm": 0.594455897808075, "learning_rate": 0.002828, "loss": 1.0846, "step": 506432 }, { "epoch": 38.01095684803002, "grad_norm": 0.5679947137832642, "learning_rate": 0.002828, "loss": 1.0902, "step": 506496 }, { "epoch": 38.01575984990619, "grad_norm": 0.6373549103736877, "learning_rate": 0.002828, "loss": 1.0958, "step": 506560 }, { "epoch": 38.020562851782365, "grad_norm": 0.603205144405365, "learning_rate": 0.002828, "loss": 1.0907, "step": 506624 }, { "epoch": 38.025365853658535, "grad_norm": 0.5866041779518127, "learning_rate": 0.002828, "loss": 1.0973, "step": 506688 }, { "epoch": 38.03016885553471, "grad_norm": 0.7726960182189941, "learning_rate": 0.002828, "loss": 1.0933, "step": 506752 }, { "epoch": 38.03497185741088, "grad_norm": 0.6744563579559326, "learning_rate": 0.002828, "loss": 1.0959, "step": 506816 }, { "epoch": 38.03977485928706, "grad_norm": 0.6458048224449158, "learning_rate": 0.002828, "loss": 1.0906, "step": 506880 }, { "epoch": 38.044577861163226, "grad_norm": 0.6350529193878174, "learning_rate": 0.002828, "loss": 1.0889, "step": 506944 }, { "epoch": 38.0493808630394, "grad_norm": 0.5482154488563538, "learning_rate": 0.002828, "loss": 1.0905, "step": 507008 }, { "epoch": 38.05418386491557, "grad_norm": 0.5511150360107422, "learning_rate": 0.002828, "loss": 1.0931, "step": 507072 }, { "epoch": 38.05898686679174, "grad_norm": 0.5821910500526428, "learning_rate": 0.002828, "loss": 1.0922, "step": 507136 }, { "epoch": 38.06378986866792, "grad_norm": 0.5219399929046631, "learning_rate": 0.002828, "loss": 1.0928, "step": 507200 }, { "epoch": 38.06859287054409, "grad_norm": 0.7586058378219604, "learning_rate": 0.002828, "loss": 1.0944, "step": 507264 }, { "epoch": 38.073395872420264, "grad_norm": 0.6015982627868652, "learning_rate": 0.002828, "loss": 1.0933, "step": 507328 }, { "epoch": 38.07819887429643, "grad_norm": 0.598664402961731, "learning_rate": 0.002828, "loss": 1.0922, "step": 507392 }, { "epoch": 38.08300187617261, "grad_norm": 0.5428532361984253, "learning_rate": 0.002828, "loss": 1.091, "step": 507456 }, { "epoch": 38.08780487804878, "grad_norm": 0.6144164204597473, "learning_rate": 0.002828, "loss": 1.0968, "step": 507520 }, { "epoch": 38.092607879924955, "grad_norm": 0.5404438376426697, "learning_rate": 0.002828, "loss": 1.0952, "step": 507584 }, { "epoch": 38.097410881801125, "grad_norm": 0.6267830729484558, "learning_rate": 0.002828, "loss": 1.0978, "step": 507648 }, { "epoch": 38.1022138836773, "grad_norm": 0.6026227474212646, "learning_rate": 0.002828, "loss": 1.0913, "step": 507712 }, { "epoch": 38.10701688555347, "grad_norm": 0.6021787524223328, "learning_rate": 0.002828, "loss": 1.0947, "step": 507776 }, { "epoch": 38.11181988742965, "grad_norm": 0.7789412140846252, "learning_rate": 0.002828, "loss": 1.0922, "step": 507840 }, { "epoch": 38.116622889305816, "grad_norm": 0.663597583770752, "learning_rate": 0.002828, "loss": 1.0943, "step": 507904 }, { "epoch": 38.121425891181985, "grad_norm": 0.6256552338600159, "learning_rate": 0.002828, "loss": 1.0924, "step": 507968 }, { "epoch": 38.12622889305816, "grad_norm": 0.6621292233467102, "learning_rate": 0.002828, "loss": 1.0954, "step": 508032 }, { "epoch": 38.13103189493433, "grad_norm": 0.6057916283607483, "learning_rate": 0.002828, "loss": 1.0977, "step": 508096 }, { "epoch": 38.13583489681051, "grad_norm": 0.606999397277832, "learning_rate": 0.002828, "loss": 1.0945, "step": 508160 }, { "epoch": 38.14063789868668, "grad_norm": 0.5733497142791748, "learning_rate": 0.002828, "loss": 1.0998, "step": 508224 }, { "epoch": 38.14544090056285, "grad_norm": 0.5235196352005005, "learning_rate": 0.002828, "loss": 1.1009, "step": 508288 }, { "epoch": 38.15024390243902, "grad_norm": 0.688124418258667, "learning_rate": 0.002828, "loss": 1.0945, "step": 508352 }, { "epoch": 38.1550469043152, "grad_norm": 0.5677928328514099, "learning_rate": 0.002828, "loss": 1.0985, "step": 508416 }, { "epoch": 38.15984990619137, "grad_norm": 0.7261043190956116, "learning_rate": 0.002828, "loss": 1.1007, "step": 508480 }, { "epoch": 38.164652908067545, "grad_norm": 0.6233505606651306, "learning_rate": 0.002828, "loss": 1.0984, "step": 508544 }, { "epoch": 38.169455909943714, "grad_norm": 0.5829945206642151, "learning_rate": 0.002828, "loss": 1.0973, "step": 508608 }, { "epoch": 38.17425891181989, "grad_norm": 0.6414790749549866, "learning_rate": 0.002828, "loss": 1.0946, "step": 508672 }, { "epoch": 38.17906191369606, "grad_norm": 0.5896680951118469, "learning_rate": 0.002828, "loss": 1.0977, "step": 508736 }, { "epoch": 38.18386491557223, "grad_norm": 0.8097200989723206, "learning_rate": 0.002828, "loss": 1.1029, "step": 508800 }, { "epoch": 38.188667917448406, "grad_norm": 0.5933405160903931, "learning_rate": 0.002828, "loss": 1.1028, "step": 508864 }, { "epoch": 38.193470919324575, "grad_norm": 0.5823057889938354, "learning_rate": 0.002828, "loss": 1.0985, "step": 508928 }, { "epoch": 38.19827392120075, "grad_norm": 0.6513922810554504, "learning_rate": 0.002828, "loss": 1.1007, "step": 508992 }, { "epoch": 38.20307692307692, "grad_norm": 0.5925971865653992, "learning_rate": 0.002828, "loss": 1.1033, "step": 509056 }, { "epoch": 38.2078799249531, "grad_norm": 0.5212792158126831, "learning_rate": 0.002828, "loss": 1.0937, "step": 509120 }, { "epoch": 38.21268292682927, "grad_norm": 0.6763430833816528, "learning_rate": 0.002828, "loss": 1.1048, "step": 509184 }, { "epoch": 38.21748592870544, "grad_norm": 0.6594332456588745, "learning_rate": 0.002828, "loss": 1.0959, "step": 509248 }, { "epoch": 38.22228893058161, "grad_norm": 0.4742533266544342, "learning_rate": 0.002828, "loss": 1.0996, "step": 509312 }, { "epoch": 38.22709193245779, "grad_norm": 0.6435297131538391, "learning_rate": 0.002828, "loss": 1.1035, "step": 509376 }, { "epoch": 38.23189493433396, "grad_norm": 0.6899946928024292, "learning_rate": 0.002828, "loss": 1.1011, "step": 509440 }, { "epoch": 38.236697936210135, "grad_norm": 0.8009315729141235, "learning_rate": 0.002828, "loss": 1.0996, "step": 509504 }, { "epoch": 38.241500938086304, "grad_norm": 0.7253291606903076, "learning_rate": 0.002828, "loss": 1.1016, "step": 509568 }, { "epoch": 38.24630393996247, "grad_norm": 0.7756628394126892, "learning_rate": 0.002828, "loss": 1.105, "step": 509632 }, { "epoch": 38.25110694183865, "grad_norm": 0.6615393757820129, "learning_rate": 0.002828, "loss": 1.0979, "step": 509696 }, { "epoch": 38.25590994371482, "grad_norm": 0.5859424471855164, "learning_rate": 0.002828, "loss": 1.0973, "step": 509760 }, { "epoch": 38.260712945590996, "grad_norm": 0.5020551681518555, "learning_rate": 0.002828, "loss": 1.1073, "step": 509824 }, { "epoch": 38.265515947467165, "grad_norm": 0.6972935795783997, "learning_rate": 0.002828, "loss": 1.1034, "step": 509888 }, { "epoch": 38.27031894934334, "grad_norm": 0.5921815633773804, "learning_rate": 0.002828, "loss": 1.1032, "step": 509952 }, { "epoch": 38.27512195121951, "grad_norm": 0.6624758243560791, "learning_rate": 0.002828, "loss": 1.1025, "step": 510016 }, { "epoch": 38.27992495309569, "grad_norm": 0.6163641810417175, "learning_rate": 0.002828, "loss": 1.1061, "step": 510080 }, { "epoch": 38.284727954971856, "grad_norm": 0.5077274441719055, "learning_rate": 0.002828, "loss": 1.1049, "step": 510144 }, { "epoch": 38.28953095684803, "grad_norm": 0.6114723682403564, "learning_rate": 0.002828, "loss": 1.1052, "step": 510208 }, { "epoch": 38.2943339587242, "grad_norm": 0.5323904156684875, "learning_rate": 0.002828, "loss": 1.1044, "step": 510272 }, { "epoch": 38.29913696060038, "grad_norm": 0.526978075504303, "learning_rate": 0.002828, "loss": 1.1013, "step": 510336 }, { "epoch": 38.30393996247655, "grad_norm": 0.6368070244789124, "learning_rate": 0.002828, "loss": 1.1009, "step": 510400 }, { "epoch": 38.30874296435272, "grad_norm": 0.5327396988868713, "learning_rate": 0.002828, "loss": 1.1024, "step": 510464 }, { "epoch": 38.313545966228894, "grad_norm": 0.7854524850845337, "learning_rate": 0.002828, "loss": 1.1085, "step": 510528 }, { "epoch": 38.31834896810506, "grad_norm": 0.5681763291358948, "learning_rate": 0.002828, "loss": 1.1038, "step": 510592 }, { "epoch": 38.32315196998124, "grad_norm": 0.5100849866867065, "learning_rate": 0.002828, "loss": 1.104, "step": 510656 }, { "epoch": 38.32795497185741, "grad_norm": 0.5657297968864441, "learning_rate": 0.002828, "loss": 1.1042, "step": 510720 }, { "epoch": 38.332757973733585, "grad_norm": 0.6012672781944275, "learning_rate": 0.002828, "loss": 1.1009, "step": 510784 }, { "epoch": 38.337560975609755, "grad_norm": 0.5699858665466309, "learning_rate": 0.002828, "loss": 1.11, "step": 510848 }, { "epoch": 38.34236397748593, "grad_norm": 0.6629965305328369, "learning_rate": 0.002828, "loss": 1.1089, "step": 510912 }, { "epoch": 38.3471669793621, "grad_norm": 0.5883797407150269, "learning_rate": 0.002828, "loss": 1.1072, "step": 510976 }, { "epoch": 38.35196998123828, "grad_norm": 0.5491295456886292, "learning_rate": 0.002828, "loss": 1.1028, "step": 511040 }, { "epoch": 38.356772983114446, "grad_norm": 0.5589887499809265, "learning_rate": 0.002828, "loss": 1.1025, "step": 511104 }, { "epoch": 38.36157598499062, "grad_norm": 0.7199347019195557, "learning_rate": 0.002828, "loss": 1.1031, "step": 511168 }, { "epoch": 38.36637898686679, "grad_norm": 0.5486679673194885, "learning_rate": 0.002828, "loss": 1.1099, "step": 511232 }, { "epoch": 38.37118198874296, "grad_norm": 0.6592404246330261, "learning_rate": 0.002828, "loss": 1.1115, "step": 511296 }, { "epoch": 38.37598499061914, "grad_norm": 0.7273109555244446, "learning_rate": 0.002828, "loss": 1.1117, "step": 511360 }, { "epoch": 38.38078799249531, "grad_norm": 0.6115809679031372, "learning_rate": 0.002828, "loss": 1.1043, "step": 511424 }, { "epoch": 38.38559099437148, "grad_norm": 0.688369870185852, "learning_rate": 0.002828, "loss": 1.1059, "step": 511488 }, { "epoch": 38.39039399624765, "grad_norm": 0.6574243307113647, "learning_rate": 0.002828, "loss": 1.1098, "step": 511552 }, { "epoch": 38.39519699812383, "grad_norm": 0.6283542513847351, "learning_rate": 0.002828, "loss": 1.1073, "step": 511616 }, { "epoch": 38.4, "grad_norm": 0.6580241918563843, "learning_rate": 0.002828, "loss": 1.1053, "step": 511680 }, { "epoch": 38.404803001876175, "grad_norm": 0.6294876337051392, "learning_rate": 0.002828, "loss": 1.078, "step": 511744 }, { "epoch": 38.409606003752344, "grad_norm": 0.6697729825973511, "learning_rate": 0.002828, "loss": 1.0859, "step": 511808 }, { "epoch": 38.41440900562852, "grad_norm": 0.587795615196228, "learning_rate": 0.002828, "loss": 1.081, "step": 511872 }, { "epoch": 38.41921200750469, "grad_norm": 0.5539612174034119, "learning_rate": 0.002828, "loss": 1.0887, "step": 511936 }, { "epoch": 38.42401500938087, "grad_norm": 0.5980706810951233, "learning_rate": 0.002828, "loss": 1.0868, "step": 512000 }, { "epoch": 38.428818011257036, "grad_norm": 0.5685859322547913, "learning_rate": 0.002828, "loss": 1.0872, "step": 512064 }, { "epoch": 38.433621013133205, "grad_norm": 0.8710797429084778, "learning_rate": 0.002828, "loss": 1.0896, "step": 512128 }, { "epoch": 38.43842401500938, "grad_norm": 0.5642542243003845, "learning_rate": 0.002828, "loss": 1.0899, "step": 512192 }, { "epoch": 38.44322701688555, "grad_norm": 0.7766580581665039, "learning_rate": 0.002828, "loss": 1.0905, "step": 512256 }, { "epoch": 38.44803001876173, "grad_norm": 0.5817084908485413, "learning_rate": 0.002828, "loss": 1.0894, "step": 512320 }, { "epoch": 38.4528330206379, "grad_norm": 0.6450785994529724, "learning_rate": 0.002828, "loss": 1.0845, "step": 512384 }, { "epoch": 38.45763602251407, "grad_norm": 0.6442525386810303, "learning_rate": 0.002828, "loss": 1.0891, "step": 512448 }, { "epoch": 38.46243902439024, "grad_norm": 0.7165563106536865, "learning_rate": 0.002828, "loss": 1.0873, "step": 512512 }, { "epoch": 38.46724202626642, "grad_norm": 0.6603183150291443, "learning_rate": 0.002828, "loss": 1.0875, "step": 512576 }, { "epoch": 38.47204502814259, "grad_norm": 0.5756245255470276, "learning_rate": 0.002828, "loss": 1.0897, "step": 512640 }, { "epoch": 38.476848030018765, "grad_norm": 0.5640691518783569, "learning_rate": 0.002828, "loss": 1.0868, "step": 512704 }, { "epoch": 38.481651031894934, "grad_norm": 0.5975730419158936, "learning_rate": 0.002828, "loss": 1.0905, "step": 512768 }, { "epoch": 38.48645403377111, "grad_norm": 0.7235714197158813, "learning_rate": 0.002828, "loss": 1.0891, "step": 512832 }, { "epoch": 38.49125703564728, "grad_norm": 0.691107988357544, "learning_rate": 0.002828, "loss": 1.0892, "step": 512896 }, { "epoch": 38.49606003752345, "grad_norm": 0.657853364944458, "learning_rate": 0.002828, "loss": 1.0908, "step": 512960 }, { "epoch": 38.500863039399626, "grad_norm": 0.5610063672065735, "learning_rate": 0.002828, "loss": 1.0891, "step": 513024 }, { "epoch": 38.505666041275795, "grad_norm": 0.5976006388664246, "learning_rate": 0.002828, "loss": 1.088, "step": 513088 }, { "epoch": 38.51046904315197, "grad_norm": 0.4549904465675354, "learning_rate": 0.002828, "loss": 1.0861, "step": 513152 }, { "epoch": 38.51527204502814, "grad_norm": 0.7005747556686401, "learning_rate": 0.002828, "loss": 1.0903, "step": 513216 }, { "epoch": 38.52007504690432, "grad_norm": 0.7073675990104675, "learning_rate": 0.002828, "loss": 1.0905, "step": 513280 }, { "epoch": 38.52487804878049, "grad_norm": 0.5825762152671814, "learning_rate": 0.002828, "loss": 1.0887, "step": 513344 }, { "epoch": 38.52968105065666, "grad_norm": 0.6172112822532654, "learning_rate": 0.002828, "loss": 1.0941, "step": 513408 }, { "epoch": 38.53448405253283, "grad_norm": 0.7240038514137268, "learning_rate": 0.002828, "loss": 1.0974, "step": 513472 }, { "epoch": 38.53928705440901, "grad_norm": 0.687610924243927, "learning_rate": 0.002828, "loss": 1.0917, "step": 513536 }, { "epoch": 38.54409005628518, "grad_norm": 0.5815373063087463, "learning_rate": 0.002828, "loss": 1.0937, "step": 513600 }, { "epoch": 38.54889305816135, "grad_norm": 0.5881292223930359, "learning_rate": 0.002828, "loss": 1.088, "step": 513664 }, { "epoch": 38.553696060037524, "grad_norm": 0.5459499359130859, "learning_rate": 0.002828, "loss": 1.0878, "step": 513728 }, { "epoch": 38.55849906191369, "grad_norm": 0.5804657340049744, "learning_rate": 0.002828, "loss": 1.0973, "step": 513792 }, { "epoch": 38.56330206378987, "grad_norm": 0.8041890859603882, "learning_rate": 0.002828, "loss": 1.0948, "step": 513856 }, { "epoch": 38.56810506566604, "grad_norm": 0.5507553815841675, "learning_rate": 0.002828, "loss": 1.0945, "step": 513920 }, { "epoch": 38.572908067542215, "grad_norm": 0.5750390291213989, "learning_rate": 0.002828, "loss": 1.0874, "step": 513984 }, { "epoch": 38.577711069418385, "grad_norm": 0.7084633708000183, "learning_rate": 0.002828, "loss": 1.0955, "step": 514048 }, { "epoch": 38.58251407129456, "grad_norm": 0.538191020488739, "learning_rate": 0.002828, "loss": 1.0871, "step": 514112 }, { "epoch": 38.58731707317073, "grad_norm": 0.5860592126846313, "learning_rate": 0.002828, "loss": 1.0946, "step": 514176 }, { "epoch": 38.59212007504691, "grad_norm": 0.7087304592132568, "learning_rate": 0.002828, "loss": 1.0938, "step": 514240 }, { "epoch": 38.596923076923076, "grad_norm": 0.736056923866272, "learning_rate": 0.002828, "loss": 1.0931, "step": 514304 }, { "epoch": 38.60172607879925, "grad_norm": 0.5794273614883423, "learning_rate": 0.002828, "loss": 1.092, "step": 514368 }, { "epoch": 38.60652908067542, "grad_norm": 0.5700469613075256, "learning_rate": 0.002828, "loss": 1.0928, "step": 514432 }, { "epoch": 38.61133208255159, "grad_norm": 0.8039986491203308, "learning_rate": 0.002828, "loss": 1.0953, "step": 514496 }, { "epoch": 38.61613508442777, "grad_norm": 0.5886857509613037, "learning_rate": 0.002828, "loss": 1.0937, "step": 514560 }, { "epoch": 38.62093808630394, "grad_norm": 0.70937579870224, "learning_rate": 0.002828, "loss": 1.0886, "step": 514624 }, { "epoch": 38.62574108818011, "grad_norm": 0.5150787830352783, "learning_rate": 0.002828, "loss": 1.0991, "step": 514688 }, { "epoch": 38.63054409005628, "grad_norm": 0.5741467475891113, "learning_rate": 0.002828, "loss": 1.1033, "step": 514752 }, { "epoch": 38.63534709193246, "grad_norm": 0.6546338796615601, "learning_rate": 0.002828, "loss": 1.0949, "step": 514816 }, { "epoch": 38.64015009380863, "grad_norm": 0.6613404750823975, "learning_rate": 0.002828, "loss": 1.0983, "step": 514880 }, { "epoch": 38.644953095684805, "grad_norm": 0.6713902950286865, "learning_rate": 0.002828, "loss": 1.0915, "step": 514944 }, { "epoch": 38.649756097560974, "grad_norm": 0.701934814453125, "learning_rate": 0.002828, "loss": 1.0891, "step": 515008 }, { "epoch": 38.65455909943715, "grad_norm": 0.7185537815093994, "learning_rate": 0.002828, "loss": 1.1002, "step": 515072 }, { "epoch": 38.65936210131332, "grad_norm": 0.7131350040435791, "learning_rate": 0.002828, "loss": 1.0986, "step": 515136 }, { "epoch": 38.6641651031895, "grad_norm": 0.5673279762268066, "learning_rate": 0.002828, "loss": 1.0947, "step": 515200 }, { "epoch": 38.668968105065666, "grad_norm": 0.8174933791160583, "learning_rate": 0.002828, "loss": 1.1058, "step": 515264 }, { "epoch": 38.673771106941835, "grad_norm": 0.6347920298576355, "learning_rate": 0.002828, "loss": 1.0964, "step": 515328 }, { "epoch": 38.67857410881801, "grad_norm": 0.630473792552948, "learning_rate": 0.002828, "loss": 1.1003, "step": 515392 }, { "epoch": 38.68337711069418, "grad_norm": 0.7728782296180725, "learning_rate": 0.002828, "loss": 1.0962, "step": 515456 }, { "epoch": 38.68818011257036, "grad_norm": 0.6685201525688171, "learning_rate": 0.002828, "loss": 1.0965, "step": 515520 }, { "epoch": 38.69298311444653, "grad_norm": 0.5990980267524719, "learning_rate": 0.002828, "loss": 1.0971, "step": 515584 }, { "epoch": 38.6977861163227, "grad_norm": 0.6950483918190002, "learning_rate": 0.002828, "loss": 1.1007, "step": 515648 }, { "epoch": 38.70258911819887, "grad_norm": 0.6069904565811157, "learning_rate": 0.002828, "loss": 1.0995, "step": 515712 }, { "epoch": 38.70739212007505, "grad_norm": 0.6790332794189453, "learning_rate": 0.002828, "loss": 1.0931, "step": 515776 }, { "epoch": 38.71219512195122, "grad_norm": 0.6225919127464294, "learning_rate": 0.002828, "loss": 1.0946, "step": 515840 }, { "epoch": 38.716998123827395, "grad_norm": 0.5538632869720459, "learning_rate": 0.002828, "loss": 1.0975, "step": 515904 }, { "epoch": 38.721801125703564, "grad_norm": 0.5822528600692749, "learning_rate": 0.002828, "loss": 1.1019, "step": 515968 }, { "epoch": 38.72660412757974, "grad_norm": 0.5993365049362183, "learning_rate": 0.002828, "loss": 1.099, "step": 516032 }, { "epoch": 38.73140712945591, "grad_norm": 0.5491153001785278, "learning_rate": 0.002828, "loss": 1.0961, "step": 516096 }, { "epoch": 38.73621013133208, "grad_norm": 0.5945103168487549, "learning_rate": 0.002828, "loss": 1.0929, "step": 516160 }, { "epoch": 38.741013133208256, "grad_norm": 0.7646733522415161, "learning_rate": 0.002828, "loss": 1.1043, "step": 516224 }, { "epoch": 38.745816135084425, "grad_norm": 0.5670483112335205, "learning_rate": 0.002828, "loss": 1.0979, "step": 516288 }, { "epoch": 38.7506191369606, "grad_norm": 0.5190681219100952, "learning_rate": 0.002828, "loss": 1.0952, "step": 516352 }, { "epoch": 38.75542213883677, "grad_norm": 0.4930049777030945, "learning_rate": 0.002828, "loss": 1.0968, "step": 516416 }, { "epoch": 38.76022514071295, "grad_norm": 0.5032718777656555, "learning_rate": 0.002828, "loss": 1.1002, "step": 516480 }, { "epoch": 38.76502814258912, "grad_norm": 0.6901083588600159, "learning_rate": 0.002828, "loss": 1.0912, "step": 516544 }, { "epoch": 38.76983114446529, "grad_norm": 0.6587644219398499, "learning_rate": 0.002828, "loss": 1.0992, "step": 516608 }, { "epoch": 38.77463414634146, "grad_norm": 0.5171419382095337, "learning_rate": 0.002828, "loss": 1.0938, "step": 516672 }, { "epoch": 38.77943714821764, "grad_norm": 0.5529362559318542, "learning_rate": 0.002828, "loss": 1.095, "step": 516736 }, { "epoch": 38.78424015009381, "grad_norm": 0.5704380869865417, "learning_rate": 0.002828, "loss": 1.0978, "step": 516800 }, { "epoch": 38.789043151969985, "grad_norm": 0.555026650428772, "learning_rate": 0.002828, "loss": 1.0913, "step": 516864 }, { "epoch": 38.793846153846154, "grad_norm": 0.5735583305358887, "learning_rate": 0.002828, "loss": 1.0955, "step": 516928 }, { "epoch": 38.79864915572232, "grad_norm": 0.625392496585846, "learning_rate": 0.002828, "loss": 1.0995, "step": 516992 }, { "epoch": 38.8034521575985, "grad_norm": 0.6034018397331238, "learning_rate": 0.002828, "loss": 1.0937, "step": 517056 }, { "epoch": 38.80825515947467, "grad_norm": 0.5921566486358643, "learning_rate": 0.002828, "loss": 1.0998, "step": 517120 }, { "epoch": 38.813058161350845, "grad_norm": 0.5109006762504578, "learning_rate": 0.002828, "loss": 1.0951, "step": 517184 }, { "epoch": 38.817861163227015, "grad_norm": 0.6649883985519409, "learning_rate": 0.002828, "loss": 1.102, "step": 517248 }, { "epoch": 38.82266416510319, "grad_norm": 0.662285566329956, "learning_rate": 0.002828, "loss": 1.0959, "step": 517312 }, { "epoch": 38.82746716697936, "grad_norm": 0.5535122156143188, "learning_rate": 0.002828, "loss": 1.1035, "step": 517376 }, { "epoch": 38.83227016885554, "grad_norm": 0.5791530013084412, "learning_rate": 0.002828, "loss": 1.0936, "step": 517440 }, { "epoch": 38.837073170731706, "grad_norm": 0.5605199337005615, "learning_rate": 0.002828, "loss": 1.0933, "step": 517504 }, { "epoch": 38.84187617260788, "grad_norm": 0.6626695394515991, "learning_rate": 0.002828, "loss": 1.103, "step": 517568 }, { "epoch": 38.84667917448405, "grad_norm": 0.5840564966201782, "learning_rate": 0.002828, "loss": 1.0962, "step": 517632 }, { "epoch": 38.85148217636023, "grad_norm": 0.771919846534729, "learning_rate": 0.002828, "loss": 1.1029, "step": 517696 }, { "epoch": 38.8562851782364, "grad_norm": 0.6716858148574829, "learning_rate": 0.002828, "loss": 1.0984, "step": 517760 }, { "epoch": 38.86108818011257, "grad_norm": 0.7528976202011108, "learning_rate": 0.002828, "loss": 1.1002, "step": 517824 }, { "epoch": 38.865891181988744, "grad_norm": 0.6416760087013245, "learning_rate": 0.002828, "loss": 1.0986, "step": 517888 }, { "epoch": 38.87069418386491, "grad_norm": 0.8085233569145203, "learning_rate": 0.002828, "loss": 1.0977, "step": 517952 }, { "epoch": 38.87549718574109, "grad_norm": 0.6201658844947815, "learning_rate": 0.002828, "loss": 1.101, "step": 518016 }, { "epoch": 38.88030018761726, "grad_norm": 0.6434842944145203, "learning_rate": 0.002828, "loss": 1.0945, "step": 518080 }, { "epoch": 38.885103189493435, "grad_norm": 0.6296826601028442, "learning_rate": 0.002828, "loss": 1.0953, "step": 518144 }, { "epoch": 38.889906191369604, "grad_norm": 0.6131699681282043, "learning_rate": 0.002828, "loss": 1.1066, "step": 518208 }, { "epoch": 38.89470919324578, "grad_norm": 0.6386839151382446, "learning_rate": 0.002828, "loss": 1.1016, "step": 518272 }, { "epoch": 38.89951219512195, "grad_norm": 0.5856118202209473, "learning_rate": 0.002828, "loss": 1.1013, "step": 518336 }, { "epoch": 38.90431519699813, "grad_norm": 0.627488374710083, "learning_rate": 0.002828, "loss": 1.1064, "step": 518400 }, { "epoch": 38.909118198874296, "grad_norm": 0.5808975696563721, "learning_rate": 0.002828, "loss": 1.1035, "step": 518464 }, { "epoch": 38.91392120075047, "grad_norm": 0.6418662667274475, "learning_rate": 0.002828, "loss": 1.0998, "step": 518528 }, { "epoch": 38.91872420262664, "grad_norm": 0.7283346652984619, "learning_rate": 0.002828, "loss": 1.0988, "step": 518592 }, { "epoch": 38.92352720450281, "grad_norm": 0.5792503952980042, "learning_rate": 0.002828, "loss": 1.1016, "step": 518656 }, { "epoch": 38.92833020637899, "grad_norm": 0.6570231318473816, "learning_rate": 0.002828, "loss": 1.0969, "step": 518720 }, { "epoch": 38.93313320825516, "grad_norm": 0.5340087413787842, "learning_rate": 0.002828, "loss": 1.1022, "step": 518784 }, { "epoch": 38.93793621013133, "grad_norm": 0.6886125802993774, "learning_rate": 0.002828, "loss": 1.0984, "step": 518848 }, { "epoch": 38.9427392120075, "grad_norm": 0.6742810010910034, "learning_rate": 0.002828, "loss": 1.0961, "step": 518912 }, { "epoch": 38.94754221388368, "grad_norm": 0.5848747491836548, "learning_rate": 0.002828, "loss": 1.1027, "step": 518976 }, { "epoch": 38.95234521575985, "grad_norm": 0.6648167967796326, "learning_rate": 0.002828, "loss": 1.108, "step": 519040 }, { "epoch": 38.957148217636025, "grad_norm": 0.5892636775970459, "learning_rate": 0.002828, "loss": 1.0965, "step": 519104 }, { "epoch": 38.961951219512194, "grad_norm": 0.5304135680198669, "learning_rate": 0.002828, "loss": 1.0996, "step": 519168 }, { "epoch": 38.96675422138837, "grad_norm": 0.5350622534751892, "learning_rate": 0.002828, "loss": 1.1015, "step": 519232 }, { "epoch": 38.97155722326454, "grad_norm": 0.6611760258674622, "learning_rate": 0.002828, "loss": 1.1017, "step": 519296 }, { "epoch": 38.976360225140716, "grad_norm": 0.6533018350601196, "learning_rate": 0.002828, "loss": 1.1043, "step": 519360 }, { "epoch": 38.981163227016886, "grad_norm": 0.6382613182067871, "learning_rate": 0.002828, "loss": 1.1024, "step": 519424 }, { "epoch": 38.985966228893055, "grad_norm": 0.6975395083427429, "learning_rate": 0.002828, "loss": 1.0993, "step": 519488 }, { "epoch": 38.99076923076923, "grad_norm": 0.5687751770019531, "learning_rate": 0.002828, "loss": 1.0989, "step": 519552 }, { "epoch": 38.9955722326454, "grad_norm": 0.5590277314186096, "learning_rate": 0.002828, "loss": 1.1026, "step": 519616 }, { "epoch": 39.00037523452158, "grad_norm": 0.6050010323524475, "learning_rate": 0.002828, "loss": 1.101, "step": 519680 }, { "epoch": 39.00517823639775, "grad_norm": 0.5483032464981079, "learning_rate": 0.002828, "loss": 1.0784, "step": 519744 }, { "epoch": 39.00998123827392, "grad_norm": 0.6067426800727844, "learning_rate": 0.002828, "loss": 1.081, "step": 519808 }, { "epoch": 39.01478424015009, "grad_norm": 0.6473919749259949, "learning_rate": 0.002828, "loss": 1.0773, "step": 519872 }, { "epoch": 39.01958724202627, "grad_norm": 0.6021099090576172, "learning_rate": 0.002828, "loss": 1.0786, "step": 519936 }, { "epoch": 39.02439024390244, "grad_norm": 0.5758794546127319, "learning_rate": 0.002828, "loss": 1.0821, "step": 520000 }, { "epoch": 39.029193245778615, "grad_norm": 0.5767053961753845, "learning_rate": 0.002828, "loss": 1.0775, "step": 520064 }, { "epoch": 39.033996247654784, "grad_norm": 0.6255131959915161, "learning_rate": 0.002828, "loss": 1.0813, "step": 520128 }, { "epoch": 39.03879924953096, "grad_norm": 0.6981056928634644, "learning_rate": 0.002828, "loss": 1.0787, "step": 520192 }, { "epoch": 39.04360225140713, "grad_norm": 0.557799756526947, "learning_rate": 0.002828, "loss": 1.0811, "step": 520256 }, { "epoch": 39.0484052532833, "grad_norm": 0.68275386095047, "learning_rate": 0.002828, "loss": 1.0839, "step": 520320 }, { "epoch": 39.053208255159475, "grad_norm": 0.6308878064155579, "learning_rate": 0.002828, "loss": 1.0849, "step": 520384 }, { "epoch": 39.058011257035645, "grad_norm": 0.5470284819602966, "learning_rate": 0.002828, "loss": 1.0865, "step": 520448 }, { "epoch": 39.06281425891182, "grad_norm": 0.677872359752655, "learning_rate": 0.002828, "loss": 1.0839, "step": 520512 }, { "epoch": 39.06761726078799, "grad_norm": 0.6441925168037415, "learning_rate": 0.002828, "loss": 1.0787, "step": 520576 }, { "epoch": 39.07242026266417, "grad_norm": 0.5155608654022217, "learning_rate": 0.002828, "loss": 1.0854, "step": 520640 }, { "epoch": 39.077223264540336, "grad_norm": 0.6666632294654846, "learning_rate": 0.002828, "loss": 1.0848, "step": 520704 }, { "epoch": 39.08202626641651, "grad_norm": 0.7269572615623474, "learning_rate": 0.002828, "loss": 1.0867, "step": 520768 }, { "epoch": 39.08682926829268, "grad_norm": 0.6180398464202881, "learning_rate": 0.002828, "loss": 1.0839, "step": 520832 }, { "epoch": 39.09163227016886, "grad_norm": 0.5263913869857788, "learning_rate": 0.002828, "loss": 1.0861, "step": 520896 }, { "epoch": 39.09643527204503, "grad_norm": 0.7088015079498291, "learning_rate": 0.002828, "loss": 1.089, "step": 520960 }, { "epoch": 39.1012382739212, "grad_norm": 0.7093296647071838, "learning_rate": 0.002828, "loss": 1.0882, "step": 521024 }, { "epoch": 39.106041275797374, "grad_norm": 0.6128780841827393, "learning_rate": 0.002828, "loss": 1.0845, "step": 521088 }, { "epoch": 39.11084427767354, "grad_norm": 0.6755836606025696, "learning_rate": 0.002828, "loss": 1.0873, "step": 521152 }, { "epoch": 39.11564727954972, "grad_norm": 0.616104245185852, "learning_rate": 0.002828, "loss": 1.0891, "step": 521216 }, { "epoch": 39.12045028142589, "grad_norm": 0.6191969513893127, "learning_rate": 0.002828, "loss": 1.0901, "step": 521280 }, { "epoch": 39.125253283302065, "grad_norm": 0.4933353066444397, "learning_rate": 0.002828, "loss": 1.0842, "step": 521344 }, { "epoch": 39.130056285178235, "grad_norm": 0.5262903571128845, "learning_rate": 0.002828, "loss": 1.0935, "step": 521408 }, { "epoch": 39.13485928705441, "grad_norm": 0.7534499168395996, "learning_rate": 0.002828, "loss": 1.0906, "step": 521472 }, { "epoch": 39.13966228893058, "grad_norm": 0.6676933765411377, "learning_rate": 0.002828, "loss": 1.0896, "step": 521536 }, { "epoch": 39.14446529080676, "grad_norm": 0.5996463894844055, "learning_rate": 0.002828, "loss": 1.0909, "step": 521600 }, { "epoch": 39.149268292682926, "grad_norm": 0.566777229309082, "learning_rate": 0.002828, "loss": 1.083, "step": 521664 }, { "epoch": 39.1540712945591, "grad_norm": 0.560036301612854, "learning_rate": 0.002828, "loss": 1.0947, "step": 521728 }, { "epoch": 39.15887429643527, "grad_norm": 0.49662432074546814, "learning_rate": 0.002828, "loss": 1.0925, "step": 521792 }, { "epoch": 39.16367729831144, "grad_norm": 0.6516875624656677, "learning_rate": 0.002828, "loss": 1.0967, "step": 521856 }, { "epoch": 39.16848030018762, "grad_norm": 0.6191356778144836, "learning_rate": 0.002828, "loss": 1.0933, "step": 521920 }, { "epoch": 39.17328330206379, "grad_norm": 0.8029376268386841, "learning_rate": 0.002828, "loss": 1.0939, "step": 521984 }, { "epoch": 39.17808630393996, "grad_norm": 0.7398802638053894, "learning_rate": 0.002828, "loss": 1.0931, "step": 522048 }, { "epoch": 39.18288930581613, "grad_norm": 0.5711380839347839, "learning_rate": 0.002828, "loss": 1.0872, "step": 522112 }, { "epoch": 39.18769230769231, "grad_norm": 0.5394037365913391, "learning_rate": 0.002828, "loss": 1.0941, "step": 522176 }, { "epoch": 39.19249530956848, "grad_norm": 0.7376382350921631, "learning_rate": 0.002828, "loss": 1.0896, "step": 522240 }, { "epoch": 39.197298311444655, "grad_norm": 0.6365683078765869, "learning_rate": 0.002828, "loss": 1.0917, "step": 522304 }, { "epoch": 39.202101313320824, "grad_norm": 0.7121477127075195, "learning_rate": 0.002828, "loss": 1.0907, "step": 522368 }, { "epoch": 39.206904315197, "grad_norm": 0.5471106171607971, "learning_rate": 0.002828, "loss": 1.0913, "step": 522432 }, { "epoch": 39.21170731707317, "grad_norm": 0.7246633172035217, "learning_rate": 0.002828, "loss": 1.0991, "step": 522496 }, { "epoch": 39.21651031894935, "grad_norm": 0.6447710990905762, "learning_rate": 0.002828, "loss": 1.0955, "step": 522560 }, { "epoch": 39.221313320825516, "grad_norm": 0.6338580250740051, "learning_rate": 0.002828, "loss": 1.088, "step": 522624 }, { "epoch": 39.226116322701685, "grad_norm": 0.539563775062561, "learning_rate": 0.002828, "loss": 1.0885, "step": 522688 }, { "epoch": 39.23091932457786, "grad_norm": 0.6259217262268066, "learning_rate": 0.002828, "loss": 1.0964, "step": 522752 }, { "epoch": 39.23572232645403, "grad_norm": 0.658747673034668, "learning_rate": 0.002828, "loss": 1.0877, "step": 522816 }, { "epoch": 39.24052532833021, "grad_norm": 0.9338593482971191, "learning_rate": 0.002828, "loss": 1.0912, "step": 522880 }, { "epoch": 39.24532833020638, "grad_norm": 0.6139676570892334, "learning_rate": 0.002828, "loss": 1.0891, "step": 522944 }, { "epoch": 39.25013133208255, "grad_norm": 0.5953230857849121, "learning_rate": 0.002828, "loss": 1.0909, "step": 523008 }, { "epoch": 39.25493433395872, "grad_norm": 0.607876718044281, "learning_rate": 0.002828, "loss": 1.0846, "step": 523072 }, { "epoch": 39.2597373358349, "grad_norm": 0.6370177268981934, "learning_rate": 0.002828, "loss": 1.0923, "step": 523136 }, { "epoch": 39.26454033771107, "grad_norm": 0.5965909957885742, "learning_rate": 0.002828, "loss": 1.0905, "step": 523200 }, { "epoch": 39.269343339587245, "grad_norm": 0.5890311002731323, "learning_rate": 0.002828, "loss": 1.0908, "step": 523264 }, { "epoch": 39.274146341463414, "grad_norm": 0.5888711214065552, "learning_rate": 0.002828, "loss": 1.0938, "step": 523328 }, { "epoch": 39.27894934333959, "grad_norm": 0.634347140789032, "learning_rate": 0.002828, "loss": 1.0916, "step": 523392 }, { "epoch": 39.28375234521576, "grad_norm": 0.6978724002838135, "learning_rate": 0.002828, "loss": 1.0919, "step": 523456 }, { "epoch": 39.28855534709193, "grad_norm": 0.5859372615814209, "learning_rate": 0.002828, "loss": 1.0921, "step": 523520 }, { "epoch": 39.293358348968106, "grad_norm": 0.6379390954971313, "learning_rate": 0.002828, "loss": 1.0935, "step": 523584 }, { "epoch": 39.298161350844275, "grad_norm": 0.6283619403839111, "learning_rate": 0.002828, "loss": 1.0962, "step": 523648 }, { "epoch": 39.30296435272045, "grad_norm": 0.7185676693916321, "learning_rate": 0.002828, "loss": 1.0929, "step": 523712 }, { "epoch": 39.30776735459662, "grad_norm": 0.5743740200996399, "learning_rate": 0.002828, "loss": 1.0989, "step": 523776 }, { "epoch": 39.3125703564728, "grad_norm": 0.5320876836776733, "learning_rate": 0.002828, "loss": 1.0939, "step": 523840 }, { "epoch": 39.317373358348966, "grad_norm": 0.6361159682273865, "learning_rate": 0.002828, "loss": 1.0963, "step": 523904 }, { "epoch": 39.32217636022514, "grad_norm": 0.6137511730194092, "learning_rate": 0.002828, "loss": 1.0893, "step": 523968 }, { "epoch": 39.32697936210131, "grad_norm": 0.5668014883995056, "learning_rate": 0.002828, "loss": 1.0992, "step": 524032 }, { "epoch": 39.33178236397749, "grad_norm": 0.8398164510726929, "learning_rate": 0.002828, "loss": 1.0974, "step": 524096 }, { "epoch": 39.33658536585366, "grad_norm": 0.6183857917785645, "learning_rate": 0.002828, "loss": 1.0904, "step": 524160 }, { "epoch": 39.341388367729834, "grad_norm": 0.5952325463294983, "learning_rate": 0.002828, "loss": 1.0937, "step": 524224 }, { "epoch": 39.346191369606004, "grad_norm": 0.6355870366096497, "learning_rate": 0.002828, "loss": 1.0983, "step": 524288 }, { "epoch": 39.35099437148217, "grad_norm": 0.6765314936637878, "learning_rate": 0.002828, "loss": 1.0876, "step": 524352 }, { "epoch": 39.35579737335835, "grad_norm": 0.6968716979026794, "learning_rate": 0.002828, "loss": 1.095, "step": 524416 }, { "epoch": 39.36060037523452, "grad_norm": 0.6691702008247375, "learning_rate": 0.002828, "loss": 1.0964, "step": 524480 }, { "epoch": 39.365403377110695, "grad_norm": 0.5527083873748779, "learning_rate": 0.002828, "loss": 1.0981, "step": 524544 }, { "epoch": 39.370206378986865, "grad_norm": 0.6122605800628662, "learning_rate": 0.002828, "loss": 1.0969, "step": 524608 }, { "epoch": 39.37500938086304, "grad_norm": 0.5258229374885559, "learning_rate": 0.002828, "loss": 1.0961, "step": 524672 }, { "epoch": 39.37981238273921, "grad_norm": 0.7176069021224976, "learning_rate": 0.002828, "loss": 1.0971, "step": 524736 }, { "epoch": 39.38461538461539, "grad_norm": 0.544308066368103, "learning_rate": 0.002828, "loss": 1.0941, "step": 524800 }, { "epoch": 39.389418386491556, "grad_norm": 0.6387256383895874, "learning_rate": 0.002828, "loss": 1.0963, "step": 524864 }, { "epoch": 39.39422138836773, "grad_norm": 0.6066888570785522, "learning_rate": 0.002828, "loss": 1.0975, "step": 524928 }, { "epoch": 39.3990243902439, "grad_norm": 0.6804490089416504, "learning_rate": 0.002828, "loss": 1.1012, "step": 524992 }, { "epoch": 39.40382739212008, "grad_norm": 0.6256688237190247, "learning_rate": 0.002828, "loss": 1.0984, "step": 525056 }, { "epoch": 39.40863039399625, "grad_norm": 0.5704160332679749, "learning_rate": 0.002828, "loss": 1.0965, "step": 525120 }, { "epoch": 39.41343339587242, "grad_norm": 0.643605649471283, "learning_rate": 0.002828, "loss": 1.0993, "step": 525184 }, { "epoch": 39.41823639774859, "grad_norm": 0.691459059715271, "learning_rate": 0.002828, "loss": 1.0889, "step": 525248 }, { "epoch": 39.42303939962476, "grad_norm": 0.5297619104385376, "learning_rate": 0.002828, "loss": 1.0973, "step": 525312 }, { "epoch": 39.42784240150094, "grad_norm": 0.650749921798706, "learning_rate": 0.002828, "loss": 1.1024, "step": 525376 }, { "epoch": 39.43264540337711, "grad_norm": 0.6719233393669128, "learning_rate": 0.002828, "loss": 1.1037, "step": 525440 }, { "epoch": 39.437448405253285, "grad_norm": 0.7150486707687378, "learning_rate": 0.002828, "loss": 1.0935, "step": 525504 }, { "epoch": 39.442251407129454, "grad_norm": 0.5051080584526062, "learning_rate": 0.002828, "loss": 1.1025, "step": 525568 }, { "epoch": 39.44705440900563, "grad_norm": 0.5855237245559692, "learning_rate": 0.002828, "loss": 1.0889, "step": 525632 }, { "epoch": 39.4518574108818, "grad_norm": 0.561397135257721, "learning_rate": 0.002828, "loss": 1.0973, "step": 525696 }, { "epoch": 39.45666041275798, "grad_norm": 0.6438418626785278, "learning_rate": 0.002828, "loss": 1.104, "step": 525760 }, { "epoch": 39.461463414634146, "grad_norm": 0.6153573989868164, "learning_rate": 0.002828, "loss": 1.0989, "step": 525824 }, { "epoch": 39.46626641651032, "grad_norm": 0.6531253457069397, "learning_rate": 0.002828, "loss": 1.0977, "step": 525888 }, { "epoch": 39.47106941838649, "grad_norm": 0.7286359667778015, "learning_rate": 0.002828, "loss": 1.0986, "step": 525952 }, { "epoch": 39.47587242026266, "grad_norm": 0.5893198847770691, "learning_rate": 0.002828, "loss": 1.101, "step": 526016 }, { "epoch": 39.48067542213884, "grad_norm": 0.5942003726959229, "learning_rate": 0.002828, "loss": 1.1037, "step": 526080 }, { "epoch": 39.48547842401501, "grad_norm": 0.5884833335876465, "learning_rate": 0.002828, "loss": 1.0984, "step": 526144 }, { "epoch": 39.49028142589118, "grad_norm": 0.6396442651748657, "learning_rate": 0.002828, "loss": 1.0984, "step": 526208 }, { "epoch": 39.49508442776735, "grad_norm": 0.664161205291748, "learning_rate": 0.002828, "loss": 1.103, "step": 526272 }, { "epoch": 39.49988742964353, "grad_norm": 0.6068063974380493, "learning_rate": 0.002828, "loss": 1.098, "step": 526336 }, { "epoch": 39.5046904315197, "grad_norm": 0.6167985200881958, "learning_rate": 0.002828, "loss": 1.1029, "step": 526400 }, { "epoch": 39.509493433395875, "grad_norm": 0.6410574316978455, "learning_rate": 0.002828, "loss": 1.1036, "step": 526464 }, { "epoch": 39.514296435272044, "grad_norm": 0.5695111751556396, "learning_rate": 0.002828, "loss": 1.1009, "step": 526528 }, { "epoch": 39.51909943714822, "grad_norm": 0.8604126572608948, "learning_rate": 0.002828, "loss": 1.1033, "step": 526592 }, { "epoch": 39.52390243902439, "grad_norm": 0.6506824493408203, "learning_rate": 0.002828, "loss": 1.0995, "step": 526656 }, { "epoch": 39.528705440900566, "grad_norm": 0.5646496415138245, "learning_rate": 0.002828, "loss": 1.0983, "step": 526720 }, { "epoch": 39.533508442776736, "grad_norm": 0.7363172769546509, "learning_rate": 0.002828, "loss": 1.1057, "step": 526784 }, { "epoch": 39.538311444652905, "grad_norm": 0.598517894744873, "learning_rate": 0.002828, "loss": 1.1013, "step": 526848 }, { "epoch": 39.54311444652908, "grad_norm": 0.8225451111793518, "learning_rate": 0.002828, "loss": 1.1021, "step": 526912 }, { "epoch": 39.54791744840525, "grad_norm": 0.5679796934127808, "learning_rate": 0.002828, "loss": 1.0973, "step": 526976 }, { "epoch": 39.55272045028143, "grad_norm": 0.5201491117477417, "learning_rate": 0.002828, "loss": 1.099, "step": 527040 }, { "epoch": 39.5575234521576, "grad_norm": 0.6762983202934265, "learning_rate": 0.002828, "loss": 1.1049, "step": 527104 }, { "epoch": 39.56232645403377, "grad_norm": 0.7534833550453186, "learning_rate": 0.002828, "loss": 1.104, "step": 527168 }, { "epoch": 39.56712945590994, "grad_norm": 0.6385214328765869, "learning_rate": 0.002828, "loss": 1.1013, "step": 527232 }, { "epoch": 39.57193245778612, "grad_norm": 0.59105384349823, "learning_rate": 0.002828, "loss": 1.1059, "step": 527296 }, { "epoch": 39.57673545966229, "grad_norm": 0.6427984237670898, "learning_rate": 0.002828, "loss": 1.1064, "step": 527360 }, { "epoch": 39.581538461538464, "grad_norm": 0.7744491696357727, "learning_rate": 0.002828, "loss": 1.1046, "step": 527424 }, { "epoch": 39.586341463414634, "grad_norm": 0.49544790387153625, "learning_rate": 0.002828, "loss": 1.1031, "step": 527488 }, { "epoch": 39.59114446529081, "grad_norm": 0.6462889909744263, "learning_rate": 0.002828, "loss": 1.099, "step": 527552 }, { "epoch": 39.59594746716698, "grad_norm": 0.7103201150894165, "learning_rate": 0.002828, "loss": 1.1006, "step": 527616 }, { "epoch": 39.60075046904315, "grad_norm": 0.5468045473098755, "learning_rate": 0.002828, "loss": 1.1072, "step": 527680 }, { "epoch": 39.605553470919325, "grad_norm": 0.5340071320533752, "learning_rate": 0.002828, "loss": 1.1011, "step": 527744 }, { "epoch": 39.610356472795495, "grad_norm": 0.6542819142341614, "learning_rate": 0.002828, "loss": 1.1011, "step": 527808 }, { "epoch": 39.61515947467167, "grad_norm": 0.6813800930976868, "learning_rate": 0.002828, "loss": 1.1031, "step": 527872 }, { "epoch": 39.61996247654784, "grad_norm": 0.7469828724861145, "learning_rate": 0.002828, "loss": 1.0996, "step": 527936 }, { "epoch": 39.62476547842402, "grad_norm": 0.7847934365272522, "learning_rate": 0.002828, "loss": 1.0987, "step": 528000 }, { "epoch": 39.629568480300186, "grad_norm": 0.5818659067153931, "learning_rate": 0.002828, "loss": 1.099, "step": 528064 }, { "epoch": 39.63437148217636, "grad_norm": 0.6419061422348022, "learning_rate": 0.002828, "loss": 1.1005, "step": 528128 }, { "epoch": 39.63917448405253, "grad_norm": 0.6728861927986145, "learning_rate": 0.002828, "loss": 1.1004, "step": 528192 }, { "epoch": 39.64397748592871, "grad_norm": 0.645656943321228, "learning_rate": 0.002828, "loss": 1.101, "step": 528256 }, { "epoch": 39.64878048780488, "grad_norm": 0.5696425437927246, "learning_rate": 0.002828, "loss": 1.1031, "step": 528320 }, { "epoch": 39.653583489681054, "grad_norm": 1.0262229442596436, "learning_rate": 0.002828, "loss": 1.1, "step": 528384 }, { "epoch": 39.65838649155722, "grad_norm": 0.6700289249420166, "learning_rate": 0.002828, "loss": 1.1002, "step": 528448 }, { "epoch": 39.66318949343339, "grad_norm": 0.5933250188827515, "learning_rate": 0.002828, "loss": 1.0997, "step": 528512 }, { "epoch": 39.66799249530957, "grad_norm": 0.5513843297958374, "learning_rate": 0.002828, "loss": 1.1087, "step": 528576 }, { "epoch": 39.67279549718574, "grad_norm": 0.632848858833313, "learning_rate": 0.002828, "loss": 1.1055, "step": 528640 }, { "epoch": 39.677598499061915, "grad_norm": 0.6517565846443176, "learning_rate": 0.002828, "loss": 1.0998, "step": 528704 }, { "epoch": 39.682401500938084, "grad_norm": 0.5785037875175476, "learning_rate": 0.002828, "loss": 1.0996, "step": 528768 }, { "epoch": 39.68720450281426, "grad_norm": 0.6588200926780701, "learning_rate": 0.002828, "loss": 1.1026, "step": 528832 }, { "epoch": 39.69200750469043, "grad_norm": 0.5650362968444824, "learning_rate": 0.002828, "loss": 1.106, "step": 528896 }, { "epoch": 39.69681050656661, "grad_norm": 0.6296866536140442, "learning_rate": 0.002828, "loss": 1.1033, "step": 528960 }, { "epoch": 39.701613508442776, "grad_norm": 0.7417605519294739, "learning_rate": 0.002828, "loss": 1.109, "step": 529024 }, { "epoch": 39.70641651031895, "grad_norm": 0.5952714681625366, "learning_rate": 0.002828, "loss": 1.1063, "step": 529088 }, { "epoch": 39.71121951219512, "grad_norm": 0.5962236523628235, "learning_rate": 0.002828, "loss": 1.102, "step": 529152 }, { "epoch": 39.7160225140713, "grad_norm": 0.6925379633903503, "learning_rate": 0.002828, "loss": 1.1073, "step": 529216 }, { "epoch": 39.72082551594747, "grad_norm": 0.5629957914352417, "learning_rate": 0.002828, "loss": 1.1037, "step": 529280 }, { "epoch": 39.72562851782364, "grad_norm": 0.6818074584007263, "learning_rate": 0.002828, "loss": 1.1017, "step": 529344 }, { "epoch": 39.73043151969981, "grad_norm": 0.8665837049484253, "learning_rate": 0.002828, "loss": 1.1023, "step": 529408 }, { "epoch": 39.73523452157598, "grad_norm": 0.608192503452301, "learning_rate": 0.002828, "loss": 1.1088, "step": 529472 }, { "epoch": 39.74003752345216, "grad_norm": 0.6119167804718018, "learning_rate": 0.002828, "loss": 1.1112, "step": 529536 }, { "epoch": 39.74484052532833, "grad_norm": 0.5534200072288513, "learning_rate": 0.002828, "loss": 1.1018, "step": 529600 }, { "epoch": 39.749643527204505, "grad_norm": 0.5940262675285339, "learning_rate": 0.002828, "loss": 1.1045, "step": 529664 }, { "epoch": 39.754446529080674, "grad_norm": 0.611477792263031, "learning_rate": 0.002828, "loss": 1.1031, "step": 529728 }, { "epoch": 39.75924953095685, "grad_norm": 0.7395573854446411, "learning_rate": 0.002828, "loss": 1.0994, "step": 529792 }, { "epoch": 39.76405253283302, "grad_norm": 0.6626569628715515, "learning_rate": 0.002828, "loss": 1.1132, "step": 529856 }, { "epoch": 39.768855534709196, "grad_norm": 0.596015989780426, "learning_rate": 0.002828, "loss": 1.1109, "step": 529920 }, { "epoch": 39.773658536585366, "grad_norm": 0.6616296768188477, "learning_rate": 0.002828, "loss": 1.1065, "step": 529984 }, { "epoch": 39.778461538461535, "grad_norm": 0.6416288614273071, "learning_rate": 0.002828, "loss": 1.1039, "step": 530048 }, { "epoch": 39.78326454033771, "grad_norm": 0.6856739521026611, "learning_rate": 0.002828, "loss": 1.1038, "step": 530112 }, { "epoch": 39.78806754221388, "grad_norm": 0.5775629878044128, "learning_rate": 0.002828, "loss": 1.1103, "step": 530176 }, { "epoch": 39.79287054409006, "grad_norm": 0.8429564237594604, "learning_rate": 0.002828, "loss": 1.1096, "step": 530240 }, { "epoch": 39.79767354596623, "grad_norm": 0.571973443031311, "learning_rate": 0.002828, "loss": 1.1081, "step": 530304 }, { "epoch": 39.8024765478424, "grad_norm": 0.9216853380203247, "learning_rate": 0.002828, "loss": 1.1028, "step": 530368 }, { "epoch": 39.80727954971857, "grad_norm": 0.6850279569625854, "learning_rate": 0.002828, "loss": 1.1081, "step": 530432 }, { "epoch": 39.81208255159475, "grad_norm": 0.5708699226379395, "learning_rate": 0.002828, "loss": 1.1084, "step": 530496 }, { "epoch": 39.81688555347092, "grad_norm": 0.655859112739563, "learning_rate": 0.002828, "loss": 1.1068, "step": 530560 }, { "epoch": 39.821688555347095, "grad_norm": 0.5955530405044556, "learning_rate": 0.002828, "loss": 1.1093, "step": 530624 }, { "epoch": 39.826491557223264, "grad_norm": 0.587763249874115, "learning_rate": 0.002828, "loss": 1.1067, "step": 530688 }, { "epoch": 39.83129455909944, "grad_norm": 0.6162307858467102, "learning_rate": 0.002828, "loss": 1.1025, "step": 530752 }, { "epoch": 39.83609756097561, "grad_norm": 0.6205756664276123, "learning_rate": 0.002828, "loss": 1.1049, "step": 530816 }, { "epoch": 39.84090056285178, "grad_norm": 0.5409461855888367, "learning_rate": 0.002828, "loss": 1.1084, "step": 530880 }, { "epoch": 39.845703564727955, "grad_norm": 1.0385624170303345, "learning_rate": 0.002828, "loss": 1.1074, "step": 530944 }, { "epoch": 39.850506566604125, "grad_norm": 0.6885086297988892, "learning_rate": 0.002828, "loss": 1.113, "step": 531008 }, { "epoch": 39.8553095684803, "grad_norm": 0.694943904876709, "learning_rate": 0.002828, "loss": 1.11, "step": 531072 }, { "epoch": 39.86011257035647, "grad_norm": 0.7824503779411316, "learning_rate": 0.002828, "loss": 1.1064, "step": 531136 }, { "epoch": 39.86491557223265, "grad_norm": 0.562761127948761, "learning_rate": 0.002828, "loss": 1.1019, "step": 531200 }, { "epoch": 39.869718574108816, "grad_norm": 0.5979791283607483, "learning_rate": 0.002828, "loss": 1.1135, "step": 531264 }, { "epoch": 39.87452157598499, "grad_norm": 0.5886710286140442, "learning_rate": 0.002828, "loss": 1.1056, "step": 531328 }, { "epoch": 39.87932457786116, "grad_norm": 0.6354160904884338, "learning_rate": 0.002828, "loss": 1.1073, "step": 531392 }, { "epoch": 39.88412757973734, "grad_norm": 0.7226580381393433, "learning_rate": 0.002828, "loss": 1.1098, "step": 531456 }, { "epoch": 39.88893058161351, "grad_norm": 0.6297502517700195, "learning_rate": 0.002828, "loss": 1.1076, "step": 531520 }, { "epoch": 39.893733583489684, "grad_norm": 0.5878957509994507, "learning_rate": 0.002828, "loss": 1.1136, "step": 531584 }, { "epoch": 39.898536585365854, "grad_norm": 0.7568423748016357, "learning_rate": 0.002828, "loss": 1.1085, "step": 531648 }, { "epoch": 39.90333958724202, "grad_norm": 0.5480137467384338, "learning_rate": 0.002828, "loss": 1.1044, "step": 531712 }, { "epoch": 39.9081425891182, "grad_norm": 0.5894506573677063, "learning_rate": 0.002828, "loss": 1.1104, "step": 531776 }, { "epoch": 39.91294559099437, "grad_norm": 0.6330196261405945, "learning_rate": 0.002828, "loss": 1.1087, "step": 531840 }, { "epoch": 39.917748592870545, "grad_norm": 0.7971309423446655, "learning_rate": 0.002828, "loss": 1.106, "step": 531904 }, { "epoch": 39.922551594746714, "grad_norm": 0.6329578757286072, "learning_rate": 0.002828, "loss": 1.1129, "step": 531968 }, { "epoch": 39.92735459662289, "grad_norm": 0.5038153529167175, "learning_rate": 0.002828, "loss": 1.1095, "step": 532032 }, { "epoch": 39.93215759849906, "grad_norm": 0.6094999313354492, "learning_rate": 0.002828, "loss": 1.1125, "step": 532096 }, { "epoch": 39.93696060037524, "grad_norm": 0.5079559087753296, "learning_rate": 0.002828, "loss": 1.1029, "step": 532160 }, { "epoch": 39.941763602251406, "grad_norm": 0.55616694688797, "learning_rate": 0.002828, "loss": 1.1044, "step": 532224 }, { "epoch": 39.94656660412758, "grad_norm": 0.5412519574165344, "learning_rate": 0.002828, "loss": 1.1061, "step": 532288 }, { "epoch": 39.95136960600375, "grad_norm": 0.6757495403289795, "learning_rate": 0.002828, "loss": 1.1099, "step": 532352 }, { "epoch": 39.95617260787993, "grad_norm": 0.6678786873817444, "learning_rate": 0.002828, "loss": 1.1036, "step": 532416 }, { "epoch": 39.9609756097561, "grad_norm": 0.6607096791267395, "learning_rate": 0.002828, "loss": 1.1037, "step": 532480 }, { "epoch": 39.96577861163227, "grad_norm": 0.5916298031806946, "learning_rate": 0.002828, "loss": 1.1094, "step": 532544 }, { "epoch": 39.97058161350844, "grad_norm": 0.6358587145805359, "learning_rate": 0.002828, "loss": 1.1145, "step": 532608 }, { "epoch": 39.97538461538461, "grad_norm": 0.6469662189483643, "learning_rate": 0.002828, "loss": 1.1119, "step": 532672 }, { "epoch": 39.98018761726079, "grad_norm": 0.6851943731307983, "learning_rate": 0.002828, "loss": 1.1107, "step": 532736 }, { "epoch": 39.98499061913696, "grad_norm": 0.6589726209640503, "learning_rate": 0.002828, "loss": 1.1127, "step": 532800 }, { "epoch": 39.989793621013135, "grad_norm": 0.5190383791923523, "learning_rate": 0.002828, "loss": 1.1059, "step": 532864 }, { "epoch": 39.994596622889304, "grad_norm": 0.5913875699043274, "learning_rate": 0.002828, "loss": 1.107, "step": 532928 }, { "epoch": 39.99939962476548, "grad_norm": 0.6283708810806274, "learning_rate": 0.002828, "loss": 1.1123, "step": 532992 }, { "epoch": 40.00420262664165, "grad_norm": 0.5936568379402161, "learning_rate": 0.002828, "loss": 1.0796, "step": 533056 }, { "epoch": 40.009005628517826, "grad_norm": 0.6228441596031189, "learning_rate": 0.002828, "loss": 1.0738, "step": 533120 }, { "epoch": 40.013808630393996, "grad_norm": 0.5124711394309998, "learning_rate": 0.002828, "loss": 1.0717, "step": 533184 }, { "epoch": 40.01861163227017, "grad_norm": 0.7940961718559265, "learning_rate": 0.002828, "loss": 1.0734, "step": 533248 }, { "epoch": 40.02341463414634, "grad_norm": 0.5992420315742493, "learning_rate": 0.002828, "loss": 1.0736, "step": 533312 }, { "epoch": 40.02821763602251, "grad_norm": 0.5343099236488342, "learning_rate": 0.002828, "loss": 1.071, "step": 533376 }, { "epoch": 40.03302063789869, "grad_norm": 0.5241178274154663, "learning_rate": 0.002828, "loss": 1.074, "step": 533440 }, { "epoch": 40.03782363977486, "grad_norm": 0.7192960977554321, "learning_rate": 0.002828, "loss": 1.0745, "step": 533504 }, { "epoch": 40.04262664165103, "grad_norm": 0.7904562950134277, "learning_rate": 0.002828, "loss": 1.0774, "step": 533568 }, { "epoch": 40.0474296435272, "grad_norm": 0.760181725025177, "learning_rate": 0.002828, "loss": 1.0721, "step": 533632 }, { "epoch": 40.05223264540338, "grad_norm": 0.5834547877311707, "learning_rate": 0.002828, "loss": 1.074, "step": 533696 }, { "epoch": 40.05703564727955, "grad_norm": 0.5778083205223083, "learning_rate": 0.002828, "loss": 1.0751, "step": 533760 }, { "epoch": 40.061838649155725, "grad_norm": 0.6584703326225281, "learning_rate": 0.002828, "loss": 1.0745, "step": 533824 }, { "epoch": 40.066641651031894, "grad_norm": 0.5253800749778748, "learning_rate": 0.002828, "loss": 1.0711, "step": 533888 }, { "epoch": 40.07144465290807, "grad_norm": 0.49245938658714294, "learning_rate": 0.002828, "loss": 1.0791, "step": 533952 }, { "epoch": 40.07624765478424, "grad_norm": 0.6337112188339233, "learning_rate": 0.002828, "loss": 1.0807, "step": 534016 }, { "epoch": 40.081050656660416, "grad_norm": 0.7335975170135498, "learning_rate": 0.002828, "loss": 1.0733, "step": 534080 }, { "epoch": 40.085853658536585, "grad_norm": 0.5771627426147461, "learning_rate": 0.002828, "loss": 1.0753, "step": 534144 }, { "epoch": 40.090656660412755, "grad_norm": 0.6392300724983215, "learning_rate": 0.002828, "loss": 1.081, "step": 534208 }, { "epoch": 40.09545966228893, "grad_norm": 0.5753986239433289, "learning_rate": 0.002828, "loss": 1.0789, "step": 534272 }, { "epoch": 40.1002626641651, "grad_norm": 0.5396976470947266, "learning_rate": 0.002828, "loss": 1.0792, "step": 534336 }, { "epoch": 40.10506566604128, "grad_norm": 0.621372401714325, "learning_rate": 0.002828, "loss": 1.079, "step": 534400 }, { "epoch": 40.109868667917446, "grad_norm": 0.6752663850784302, "learning_rate": 0.002828, "loss": 1.081, "step": 534464 }, { "epoch": 40.11467166979362, "grad_norm": 0.6094245910644531, "learning_rate": 0.002828, "loss": 1.0864, "step": 534528 }, { "epoch": 40.11947467166979, "grad_norm": 0.6214587092399597, "learning_rate": 0.002828, "loss": 1.0774, "step": 534592 }, { "epoch": 40.12427767354597, "grad_norm": 0.6940475106239319, "learning_rate": 0.002828, "loss": 1.0802, "step": 534656 }, { "epoch": 40.12908067542214, "grad_norm": 0.5746902823448181, "learning_rate": 0.002828, "loss": 1.079, "step": 534720 }, { "epoch": 40.133883677298314, "grad_norm": 0.7482483983039856, "learning_rate": 0.002828, "loss": 1.0785, "step": 534784 }, { "epoch": 40.138686679174484, "grad_norm": 0.6373165845870972, "learning_rate": 0.002828, "loss": 1.0808, "step": 534848 }, { "epoch": 40.14348968105066, "grad_norm": 0.6416199207305908, "learning_rate": 0.002828, "loss": 1.08, "step": 534912 }, { "epoch": 40.14829268292683, "grad_norm": 0.7983708381652832, "learning_rate": 0.002828, "loss": 1.0809, "step": 534976 }, { "epoch": 40.153095684803, "grad_norm": 0.6383090615272522, "learning_rate": 0.002828, "loss": 1.0832, "step": 535040 }, { "epoch": 40.157898686679175, "grad_norm": 0.5542482733726501, "learning_rate": 0.002828, "loss": 1.0791, "step": 535104 }, { "epoch": 40.162701688555345, "grad_norm": 0.5444024801254272, "learning_rate": 0.002828, "loss": 1.0781, "step": 535168 }, { "epoch": 40.16750469043152, "grad_norm": 0.5228294134140015, "learning_rate": 0.002828, "loss": 1.0819, "step": 535232 }, { "epoch": 40.17230769230769, "grad_norm": 0.5700907707214355, "learning_rate": 0.002828, "loss": 1.084, "step": 535296 }, { "epoch": 40.17711069418387, "grad_norm": 0.7073768377304077, "learning_rate": 0.002828, "loss": 1.0811, "step": 535360 }, { "epoch": 40.181913696060036, "grad_norm": 0.5951655507087708, "learning_rate": 0.002828, "loss": 1.0865, "step": 535424 }, { "epoch": 40.18671669793621, "grad_norm": 0.5553143620491028, "learning_rate": 0.002828, "loss": 1.0865, "step": 535488 }, { "epoch": 40.19151969981238, "grad_norm": 0.6570543646812439, "learning_rate": 0.002828, "loss": 1.084, "step": 535552 }, { "epoch": 40.19632270168856, "grad_norm": 0.555578887462616, "learning_rate": 0.002828, "loss": 1.0806, "step": 535616 }, { "epoch": 40.20112570356473, "grad_norm": 0.5957068800926208, "learning_rate": 0.002828, "loss": 1.0834, "step": 535680 }, { "epoch": 40.205928705440904, "grad_norm": 0.660434365272522, "learning_rate": 0.002828, "loss": 1.0789, "step": 535744 }, { "epoch": 40.21073170731707, "grad_norm": 0.616719126701355, "learning_rate": 0.002828, "loss": 1.0808, "step": 535808 }, { "epoch": 40.21553470919324, "grad_norm": 0.7140281200408936, "learning_rate": 0.002828, "loss": 1.0857, "step": 535872 }, { "epoch": 40.22033771106942, "grad_norm": 0.7330237627029419, "learning_rate": 0.002828, "loss": 1.0897, "step": 535936 }, { "epoch": 40.22514071294559, "grad_norm": 0.6709555387496948, "learning_rate": 0.002828, "loss": 1.0825, "step": 536000 }, { "epoch": 40.229943714821765, "grad_norm": 0.591925859451294, "learning_rate": 0.002828, "loss": 1.0851, "step": 536064 }, { "epoch": 40.234746716697934, "grad_norm": 0.520167350769043, "learning_rate": 0.002828, "loss": 1.079, "step": 536128 }, { "epoch": 40.23954971857411, "grad_norm": 0.5892237424850464, "learning_rate": 0.002828, "loss": 1.0828, "step": 536192 }, { "epoch": 40.24435272045028, "grad_norm": 0.5957344770431519, "learning_rate": 0.002828, "loss": 1.0843, "step": 536256 }, { "epoch": 40.249155722326456, "grad_norm": 0.5429134368896484, "learning_rate": 0.002828, "loss": 1.0875, "step": 536320 }, { "epoch": 40.253958724202626, "grad_norm": 0.6112959384918213, "learning_rate": 0.002828, "loss": 1.0886, "step": 536384 }, { "epoch": 40.2587617260788, "grad_norm": 0.5770933032035828, "learning_rate": 0.002828, "loss": 1.086, "step": 536448 }, { "epoch": 40.26356472795497, "grad_norm": 0.6509702801704407, "learning_rate": 0.002828, "loss": 1.0912, "step": 536512 }, { "epoch": 40.26836772983115, "grad_norm": 0.5557456016540527, "learning_rate": 0.002828, "loss": 1.0845, "step": 536576 }, { "epoch": 40.27317073170732, "grad_norm": 0.5438624024391174, "learning_rate": 0.002828, "loss": 1.0823, "step": 536640 }, { "epoch": 40.27797373358349, "grad_norm": 0.6417461037635803, "learning_rate": 0.002828, "loss": 1.0856, "step": 536704 }, { "epoch": 40.28277673545966, "grad_norm": 0.5874984860420227, "learning_rate": 0.002828, "loss": 1.0885, "step": 536768 }, { "epoch": 40.28757973733583, "grad_norm": 0.6954288482666016, "learning_rate": 0.002828, "loss": 1.0822, "step": 536832 }, { "epoch": 40.29238273921201, "grad_norm": 0.5061184167861938, "learning_rate": 0.002828, "loss": 1.078, "step": 536896 }, { "epoch": 40.29718574108818, "grad_norm": 0.7347505688667297, "learning_rate": 0.002828, "loss": 1.0901, "step": 536960 }, { "epoch": 40.301988742964355, "grad_norm": 0.6875437498092651, "learning_rate": 0.002828, "loss": 1.09, "step": 537024 }, { "epoch": 40.306791744840524, "grad_norm": 0.7001727819442749, "learning_rate": 0.002828, "loss": 1.087, "step": 537088 }, { "epoch": 40.3115947467167, "grad_norm": 0.7775195240974426, "learning_rate": 0.002828, "loss": 1.0962, "step": 537152 }, { "epoch": 40.31639774859287, "grad_norm": 0.6886974573135376, "learning_rate": 0.002828, "loss": 1.0867, "step": 537216 }, { "epoch": 40.321200750469046, "grad_norm": 0.6425182223320007, "learning_rate": 0.002828, "loss": 1.0902, "step": 537280 }, { "epoch": 40.326003752345216, "grad_norm": 0.6771668195724487, "learning_rate": 0.002828, "loss": 1.0861, "step": 537344 }, { "epoch": 40.330806754221385, "grad_norm": 0.6500647664070129, "learning_rate": 0.002828, "loss": 1.0899, "step": 537408 }, { "epoch": 40.33560975609756, "grad_norm": 0.6931434273719788, "learning_rate": 0.002828, "loss": 1.088, "step": 537472 }, { "epoch": 40.34041275797373, "grad_norm": 0.7521113753318787, "learning_rate": 0.002828, "loss": 1.0854, "step": 537536 }, { "epoch": 40.34521575984991, "grad_norm": 0.7151188850402832, "learning_rate": 0.002828, "loss": 1.0876, "step": 537600 }, { "epoch": 40.350018761726076, "grad_norm": 0.5576344132423401, "learning_rate": 0.002828, "loss": 1.0917, "step": 537664 }, { "epoch": 40.35482176360225, "grad_norm": 0.6216950416564941, "learning_rate": 0.002828, "loss": 1.0861, "step": 537728 }, { "epoch": 40.35962476547842, "grad_norm": 0.6059309840202332, "learning_rate": 0.002828, "loss": 1.0857, "step": 537792 }, { "epoch": 40.3644277673546, "grad_norm": 0.5524047613143921, "learning_rate": 0.002828, "loss": 1.0887, "step": 537856 }, { "epoch": 40.36923076923077, "grad_norm": 0.6283742189407349, "learning_rate": 0.002828, "loss": 1.0864, "step": 537920 }, { "epoch": 40.374033771106944, "grad_norm": 0.7623336911201477, "learning_rate": 0.002828, "loss": 1.087, "step": 537984 }, { "epoch": 40.378836772983114, "grad_norm": 0.5618724226951599, "learning_rate": 0.002828, "loss": 1.0896, "step": 538048 }, { "epoch": 40.38363977485929, "grad_norm": 0.5732715129852295, "learning_rate": 0.002828, "loss": 1.0899, "step": 538112 }, { "epoch": 40.38844277673546, "grad_norm": 0.607079267501831, "learning_rate": 0.002828, "loss": 1.0862, "step": 538176 }, { "epoch": 40.39324577861163, "grad_norm": 0.6852363348007202, "learning_rate": 0.002828, "loss": 1.0849, "step": 538240 }, { "epoch": 40.398048780487805, "grad_norm": 0.7007236480712891, "learning_rate": 0.002828, "loss": 1.0916, "step": 538304 }, { "epoch": 40.402851782363975, "grad_norm": 0.5718969106674194, "learning_rate": 0.002828, "loss": 1.0869, "step": 538368 }, { "epoch": 40.40765478424015, "grad_norm": 0.6075987219810486, "learning_rate": 0.002828, "loss": 1.0912, "step": 538432 }, { "epoch": 40.41245778611632, "grad_norm": 0.6271284222602844, "learning_rate": 0.002828, "loss": 1.0908, "step": 538496 }, { "epoch": 40.4172607879925, "grad_norm": 0.7049885988235474, "learning_rate": 0.002828, "loss": 1.0912, "step": 538560 }, { "epoch": 40.422063789868666, "grad_norm": 0.6199770569801331, "learning_rate": 0.002828, "loss": 1.0889, "step": 538624 }, { "epoch": 40.42686679174484, "grad_norm": 0.6718481183052063, "learning_rate": 0.002828, "loss": 1.0894, "step": 538688 }, { "epoch": 40.43166979362101, "grad_norm": 0.8460226655006409, "learning_rate": 0.002828, "loss": 1.0894, "step": 538752 }, { "epoch": 40.43647279549719, "grad_norm": 0.5630996227264404, "learning_rate": 0.002828, "loss": 1.097, "step": 538816 }, { "epoch": 40.44127579737336, "grad_norm": 0.580254077911377, "learning_rate": 0.002828, "loss": 1.0858, "step": 538880 }, { "epoch": 40.446078799249534, "grad_norm": 0.5695968270301819, "learning_rate": 0.002828, "loss": 1.0947, "step": 538944 }, { "epoch": 40.4508818011257, "grad_norm": 0.6187861561775208, "learning_rate": 0.002828, "loss": 1.0981, "step": 539008 }, { "epoch": 40.45568480300187, "grad_norm": 0.5952305793762207, "learning_rate": 0.002828, "loss": 1.0931, "step": 539072 }, { "epoch": 40.46048780487805, "grad_norm": 0.5588722229003906, "learning_rate": 0.002828, "loss": 1.0858, "step": 539136 }, { "epoch": 40.46529080675422, "grad_norm": 0.5876528024673462, "learning_rate": 0.002828, "loss": 1.0847, "step": 539200 }, { "epoch": 40.470093808630395, "grad_norm": 0.9118528962135315, "learning_rate": 0.002828, "loss": 1.0904, "step": 539264 }, { "epoch": 40.474896810506564, "grad_norm": 0.5637339949607849, "learning_rate": 0.002828, "loss": 1.0855, "step": 539328 }, { "epoch": 40.47969981238274, "grad_norm": 0.6483489274978638, "learning_rate": 0.002828, "loss": 1.0969, "step": 539392 }, { "epoch": 40.48450281425891, "grad_norm": 0.8003821969032288, "learning_rate": 0.002828, "loss": 1.0917, "step": 539456 }, { "epoch": 40.48930581613509, "grad_norm": 0.6287664175033569, "learning_rate": 0.002828, "loss": 1.0893, "step": 539520 }, { "epoch": 40.494108818011256, "grad_norm": 0.6048157215118408, "learning_rate": 0.002828, "loss": 1.0967, "step": 539584 }, { "epoch": 40.49891181988743, "grad_norm": 0.6489182710647583, "learning_rate": 0.002828, "loss": 1.0941, "step": 539648 }, { "epoch": 40.5037148217636, "grad_norm": 0.6671035885810852, "learning_rate": 0.002828, "loss": 1.0936, "step": 539712 }, { "epoch": 40.50851782363978, "grad_norm": 0.5299566984176636, "learning_rate": 0.002828, "loss": 1.0953, "step": 539776 }, { "epoch": 40.51332082551595, "grad_norm": 0.653409481048584, "learning_rate": 0.002828, "loss": 1.0909, "step": 539840 }, { "epoch": 40.51812382739212, "grad_norm": 0.6193459033966064, "learning_rate": 0.002828, "loss": 1.0925, "step": 539904 }, { "epoch": 40.52292682926829, "grad_norm": 0.5770994424819946, "learning_rate": 0.002828, "loss": 1.0909, "step": 539968 }, { "epoch": 40.52772983114446, "grad_norm": 0.5876451730728149, "learning_rate": 0.002828, "loss": 1.0905, "step": 540032 }, { "epoch": 40.53253283302064, "grad_norm": 0.6242850422859192, "learning_rate": 0.002828, "loss": 1.0878, "step": 540096 }, { "epoch": 40.53733583489681, "grad_norm": 0.6436592936515808, "learning_rate": 0.002828, "loss": 1.0934, "step": 540160 }, { "epoch": 40.542138836772985, "grad_norm": 0.6318769454956055, "learning_rate": 0.002828, "loss": 1.0946, "step": 540224 }, { "epoch": 40.546941838649154, "grad_norm": 0.6540458798408508, "learning_rate": 0.002828, "loss": 1.0924, "step": 540288 }, { "epoch": 40.55174484052533, "grad_norm": 0.6689574718475342, "learning_rate": 0.002828, "loss": 1.0954, "step": 540352 }, { "epoch": 40.5565478424015, "grad_norm": 0.5450055003166199, "learning_rate": 0.002828, "loss": 1.092, "step": 540416 }, { "epoch": 40.561350844277676, "grad_norm": 0.5304721593856812, "learning_rate": 0.002828, "loss": 1.0933, "step": 540480 }, { "epoch": 40.566153846153846, "grad_norm": 0.676292896270752, "learning_rate": 0.002828, "loss": 1.0944, "step": 540544 }, { "epoch": 40.57095684803002, "grad_norm": 0.877575159072876, "learning_rate": 0.002828, "loss": 1.0927, "step": 540608 }, { "epoch": 40.57575984990619, "grad_norm": 0.5048443675041199, "learning_rate": 0.002828, "loss": 1.0995, "step": 540672 }, { "epoch": 40.58056285178236, "grad_norm": 0.6128528118133545, "learning_rate": 0.002828, "loss": 1.0891, "step": 540736 }, { "epoch": 40.58536585365854, "grad_norm": 0.606031596660614, "learning_rate": 0.002828, "loss": 1.0974, "step": 540800 }, { "epoch": 40.590168855534706, "grad_norm": 0.8159749507904053, "learning_rate": 0.002828, "loss": 1.0903, "step": 540864 }, { "epoch": 40.59497185741088, "grad_norm": 0.5860930681228638, "learning_rate": 0.002828, "loss": 1.0918, "step": 540928 }, { "epoch": 40.59977485928705, "grad_norm": 0.5595285892486572, "learning_rate": 0.002828, "loss": 1.0936, "step": 540992 }, { "epoch": 40.60457786116323, "grad_norm": 0.7063301205635071, "learning_rate": 0.002828, "loss": 1.0919, "step": 541056 }, { "epoch": 40.6093808630394, "grad_norm": 0.5304481983184814, "learning_rate": 0.002828, "loss": 1.0892, "step": 541120 }, { "epoch": 40.614183864915574, "grad_norm": 0.7145516872406006, "learning_rate": 0.002828, "loss": 1.096, "step": 541184 }, { "epoch": 40.618986866791744, "grad_norm": 0.5920206904411316, "learning_rate": 0.002828, "loss": 1.0949, "step": 541248 }, { "epoch": 40.62378986866792, "grad_norm": 0.6007336378097534, "learning_rate": 0.002828, "loss": 1.0978, "step": 541312 }, { "epoch": 40.62859287054409, "grad_norm": 0.6843711137771606, "learning_rate": 0.002828, "loss": 1.1003, "step": 541376 }, { "epoch": 40.633395872420266, "grad_norm": 0.566557765007019, "learning_rate": 0.002828, "loss": 1.0945, "step": 541440 }, { "epoch": 40.638198874296435, "grad_norm": 0.5469267964363098, "learning_rate": 0.002828, "loss": 1.0921, "step": 541504 }, { "epoch": 40.643001876172605, "grad_norm": 0.6030517816543579, "learning_rate": 0.002828, "loss": 1.0944, "step": 541568 }, { "epoch": 40.64780487804878, "grad_norm": 0.740609347820282, "learning_rate": 0.002828, "loss": 1.0956, "step": 541632 }, { "epoch": 40.65260787992495, "grad_norm": 0.6031094789505005, "learning_rate": 0.002828, "loss": 1.1013, "step": 541696 }, { "epoch": 40.65741088180113, "grad_norm": 0.5991085767745972, "learning_rate": 0.002828, "loss": 1.095, "step": 541760 }, { "epoch": 40.662213883677296, "grad_norm": 0.6071657538414001, "learning_rate": 0.002828, "loss": 1.0972, "step": 541824 }, { "epoch": 40.66701688555347, "grad_norm": 0.6647424101829529, "learning_rate": 0.002828, "loss": 1.0956, "step": 541888 }, { "epoch": 40.67181988742964, "grad_norm": 0.6323827505111694, "learning_rate": 0.002828, "loss": 1.1023, "step": 541952 }, { "epoch": 40.67662288930582, "grad_norm": 0.6064316034317017, "learning_rate": 0.002828, "loss": 1.0975, "step": 542016 }, { "epoch": 40.68142589118199, "grad_norm": 0.515994131565094, "learning_rate": 0.002828, "loss": 1.1038, "step": 542080 }, { "epoch": 40.686228893058164, "grad_norm": 0.5427857637405396, "learning_rate": 0.002828, "loss": 1.0952, "step": 542144 }, { "epoch": 40.69103189493433, "grad_norm": 0.6527212858200073, "learning_rate": 0.002828, "loss": 1.0949, "step": 542208 }, { "epoch": 40.69583489681051, "grad_norm": 0.6460388898849487, "learning_rate": 0.002828, "loss": 1.0974, "step": 542272 }, { "epoch": 40.70063789868668, "grad_norm": 0.6885424852371216, "learning_rate": 0.002828, "loss": 1.0957, "step": 542336 }, { "epoch": 40.70544090056285, "grad_norm": 0.6290729641914368, "learning_rate": 0.002828, "loss": 1.0961, "step": 542400 }, { "epoch": 40.710243902439025, "grad_norm": 0.5251750349998474, "learning_rate": 0.002828, "loss": 1.0976, "step": 542464 }, { "epoch": 40.715046904315194, "grad_norm": 0.5548868775367737, "learning_rate": 0.002828, "loss": 1.0948, "step": 542528 }, { "epoch": 40.71984990619137, "grad_norm": 0.6257069110870361, "learning_rate": 0.002828, "loss": 1.099, "step": 542592 }, { "epoch": 40.72465290806754, "grad_norm": 0.5366240739822388, "learning_rate": 0.002828, "loss": 1.091, "step": 542656 }, { "epoch": 40.72945590994372, "grad_norm": 0.7957614660263062, "learning_rate": 0.002828, "loss": 1.1006, "step": 542720 }, { "epoch": 40.734258911819886, "grad_norm": 0.5318620204925537, "learning_rate": 0.002828, "loss": 1.1019, "step": 542784 }, { "epoch": 40.73906191369606, "grad_norm": 0.6072466969490051, "learning_rate": 0.002828, "loss": 1.1016, "step": 542848 }, { "epoch": 40.74386491557223, "grad_norm": 0.6516512632369995, "learning_rate": 0.002828, "loss": 1.1004, "step": 542912 }, { "epoch": 40.74866791744841, "grad_norm": 0.6152677536010742, "learning_rate": 0.002828, "loss": 1.0987, "step": 542976 }, { "epoch": 40.75347091932458, "grad_norm": 0.7000126242637634, "learning_rate": 0.002828, "loss": 1.0917, "step": 543040 }, { "epoch": 40.758273921200754, "grad_norm": 0.7065082788467407, "learning_rate": 0.002828, "loss": 1.0984, "step": 543104 }, { "epoch": 40.76307692307692, "grad_norm": 0.5523930191993713, "learning_rate": 0.002828, "loss": 1.0957, "step": 543168 }, { "epoch": 40.76787992495309, "grad_norm": 0.4815172255039215, "learning_rate": 0.002828, "loss": 1.0991, "step": 543232 }, { "epoch": 40.77268292682927, "grad_norm": 0.768750011920929, "learning_rate": 0.002828, "loss": 1.0962, "step": 543296 }, { "epoch": 40.77748592870544, "grad_norm": 0.5711806416511536, "learning_rate": 0.002828, "loss": 1.1019, "step": 543360 }, { "epoch": 40.782288930581615, "grad_norm": 0.7046821713447571, "learning_rate": 0.002828, "loss": 1.0976, "step": 543424 }, { "epoch": 40.787091932457784, "grad_norm": 0.7322063446044922, "learning_rate": 0.002828, "loss": 1.0948, "step": 543488 }, { "epoch": 40.79189493433396, "grad_norm": 0.5857354402542114, "learning_rate": 0.002828, "loss": 1.106, "step": 543552 }, { "epoch": 40.79669793621013, "grad_norm": 0.6323885917663574, "learning_rate": 0.002828, "loss": 1.0987, "step": 543616 }, { "epoch": 40.801500938086306, "grad_norm": 0.6436946392059326, "learning_rate": 0.002828, "loss": 1.104, "step": 543680 }, { "epoch": 40.806303939962476, "grad_norm": 0.7924228310585022, "learning_rate": 0.002828, "loss": 1.0973, "step": 543744 }, { "epoch": 40.81110694183865, "grad_norm": 0.5841827988624573, "learning_rate": 0.002828, "loss": 1.0949, "step": 543808 }, { "epoch": 40.81590994371482, "grad_norm": 0.5505257248878479, "learning_rate": 0.002828, "loss": 1.1047, "step": 543872 }, { "epoch": 40.820712945591, "grad_norm": 0.7143750190734863, "learning_rate": 0.002828, "loss": 1.0979, "step": 543936 }, { "epoch": 40.82551594746717, "grad_norm": 0.7817658185958862, "learning_rate": 0.002828, "loss": 1.1134, "step": 544000 }, { "epoch": 40.83031894934334, "grad_norm": 0.7498055100440979, "learning_rate": 0.002828, "loss": 1.1031, "step": 544064 }, { "epoch": 40.83512195121951, "grad_norm": 0.6902478933334351, "learning_rate": 0.002828, "loss": 1.0984, "step": 544128 }, { "epoch": 40.83992495309568, "grad_norm": 0.7068681120872498, "learning_rate": 0.002828, "loss": 1.1016, "step": 544192 }, { "epoch": 40.84472795497186, "grad_norm": 0.6595316529273987, "learning_rate": 0.002828, "loss": 1.1013, "step": 544256 }, { "epoch": 40.84953095684803, "grad_norm": 0.5899179577827454, "learning_rate": 0.002828, "loss": 1.0953, "step": 544320 }, { "epoch": 40.854333958724204, "grad_norm": 0.5795630216598511, "learning_rate": 0.002828, "loss": 1.0997, "step": 544384 }, { "epoch": 40.859136960600374, "grad_norm": 0.5591762661933899, "learning_rate": 0.002828, "loss": 1.0958, "step": 544448 }, { "epoch": 40.86393996247655, "grad_norm": 0.6017462611198425, "learning_rate": 0.002828, "loss": 1.1009, "step": 544512 }, { "epoch": 40.86874296435272, "grad_norm": 0.4877956509590149, "learning_rate": 0.002828, "loss": 1.0972, "step": 544576 }, { "epoch": 40.873545966228896, "grad_norm": 0.6430007219314575, "learning_rate": 0.002828, "loss": 1.098, "step": 544640 }, { "epoch": 40.878348968105065, "grad_norm": 0.6140983700752258, "learning_rate": 0.002828, "loss": 1.1026, "step": 544704 }, { "epoch": 40.88315196998124, "grad_norm": 0.5751563310623169, "learning_rate": 0.002828, "loss": 1.0949, "step": 544768 }, { "epoch": 40.88795497185741, "grad_norm": 1.0069924592971802, "learning_rate": 0.002828, "loss": 1.0938, "step": 544832 }, { "epoch": 40.89275797373358, "grad_norm": 0.5524895787239075, "learning_rate": 0.002828, "loss": 1.0963, "step": 544896 }, { "epoch": 40.89756097560976, "grad_norm": 0.550719678401947, "learning_rate": 0.002828, "loss": 1.1027, "step": 544960 }, { "epoch": 40.902363977485926, "grad_norm": 0.5717636346817017, "learning_rate": 0.002828, "loss": 1.1007, "step": 545024 }, { "epoch": 40.9071669793621, "grad_norm": 1.6790406703948975, "learning_rate": 0.002828, "loss": 1.0998, "step": 545088 }, { "epoch": 40.91196998123827, "grad_norm": 0.6707130670547485, "learning_rate": 0.002828, "loss": 1.0965, "step": 545152 }, { "epoch": 40.91677298311445, "grad_norm": 0.6442863941192627, "learning_rate": 0.002828, "loss": 1.1022, "step": 545216 }, { "epoch": 40.92157598499062, "grad_norm": 0.6229715347290039, "learning_rate": 0.002828, "loss": 1.1009, "step": 545280 }, { "epoch": 40.926378986866794, "grad_norm": 0.649745523929596, "learning_rate": 0.002828, "loss": 1.1047, "step": 545344 }, { "epoch": 40.93118198874296, "grad_norm": 0.5942361354827881, "learning_rate": 0.002828, "loss": 1.099, "step": 545408 }, { "epoch": 40.93598499061914, "grad_norm": 0.6187747120857239, "learning_rate": 0.002828, "loss": 1.1, "step": 545472 }, { "epoch": 40.94078799249531, "grad_norm": 0.7702499628067017, "learning_rate": 0.002828, "loss": 1.1031, "step": 545536 }, { "epoch": 40.945590994371486, "grad_norm": 0.6279484033584595, "learning_rate": 0.002828, "loss": 1.1026, "step": 545600 }, { "epoch": 40.950393996247655, "grad_norm": 0.5844237208366394, "learning_rate": 0.002828, "loss": 1.103, "step": 545664 }, { "epoch": 40.955196998123824, "grad_norm": 0.5983730554580688, "learning_rate": 0.002828, "loss": 1.0975, "step": 545728 }, { "epoch": 40.96, "grad_norm": 0.5255396366119385, "learning_rate": 0.002828, "loss": 1.1032, "step": 545792 }, { "epoch": 40.96480300187617, "grad_norm": 0.651672899723053, "learning_rate": 0.002828, "loss": 1.0965, "step": 545856 }, { "epoch": 40.96960600375235, "grad_norm": 0.6900015473365784, "learning_rate": 0.002828, "loss": 1.0953, "step": 545920 }, { "epoch": 40.974409005628516, "grad_norm": 0.6562929153442383, "learning_rate": 0.002828, "loss": 1.1003, "step": 545984 }, { "epoch": 40.97921200750469, "grad_norm": 0.5922935605049133, "learning_rate": 0.002828, "loss": 1.1015, "step": 546048 }, { "epoch": 40.98401500938086, "grad_norm": 0.6217307448387146, "learning_rate": 0.002828, "loss": 1.1096, "step": 546112 }, { "epoch": 40.98881801125704, "grad_norm": 0.6182146072387695, "learning_rate": 0.002828, "loss": 1.0996, "step": 546176 }, { "epoch": 40.99362101313321, "grad_norm": 0.5867896676063538, "learning_rate": 0.002828, "loss": 1.102, "step": 546240 }, { "epoch": 40.998424015009384, "grad_norm": 0.6607248187065125, "learning_rate": 0.002828, "loss": 1.1067, "step": 546304 }, { "epoch": 41.00322701688555, "grad_norm": 0.5025272965431213, "learning_rate": 0.002828, "loss": 1.0785, "step": 546368 }, { "epoch": 41.00803001876172, "grad_norm": 0.5673875212669373, "learning_rate": 0.002828, "loss": 1.0593, "step": 546432 }, { "epoch": 41.0128330206379, "grad_norm": 0.6235575079917908, "learning_rate": 0.002828, "loss": 1.0606, "step": 546496 }, { "epoch": 41.01763602251407, "grad_norm": 0.6856295466423035, "learning_rate": 0.002828, "loss": 1.0666, "step": 546560 }, { "epoch": 41.022439024390245, "grad_norm": 0.591202437877655, "learning_rate": 0.002828, "loss": 1.0644, "step": 546624 }, { "epoch": 41.027242026266414, "grad_norm": 0.6456746459007263, "learning_rate": 0.002828, "loss": 1.068, "step": 546688 }, { "epoch": 41.03204502814259, "grad_norm": 0.8349180221557617, "learning_rate": 0.002828, "loss": 1.0669, "step": 546752 }, { "epoch": 41.03684803001876, "grad_norm": 0.6748512387275696, "learning_rate": 0.002828, "loss": 1.0691, "step": 546816 }, { "epoch": 41.041651031894936, "grad_norm": 0.6579387187957764, "learning_rate": 0.002828, "loss": 1.0695, "step": 546880 }, { "epoch": 41.046454033771106, "grad_norm": 0.6043445467948914, "learning_rate": 0.002828, "loss": 1.0627, "step": 546944 }, { "epoch": 41.05125703564728, "grad_norm": 0.853763997554779, "learning_rate": 0.002828, "loss": 1.0677, "step": 547008 }, { "epoch": 41.05606003752345, "grad_norm": 0.703504741191864, "learning_rate": 0.002828, "loss": 1.067, "step": 547072 }, { "epoch": 41.06086303939963, "grad_norm": 0.5170365571975708, "learning_rate": 0.002828, "loss": 1.0711, "step": 547136 }, { "epoch": 41.0656660412758, "grad_norm": 0.7496975660324097, "learning_rate": 0.002828, "loss": 1.0742, "step": 547200 }, { "epoch": 41.07046904315197, "grad_norm": 0.5645685195922852, "learning_rate": 0.002828, "loss": 1.0664, "step": 547264 }, { "epoch": 41.07527204502814, "grad_norm": 0.6629078388214111, "learning_rate": 0.002828, "loss": 1.0731, "step": 547328 }, { "epoch": 41.08007504690431, "grad_norm": 0.6933609247207642, "learning_rate": 0.002828, "loss": 1.0642, "step": 547392 }, { "epoch": 41.08487804878049, "grad_norm": 0.5706798434257507, "learning_rate": 0.002828, "loss": 1.0701, "step": 547456 }, { "epoch": 41.08968105065666, "grad_norm": 0.7231749892234802, "learning_rate": 0.002828, "loss": 1.0729, "step": 547520 }, { "epoch": 41.094484052532835, "grad_norm": 0.6114044189453125, "learning_rate": 0.002828, "loss": 1.074, "step": 547584 }, { "epoch": 41.099287054409004, "grad_norm": 0.5615615248680115, "learning_rate": 0.002828, "loss": 1.08, "step": 547648 }, { "epoch": 41.10409005628518, "grad_norm": 0.5425843000411987, "learning_rate": 0.002828, "loss": 1.071, "step": 547712 }, { "epoch": 41.10889305816135, "grad_norm": 0.72618168592453, "learning_rate": 0.002828, "loss": 1.0716, "step": 547776 }, { "epoch": 41.113696060037526, "grad_norm": 0.7147377133369446, "learning_rate": 0.002828, "loss": 1.0764, "step": 547840 }, { "epoch": 41.118499061913695, "grad_norm": 0.6635823249816895, "learning_rate": 0.002828, "loss": 1.0703, "step": 547904 }, { "epoch": 41.12330206378987, "grad_norm": 0.724740743637085, "learning_rate": 0.002828, "loss": 1.0793, "step": 547968 }, { "epoch": 41.12810506566604, "grad_norm": 0.6134108901023865, "learning_rate": 0.002828, "loss": 1.0714, "step": 548032 }, { "epoch": 41.13290806754221, "grad_norm": 0.6556316018104553, "learning_rate": 0.002828, "loss": 1.0735, "step": 548096 }, { "epoch": 41.13771106941839, "grad_norm": 0.567468523979187, "learning_rate": 0.002828, "loss": 1.0748, "step": 548160 }, { "epoch": 41.142514071294556, "grad_norm": 0.4976537227630615, "learning_rate": 0.002828, "loss": 1.0669, "step": 548224 }, { "epoch": 41.14731707317073, "grad_norm": 0.5294303297996521, "learning_rate": 0.002828, "loss": 1.0752, "step": 548288 }, { "epoch": 41.1521200750469, "grad_norm": 0.7013516426086426, "learning_rate": 0.002828, "loss": 1.0757, "step": 548352 }, { "epoch": 41.15692307692308, "grad_norm": 0.803352415561676, "learning_rate": 0.002828, "loss": 1.0707, "step": 548416 }, { "epoch": 41.16172607879925, "grad_norm": 0.711155891418457, "learning_rate": 0.002828, "loss": 1.0781, "step": 548480 }, { "epoch": 41.166529080675424, "grad_norm": 0.6899096965789795, "learning_rate": 0.002828, "loss": 1.0741, "step": 548544 }, { "epoch": 41.171332082551594, "grad_norm": 0.713313639163971, "learning_rate": 0.002828, "loss": 1.074, "step": 548608 }, { "epoch": 41.17613508442777, "grad_norm": 0.6300371289253235, "learning_rate": 0.002828, "loss": 1.0737, "step": 548672 }, { "epoch": 41.18093808630394, "grad_norm": 0.5804073810577393, "learning_rate": 0.002828, "loss": 1.0755, "step": 548736 }, { "epoch": 41.185741088180116, "grad_norm": 0.5882118940353394, "learning_rate": 0.002828, "loss": 1.076, "step": 548800 }, { "epoch": 41.190544090056285, "grad_norm": 0.6168975830078125, "learning_rate": 0.002828, "loss": 1.0765, "step": 548864 }, { "epoch": 41.195347091932454, "grad_norm": 0.7686378955841064, "learning_rate": 0.002828, "loss": 1.08, "step": 548928 }, { "epoch": 41.20015009380863, "grad_norm": 0.6872912049293518, "learning_rate": 0.002828, "loss": 1.0768, "step": 548992 }, { "epoch": 41.2049530956848, "grad_norm": 0.6079062819480896, "learning_rate": 0.002828, "loss": 1.076, "step": 549056 }, { "epoch": 41.20975609756098, "grad_norm": 0.6001817584037781, "learning_rate": 0.002828, "loss": 1.0793, "step": 549120 }, { "epoch": 41.214559099437146, "grad_norm": 0.8148412108421326, "learning_rate": 0.002828, "loss": 1.0822, "step": 549184 }, { "epoch": 41.21936210131332, "grad_norm": 0.6967695951461792, "learning_rate": 0.002828, "loss": 1.078, "step": 549248 }, { "epoch": 41.22416510318949, "grad_norm": 0.5821467638015747, "learning_rate": 0.002828, "loss": 1.0762, "step": 549312 }, { "epoch": 41.22896810506567, "grad_norm": 0.592621386051178, "learning_rate": 0.002828, "loss": 1.0737, "step": 549376 }, { "epoch": 41.23377110694184, "grad_norm": 0.5450983643531799, "learning_rate": 0.002828, "loss": 1.078, "step": 549440 }, { "epoch": 41.238574108818014, "grad_norm": 0.6808087229728699, "learning_rate": 0.002828, "loss": 1.0754, "step": 549504 }, { "epoch": 41.24337711069418, "grad_norm": 0.8498914837837219, "learning_rate": 0.002828, "loss": 1.0768, "step": 549568 }, { "epoch": 41.24818011257036, "grad_norm": 0.6844555139541626, "learning_rate": 0.002828, "loss": 1.0779, "step": 549632 }, { "epoch": 41.25298311444653, "grad_norm": 0.5792409181594849, "learning_rate": 0.002828, "loss": 1.0793, "step": 549696 }, { "epoch": 41.2577861163227, "grad_norm": 0.7109765410423279, "learning_rate": 0.002828, "loss": 1.0782, "step": 549760 }, { "epoch": 41.262589118198875, "grad_norm": 0.5593913793563843, "learning_rate": 0.002828, "loss": 1.0805, "step": 549824 }, { "epoch": 41.267392120075044, "grad_norm": 0.6583130955696106, "learning_rate": 0.002828, "loss": 1.0802, "step": 549888 }, { "epoch": 41.27219512195122, "grad_norm": 0.6874505877494812, "learning_rate": 0.002828, "loss": 1.0821, "step": 549952 }, { "epoch": 41.27699812382739, "grad_norm": 0.6103010177612305, "learning_rate": 0.002828, "loss": 1.0768, "step": 550016 }, { "epoch": 41.281801125703566, "grad_norm": 0.507485568523407, "learning_rate": 0.002828, "loss": 1.0839, "step": 550080 }, { "epoch": 41.286604127579736, "grad_norm": 0.5909212231636047, "learning_rate": 0.002828, "loss": 1.0764, "step": 550144 }, { "epoch": 41.29140712945591, "grad_norm": 0.6002563834190369, "learning_rate": 0.002828, "loss": 1.0735, "step": 550208 }, { "epoch": 41.29621013133208, "grad_norm": 0.6409716010093689, "learning_rate": 0.002828, "loss": 1.077, "step": 550272 }, { "epoch": 41.30101313320826, "grad_norm": 0.534680187702179, "learning_rate": 0.002828, "loss": 1.0798, "step": 550336 }, { "epoch": 41.30581613508443, "grad_norm": 0.7306864261627197, "learning_rate": 0.002828, "loss": 1.0778, "step": 550400 }, { "epoch": 41.310619136960604, "grad_norm": 0.7213438153266907, "learning_rate": 0.002828, "loss": 1.0782, "step": 550464 }, { "epoch": 41.31542213883677, "grad_norm": 0.6151289343833923, "learning_rate": 0.002828, "loss": 1.0804, "step": 550528 }, { "epoch": 41.32022514071294, "grad_norm": 0.6030688881874084, "learning_rate": 0.002828, "loss": 1.0813, "step": 550592 }, { "epoch": 41.32502814258912, "grad_norm": 0.6703411340713501, "learning_rate": 0.002828, "loss": 1.0803, "step": 550656 }, { "epoch": 41.32983114446529, "grad_norm": 0.6406719088554382, "learning_rate": 0.002828, "loss": 1.0799, "step": 550720 }, { "epoch": 41.334634146341465, "grad_norm": 0.6375758647918701, "learning_rate": 0.002828, "loss": 1.0795, "step": 550784 }, { "epoch": 41.339437148217634, "grad_norm": 0.6271138191223145, "learning_rate": 0.002828, "loss": 1.0772, "step": 550848 }, { "epoch": 41.34424015009381, "grad_norm": 0.5871312618255615, "learning_rate": 0.002828, "loss": 1.0793, "step": 550912 }, { "epoch": 41.34904315196998, "grad_norm": 0.7420594692230225, "learning_rate": 0.002828, "loss": 1.0838, "step": 550976 }, { "epoch": 41.353846153846156, "grad_norm": 0.6966577172279358, "learning_rate": 0.002828, "loss": 1.0814, "step": 551040 }, { "epoch": 41.358649155722325, "grad_norm": 0.6194618940353394, "learning_rate": 0.002828, "loss": 1.0822, "step": 551104 }, { "epoch": 41.3634521575985, "grad_norm": 0.6598330140113831, "learning_rate": 0.002828, "loss": 1.0817, "step": 551168 }, { "epoch": 41.36825515947467, "grad_norm": 0.6324847340583801, "learning_rate": 0.002828, "loss": 1.0772, "step": 551232 }, { "epoch": 41.37305816135085, "grad_norm": 0.5739226937294006, "learning_rate": 0.002828, "loss": 1.0782, "step": 551296 }, { "epoch": 41.37786116322702, "grad_norm": 0.5579782724380493, "learning_rate": 0.002828, "loss": 1.0834, "step": 551360 }, { "epoch": 41.382664165103186, "grad_norm": 0.6816611886024475, "learning_rate": 0.002828, "loss": 1.0829, "step": 551424 }, { "epoch": 41.38746716697936, "grad_norm": 0.5585365891456604, "learning_rate": 0.002828, "loss": 1.0804, "step": 551488 }, { "epoch": 41.39227016885553, "grad_norm": 0.6159017086029053, "learning_rate": 0.002828, "loss": 1.0819, "step": 551552 }, { "epoch": 41.39707317073171, "grad_norm": 0.5967724323272705, "learning_rate": 0.002828, "loss": 1.0869, "step": 551616 }, { "epoch": 41.40187617260788, "grad_norm": 0.6096137166023254, "learning_rate": 0.002828, "loss": 1.0821, "step": 551680 }, { "epoch": 41.406679174484054, "grad_norm": 0.7553973197937012, "learning_rate": 0.002828, "loss": 1.0882, "step": 551744 }, { "epoch": 41.411482176360224, "grad_norm": 0.7201663255691528, "learning_rate": 0.002828, "loss": 1.0743, "step": 551808 }, { "epoch": 41.4162851782364, "grad_norm": 0.7154415845870972, "learning_rate": 0.002828, "loss": 1.0782, "step": 551872 }, { "epoch": 41.42108818011257, "grad_norm": 0.5563473105430603, "learning_rate": 0.002828, "loss": 1.082, "step": 551936 }, { "epoch": 41.425891181988746, "grad_norm": 0.7097707986831665, "learning_rate": 0.002828, "loss": 1.0813, "step": 552000 }, { "epoch": 41.430694183864915, "grad_norm": 0.6488261818885803, "learning_rate": 0.002828, "loss": 1.0801, "step": 552064 }, { "epoch": 41.43549718574109, "grad_norm": 0.5918857455253601, "learning_rate": 0.002828, "loss": 1.0844, "step": 552128 }, { "epoch": 41.44030018761726, "grad_norm": 0.6212255954742432, "learning_rate": 0.002828, "loss": 1.0822, "step": 552192 }, { "epoch": 41.44510318949343, "grad_norm": 0.696937620639801, "learning_rate": 0.002828, "loss": 1.0863, "step": 552256 }, { "epoch": 41.44990619136961, "grad_norm": 0.5609291195869446, "learning_rate": 0.002828, "loss": 1.0809, "step": 552320 }, { "epoch": 41.454709193245776, "grad_norm": 0.5481732487678528, "learning_rate": 0.002828, "loss": 1.0829, "step": 552384 }, { "epoch": 41.45951219512195, "grad_norm": 0.676578164100647, "learning_rate": 0.002828, "loss": 1.0877, "step": 552448 }, { "epoch": 41.46431519699812, "grad_norm": 0.5765472650527954, "learning_rate": 0.002828, "loss": 1.08, "step": 552512 }, { "epoch": 41.4691181988743, "grad_norm": 0.508776843547821, "learning_rate": 0.002828, "loss": 1.0824, "step": 552576 }, { "epoch": 41.47392120075047, "grad_norm": 0.6089415550231934, "learning_rate": 0.002828, "loss": 1.0807, "step": 552640 }, { "epoch": 41.478724202626644, "grad_norm": 0.7184882164001465, "learning_rate": 0.002828, "loss": 1.0858, "step": 552704 }, { "epoch": 41.48352720450281, "grad_norm": 0.5389273762702942, "learning_rate": 0.002828, "loss": 1.0862, "step": 552768 }, { "epoch": 41.48833020637899, "grad_norm": 0.5319380164146423, "learning_rate": 0.002828, "loss": 1.0869, "step": 552832 }, { "epoch": 41.49313320825516, "grad_norm": 0.6539042592048645, "learning_rate": 0.002828, "loss": 1.0829, "step": 552896 }, { "epoch": 41.497936210131336, "grad_norm": 0.7643285989761353, "learning_rate": 0.002828, "loss": 1.0806, "step": 552960 }, { "epoch": 41.502739212007505, "grad_norm": 0.6605548858642578, "learning_rate": 0.002828, "loss": 1.0857, "step": 553024 }, { "epoch": 41.507542213883674, "grad_norm": 0.5355969071388245, "learning_rate": 0.002828, "loss": 1.0899, "step": 553088 }, { "epoch": 41.51234521575985, "grad_norm": 0.6309624910354614, "learning_rate": 0.002828, "loss": 1.0881, "step": 553152 }, { "epoch": 41.51714821763602, "grad_norm": 0.7098450064659119, "learning_rate": 0.002828, "loss": 1.0886, "step": 553216 }, { "epoch": 41.5219512195122, "grad_norm": 0.6140933632850647, "learning_rate": 0.002828, "loss": 1.0859, "step": 553280 }, { "epoch": 41.526754221388366, "grad_norm": 0.6573895812034607, "learning_rate": 0.002828, "loss": 1.0836, "step": 553344 }, { "epoch": 41.53155722326454, "grad_norm": 0.642605721950531, "learning_rate": 0.002828, "loss": 1.0935, "step": 553408 }, { "epoch": 41.53636022514071, "grad_norm": 0.5573644638061523, "learning_rate": 0.002828, "loss": 1.0861, "step": 553472 }, { "epoch": 41.54116322701689, "grad_norm": 0.48514696955680847, "learning_rate": 0.002828, "loss": 1.0862, "step": 553536 }, { "epoch": 41.54596622889306, "grad_norm": 0.6432372331619263, "learning_rate": 0.002828, "loss": 1.0882, "step": 553600 }, { "epoch": 41.550769230769234, "grad_norm": 0.564486026763916, "learning_rate": 0.002828, "loss": 1.093, "step": 553664 }, { "epoch": 41.5555722326454, "grad_norm": 0.6009403467178345, "learning_rate": 0.002828, "loss": 1.0884, "step": 553728 }, { "epoch": 41.56037523452157, "grad_norm": 0.6792386770248413, "learning_rate": 0.002828, "loss": 1.0835, "step": 553792 }, { "epoch": 41.56517823639775, "grad_norm": 0.49415624141693115, "learning_rate": 0.002828, "loss": 1.0933, "step": 553856 }, { "epoch": 41.56998123827392, "grad_norm": 0.7057024836540222, "learning_rate": 0.002828, "loss": 1.0842, "step": 553920 }, { "epoch": 41.574784240150095, "grad_norm": 0.576983630657196, "learning_rate": 0.002828, "loss": 1.0857, "step": 553984 }, { "epoch": 41.579587242026264, "grad_norm": 0.6637498736381531, "learning_rate": 0.002828, "loss": 1.0828, "step": 554048 }, { "epoch": 41.58439024390244, "grad_norm": 0.7337366938591003, "learning_rate": 0.002828, "loss": 1.0884, "step": 554112 }, { "epoch": 41.58919324577861, "grad_norm": 0.6840522289276123, "learning_rate": 0.002828, "loss": 1.0846, "step": 554176 }, { "epoch": 41.593996247654786, "grad_norm": 0.5753034949302673, "learning_rate": 0.002828, "loss": 1.081, "step": 554240 }, { "epoch": 41.598799249530956, "grad_norm": 0.6625789999961853, "learning_rate": 0.002828, "loss": 1.0843, "step": 554304 }, { "epoch": 41.60360225140713, "grad_norm": 0.6237214207649231, "learning_rate": 0.002828, "loss": 1.088, "step": 554368 }, { "epoch": 41.6084052532833, "grad_norm": 0.6372871994972229, "learning_rate": 0.002828, "loss": 1.0848, "step": 554432 }, { "epoch": 41.61320825515948, "grad_norm": 0.6904455423355103, "learning_rate": 0.002828, "loss": 1.0899, "step": 554496 }, { "epoch": 41.61801125703565, "grad_norm": 0.7011419534683228, "learning_rate": 0.002828, "loss": 1.0889, "step": 554560 }, { "epoch": 41.622814258911816, "grad_norm": 0.620915412902832, "learning_rate": 0.002828, "loss": 1.0887, "step": 554624 }, { "epoch": 41.62761726078799, "grad_norm": 0.5954155921936035, "learning_rate": 0.002828, "loss": 1.0886, "step": 554688 }, { "epoch": 41.63242026266416, "grad_norm": 0.615436851978302, "learning_rate": 0.002828, "loss": 1.0932, "step": 554752 }, { "epoch": 41.63722326454034, "grad_norm": 0.6013243198394775, "learning_rate": 0.002828, "loss": 1.0903, "step": 554816 }, { "epoch": 41.64202626641651, "grad_norm": 0.5366150736808777, "learning_rate": 0.002828, "loss": 1.0909, "step": 554880 }, { "epoch": 41.646829268292684, "grad_norm": 0.5783130526542664, "learning_rate": 0.002828, "loss": 1.0903, "step": 554944 }, { "epoch": 41.651632270168854, "grad_norm": 0.7552841901779175, "learning_rate": 0.002828, "loss": 1.0856, "step": 555008 }, { "epoch": 41.65643527204503, "grad_norm": 0.6122512817382812, "learning_rate": 0.002828, "loss": 1.0872, "step": 555072 }, { "epoch": 41.6612382739212, "grad_norm": 0.5579005479812622, "learning_rate": 0.002828, "loss": 1.0885, "step": 555136 }, { "epoch": 41.666041275797376, "grad_norm": 0.6808589696884155, "learning_rate": 0.002828, "loss": 1.0896, "step": 555200 }, { "epoch": 41.670844277673545, "grad_norm": 0.6607319712638855, "learning_rate": 0.002828, "loss": 1.092, "step": 555264 }, { "epoch": 41.67564727954972, "grad_norm": 0.6016877889633179, "learning_rate": 0.002828, "loss": 1.0902, "step": 555328 }, { "epoch": 41.68045028142589, "grad_norm": 0.546603262424469, "learning_rate": 0.002828, "loss": 1.092, "step": 555392 }, { "epoch": 41.68525328330206, "grad_norm": 0.7585775852203369, "learning_rate": 0.002828, "loss": 1.0935, "step": 555456 }, { "epoch": 41.69005628517824, "grad_norm": 0.5713100433349609, "learning_rate": 0.002828, "loss": 1.0885, "step": 555520 }, { "epoch": 41.694859287054406, "grad_norm": 0.5373910069465637, "learning_rate": 0.002828, "loss": 1.0914, "step": 555584 }, { "epoch": 41.69966228893058, "grad_norm": 0.5792741775512695, "learning_rate": 0.002828, "loss": 1.0889, "step": 555648 }, { "epoch": 41.70446529080675, "grad_norm": 0.5292046070098877, "learning_rate": 0.002828, "loss": 1.0892, "step": 555712 }, { "epoch": 41.70926829268293, "grad_norm": 0.5693057179450989, "learning_rate": 0.002828, "loss": 1.085, "step": 555776 }, { "epoch": 41.7140712945591, "grad_norm": 0.7015483379364014, "learning_rate": 0.002828, "loss": 1.0875, "step": 555840 }, { "epoch": 41.718874296435274, "grad_norm": 0.508471667766571, "learning_rate": 0.002828, "loss": 1.0934, "step": 555904 }, { "epoch": 41.72367729831144, "grad_norm": 0.7722852826118469, "learning_rate": 0.002828, "loss": 1.0912, "step": 555968 }, { "epoch": 41.72848030018762, "grad_norm": 0.8116500973701477, "learning_rate": 0.002828, "loss": 1.0876, "step": 556032 }, { "epoch": 41.73328330206379, "grad_norm": 0.5903668999671936, "learning_rate": 0.002828, "loss": 1.0884, "step": 556096 }, { "epoch": 41.738086303939966, "grad_norm": 0.6233097910881042, "learning_rate": 0.002828, "loss": 1.0913, "step": 556160 }, { "epoch": 41.742889305816135, "grad_norm": 0.5414838194847107, "learning_rate": 0.002828, "loss": 1.0879, "step": 556224 }, { "epoch": 41.747692307692304, "grad_norm": 0.704927384853363, "learning_rate": 0.002828, "loss": 1.0885, "step": 556288 }, { "epoch": 41.75249530956848, "grad_norm": 0.6467387676239014, "learning_rate": 0.002828, "loss": 1.0921, "step": 556352 }, { "epoch": 41.75729831144465, "grad_norm": 0.5254771113395691, "learning_rate": 0.002828, "loss": 1.0931, "step": 556416 }, { "epoch": 41.76210131332083, "grad_norm": 0.5494102835655212, "learning_rate": 0.002828, "loss": 1.0928, "step": 556480 }, { "epoch": 41.766904315196996, "grad_norm": 0.7095513343811035, "learning_rate": 0.002828, "loss": 1.0889, "step": 556544 }, { "epoch": 41.77170731707317, "grad_norm": 0.5700022578239441, "learning_rate": 0.002828, "loss": 1.0934, "step": 556608 }, { "epoch": 41.77651031894934, "grad_norm": 0.7591579556465149, "learning_rate": 0.002828, "loss": 1.0907, "step": 556672 }, { "epoch": 41.78131332082552, "grad_norm": 0.5941113829612732, "learning_rate": 0.002828, "loss": 1.0973, "step": 556736 }, { "epoch": 41.78611632270169, "grad_norm": 0.5992494821548462, "learning_rate": 0.002828, "loss": 1.0896, "step": 556800 }, { "epoch": 41.790919324577864, "grad_norm": 0.585478663444519, "learning_rate": 0.002828, "loss": 1.0916, "step": 556864 }, { "epoch": 41.79572232645403, "grad_norm": 0.5099025964736938, "learning_rate": 0.002828, "loss": 1.0938, "step": 556928 }, { "epoch": 41.80052532833021, "grad_norm": 0.5065568685531616, "learning_rate": 0.002828, "loss": 1.0908, "step": 556992 }, { "epoch": 41.80532833020638, "grad_norm": 0.6000770926475525, "learning_rate": 0.002828, "loss": 1.0907, "step": 557056 }, { "epoch": 41.81013133208255, "grad_norm": 0.5981700420379639, "learning_rate": 0.002828, "loss": 1.0897, "step": 557120 }, { "epoch": 41.814934333958725, "grad_norm": 0.7779768705368042, "learning_rate": 0.002828, "loss": 1.0911, "step": 557184 }, { "epoch": 41.819737335834894, "grad_norm": 0.5827203989028931, "learning_rate": 0.002828, "loss": 1.0905, "step": 557248 }, { "epoch": 41.82454033771107, "grad_norm": 0.5996590256690979, "learning_rate": 0.002828, "loss": 1.0911, "step": 557312 }, { "epoch": 41.82934333958724, "grad_norm": 0.6872954964637756, "learning_rate": 0.002828, "loss": 1.0918, "step": 557376 }, { "epoch": 41.834146341463416, "grad_norm": 0.6303712725639343, "learning_rate": 0.002828, "loss": 1.0872, "step": 557440 }, { "epoch": 41.838949343339586, "grad_norm": 0.5935240387916565, "learning_rate": 0.002828, "loss": 1.0914, "step": 557504 }, { "epoch": 41.84375234521576, "grad_norm": 0.6666385531425476, "learning_rate": 0.002828, "loss": 1.0858, "step": 557568 }, { "epoch": 41.84855534709193, "grad_norm": 0.6222832202911377, "learning_rate": 0.002828, "loss": 1.0932, "step": 557632 }, { "epoch": 41.85335834896811, "grad_norm": 0.5753621459007263, "learning_rate": 0.002828, "loss": 1.0935, "step": 557696 }, { "epoch": 41.85816135084428, "grad_norm": 0.6276858448982239, "learning_rate": 0.002828, "loss": 1.0982, "step": 557760 }, { "epoch": 41.862964352720454, "grad_norm": 0.7073177695274353, "learning_rate": 0.002828, "loss": 1.0942, "step": 557824 }, { "epoch": 41.86776735459662, "grad_norm": 0.5554404854774475, "learning_rate": 0.002828, "loss": 1.0938, "step": 557888 }, { "epoch": 41.87257035647279, "grad_norm": 0.5417864918708801, "learning_rate": 0.002828, "loss": 1.0922, "step": 557952 }, { "epoch": 41.87737335834897, "grad_norm": 0.5264939665794373, "learning_rate": 0.002828, "loss": 1.0935, "step": 558016 }, { "epoch": 41.88217636022514, "grad_norm": 0.5799403190612793, "learning_rate": 0.002828, "loss": 1.0956, "step": 558080 }, { "epoch": 41.886979362101314, "grad_norm": 0.6113492250442505, "learning_rate": 0.002828, "loss": 1.0918, "step": 558144 }, { "epoch": 41.891782363977484, "grad_norm": 0.619025707244873, "learning_rate": 0.002828, "loss": 1.0907, "step": 558208 }, { "epoch": 41.89658536585366, "grad_norm": 0.6421979665756226, "learning_rate": 0.002828, "loss": 1.0933, "step": 558272 }, { "epoch": 41.90138836772983, "grad_norm": 0.5564460158348083, "learning_rate": 0.002828, "loss": 1.088, "step": 558336 }, { "epoch": 41.906191369606006, "grad_norm": 0.7520004510879517, "learning_rate": 0.002828, "loss": 1.0876, "step": 558400 }, { "epoch": 41.910994371482175, "grad_norm": 0.5554314851760864, "learning_rate": 0.002828, "loss": 1.0947, "step": 558464 }, { "epoch": 41.91579737335835, "grad_norm": 0.562497079372406, "learning_rate": 0.002828, "loss": 1.0915, "step": 558528 }, { "epoch": 41.92060037523452, "grad_norm": 0.5724356174468994, "learning_rate": 0.002828, "loss": 1.0886, "step": 558592 }, { "epoch": 41.9254033771107, "grad_norm": 0.5435642004013062, "learning_rate": 0.002828, "loss": 1.1026, "step": 558656 }, { "epoch": 41.93020637898687, "grad_norm": 0.7154808640480042, "learning_rate": 0.002828, "loss": 1.0907, "step": 558720 }, { "epoch": 41.935009380863036, "grad_norm": 0.6489702463150024, "learning_rate": 0.002828, "loss": 1.0937, "step": 558784 }, { "epoch": 41.93981238273921, "grad_norm": 0.5514934659004211, "learning_rate": 0.002828, "loss": 1.0975, "step": 558848 }, { "epoch": 41.94461538461538, "grad_norm": 0.838455855846405, "learning_rate": 0.002828, "loss": 1.096, "step": 558912 }, { "epoch": 41.94941838649156, "grad_norm": 0.5009222030639648, "learning_rate": 0.002828, "loss": 1.0892, "step": 558976 }, { "epoch": 41.95422138836773, "grad_norm": 0.5667208433151245, "learning_rate": 0.002828, "loss": 1.0963, "step": 559040 }, { "epoch": 41.959024390243904, "grad_norm": 0.6530715823173523, "learning_rate": 0.002828, "loss": 1.0912, "step": 559104 }, { "epoch": 41.96382739212007, "grad_norm": 0.621535062789917, "learning_rate": 0.002828, "loss": 1.0917, "step": 559168 }, { "epoch": 41.96863039399625, "grad_norm": 0.5420042872428894, "learning_rate": 0.002828, "loss": 1.0946, "step": 559232 }, { "epoch": 41.97343339587242, "grad_norm": 0.5837153792381287, "learning_rate": 0.002828, "loss": 1.0953, "step": 559296 }, { "epoch": 41.978236397748596, "grad_norm": 0.6675498485565186, "learning_rate": 0.002828, "loss": 1.0909, "step": 559360 }, { "epoch": 41.983039399624765, "grad_norm": 0.6540782451629639, "learning_rate": 0.002828, "loss": 1.0977, "step": 559424 }, { "epoch": 41.98784240150094, "grad_norm": 0.5713112354278564, "learning_rate": 0.002828, "loss": 1.0951, "step": 559488 }, { "epoch": 41.99264540337711, "grad_norm": 0.5297588109970093, "learning_rate": 0.002828, "loss": 1.0938, "step": 559552 }, { "epoch": 41.99744840525328, "grad_norm": 0.6510352492332458, "learning_rate": 0.002828, "loss": 1.0962, "step": 559616 }, { "epoch": 42.00225140712946, "grad_norm": 0.5886886715888977, "learning_rate": 0.002828, "loss": 1.0745, "step": 559680 }, { "epoch": 42.007054409005626, "grad_norm": 0.6688248515129089, "learning_rate": 0.002828, "loss": 1.0537, "step": 559744 }, { "epoch": 42.0118574108818, "grad_norm": 0.6012933850288391, "learning_rate": 0.002828, "loss": 1.0622, "step": 559808 }, { "epoch": 42.01666041275797, "grad_norm": 0.6174977421760559, "learning_rate": 0.002828, "loss": 1.0622, "step": 559872 }, { "epoch": 42.02146341463415, "grad_norm": 0.5445956587791443, "learning_rate": 0.002828, "loss": 1.057, "step": 559936 }, { "epoch": 42.02626641651032, "grad_norm": 0.6119956374168396, "learning_rate": 0.002828, "loss": 1.0605, "step": 560000 }, { "epoch": 42.031069418386494, "grad_norm": 0.6327849626541138, "learning_rate": 0.002828, "loss": 1.0595, "step": 560064 }, { "epoch": 42.03587242026266, "grad_norm": 0.6399878859519958, "learning_rate": 0.002828, "loss": 1.0573, "step": 560128 }, { "epoch": 42.04067542213884, "grad_norm": 0.6213284730911255, "learning_rate": 0.002828, "loss": 1.062, "step": 560192 }, { "epoch": 42.04547842401501, "grad_norm": 0.5296103358268738, "learning_rate": 0.002828, "loss": 1.0645, "step": 560256 }, { "epoch": 42.050281425891185, "grad_norm": 0.698832094669342, "learning_rate": 0.002828, "loss": 1.0657, "step": 560320 }, { "epoch": 42.055084427767355, "grad_norm": 0.6121209859848022, "learning_rate": 0.002828, "loss": 1.0576, "step": 560384 }, { "epoch": 42.059887429643524, "grad_norm": 0.6628225445747375, "learning_rate": 0.002828, "loss": 1.0587, "step": 560448 }, { "epoch": 42.0646904315197, "grad_norm": 0.522656261920929, "learning_rate": 0.002828, "loss": 1.0611, "step": 560512 }, { "epoch": 42.06949343339587, "grad_norm": 0.5090030431747437, "learning_rate": 0.002828, "loss": 1.0588, "step": 560576 }, { "epoch": 42.074296435272046, "grad_norm": 0.7143341898918152, "learning_rate": 0.002828, "loss": 1.0633, "step": 560640 }, { "epoch": 42.079099437148216, "grad_norm": 0.7170779705047607, "learning_rate": 0.002828, "loss": 1.0654, "step": 560704 }, { "epoch": 42.08390243902439, "grad_norm": 0.5964574217796326, "learning_rate": 0.002828, "loss": 1.0649, "step": 560768 }, { "epoch": 42.08870544090056, "grad_norm": 0.6003646850585938, "learning_rate": 0.002828, "loss": 1.0667, "step": 560832 }, { "epoch": 42.09350844277674, "grad_norm": 0.6014326214790344, "learning_rate": 0.002828, "loss": 1.0605, "step": 560896 }, { "epoch": 42.09831144465291, "grad_norm": 0.6906946301460266, "learning_rate": 0.002828, "loss": 1.0681, "step": 560960 }, { "epoch": 42.103114446529084, "grad_norm": 0.5337896347045898, "learning_rate": 0.002828, "loss": 1.0667, "step": 561024 }, { "epoch": 42.10791744840525, "grad_norm": 0.5389870405197144, "learning_rate": 0.002828, "loss": 1.0629, "step": 561088 }, { "epoch": 42.11272045028143, "grad_norm": 0.6632516980171204, "learning_rate": 0.002828, "loss": 1.0687, "step": 561152 }, { "epoch": 42.1175234521576, "grad_norm": 0.6831262707710266, "learning_rate": 0.002828, "loss": 1.0695, "step": 561216 }, { "epoch": 42.12232645403377, "grad_norm": 0.657807469367981, "learning_rate": 0.002828, "loss": 1.0654, "step": 561280 }, { "epoch": 42.127129455909945, "grad_norm": 0.5806097388267517, "learning_rate": 0.002828, "loss": 1.064, "step": 561344 }, { "epoch": 42.131932457786114, "grad_norm": 0.6003397107124329, "learning_rate": 0.002828, "loss": 1.0703, "step": 561408 }, { "epoch": 42.13673545966229, "grad_norm": 0.6153790950775146, "learning_rate": 0.002828, "loss": 1.0617, "step": 561472 }, { "epoch": 42.14153846153846, "grad_norm": 0.6227737069129944, "learning_rate": 0.002828, "loss": 1.0715, "step": 561536 }, { "epoch": 42.146341463414636, "grad_norm": 0.6207556128501892, "learning_rate": 0.002828, "loss": 1.0633, "step": 561600 }, { "epoch": 42.151144465290805, "grad_norm": 0.6176641583442688, "learning_rate": 0.002828, "loss": 1.0667, "step": 561664 }, { "epoch": 42.15594746716698, "grad_norm": 0.7300615310668945, "learning_rate": 0.002828, "loss": 1.0666, "step": 561728 }, { "epoch": 42.16075046904315, "grad_norm": 0.6501210331916809, "learning_rate": 0.002828, "loss": 1.0702, "step": 561792 }, { "epoch": 42.16555347091933, "grad_norm": 0.6231697201728821, "learning_rate": 0.002828, "loss": 1.0685, "step": 561856 }, { "epoch": 42.1703564727955, "grad_norm": 0.5661913156509399, "learning_rate": 0.002828, "loss": 1.069, "step": 561920 }, { "epoch": 42.175159474671666, "grad_norm": 0.6047375798225403, "learning_rate": 0.002828, "loss": 1.0628, "step": 561984 }, { "epoch": 42.17996247654784, "grad_norm": 0.6263827085494995, "learning_rate": 0.002828, "loss": 1.0689, "step": 562048 }, { "epoch": 42.18476547842401, "grad_norm": 0.5567244291305542, "learning_rate": 0.002828, "loss": 1.0702, "step": 562112 }, { "epoch": 42.18956848030019, "grad_norm": 0.5582324266433716, "learning_rate": 0.002828, "loss": 1.0701, "step": 562176 }, { "epoch": 42.19437148217636, "grad_norm": 0.6602678894996643, "learning_rate": 0.002828, "loss": 1.067, "step": 562240 }, { "epoch": 42.199174484052534, "grad_norm": 0.6724734902381897, "learning_rate": 0.002828, "loss": 1.0668, "step": 562304 }, { "epoch": 42.203977485928704, "grad_norm": 0.6203201413154602, "learning_rate": 0.002828, "loss": 1.0708, "step": 562368 }, { "epoch": 42.20878048780488, "grad_norm": 0.5524185299873352, "learning_rate": 0.002828, "loss": 1.066, "step": 562432 }, { "epoch": 42.21358348968105, "grad_norm": 0.7851992845535278, "learning_rate": 0.002828, "loss": 1.0666, "step": 562496 }, { "epoch": 42.218386491557226, "grad_norm": 0.5382575988769531, "learning_rate": 0.002828, "loss": 1.0748, "step": 562560 }, { "epoch": 42.223189493433395, "grad_norm": 0.631264865398407, "learning_rate": 0.002828, "loss": 1.069, "step": 562624 }, { "epoch": 42.22799249530957, "grad_norm": 0.6270348429679871, "learning_rate": 0.002828, "loss": 1.0713, "step": 562688 }, { "epoch": 42.23279549718574, "grad_norm": 0.7309645414352417, "learning_rate": 0.002828, "loss": 1.0723, "step": 562752 }, { "epoch": 42.23759849906191, "grad_norm": 0.6442151069641113, "learning_rate": 0.002828, "loss": 1.0686, "step": 562816 }, { "epoch": 42.24240150093809, "grad_norm": 0.6158406734466553, "learning_rate": 0.002828, "loss": 1.0712, "step": 562880 }, { "epoch": 42.247204502814256, "grad_norm": 0.5718555450439453, "learning_rate": 0.002828, "loss": 1.0751, "step": 562944 }, { "epoch": 42.25200750469043, "grad_norm": 0.6009352207183838, "learning_rate": 0.002828, "loss": 1.0723, "step": 563008 }, { "epoch": 42.2568105065666, "grad_norm": 0.648350715637207, "learning_rate": 0.002828, "loss": 1.0691, "step": 563072 }, { "epoch": 42.26161350844278, "grad_norm": 0.6446778178215027, "learning_rate": 0.002828, "loss": 1.0757, "step": 563136 }, { "epoch": 42.26641651031895, "grad_norm": 0.6527370810508728, "learning_rate": 0.002828, "loss": 1.0731, "step": 563200 }, { "epoch": 42.271219512195124, "grad_norm": 0.7441429495811462, "learning_rate": 0.002828, "loss": 1.0691, "step": 563264 }, { "epoch": 42.27602251407129, "grad_norm": 0.6982424855232239, "learning_rate": 0.002828, "loss": 1.0702, "step": 563328 }, { "epoch": 42.28082551594747, "grad_norm": 0.5313721299171448, "learning_rate": 0.002828, "loss": 1.0748, "step": 563392 }, { "epoch": 42.28562851782364, "grad_norm": 0.7577272057533264, "learning_rate": 0.002828, "loss": 1.0728, "step": 563456 }, { "epoch": 42.290431519699816, "grad_norm": 0.6524193286895752, "learning_rate": 0.002828, "loss": 1.0744, "step": 563520 }, { "epoch": 42.295234521575985, "grad_norm": 0.681130051612854, "learning_rate": 0.002828, "loss": 1.0771, "step": 563584 }, { "epoch": 42.300037523452154, "grad_norm": 0.5935671329498291, "learning_rate": 0.002828, "loss": 1.0698, "step": 563648 }, { "epoch": 42.30484052532833, "grad_norm": 0.7082101106643677, "learning_rate": 0.002828, "loss": 1.0705, "step": 563712 }, { "epoch": 42.3096435272045, "grad_norm": 0.7285563945770264, "learning_rate": 0.002828, "loss": 1.0719, "step": 563776 }, { "epoch": 42.314446529080676, "grad_norm": 0.6174675226211548, "learning_rate": 0.002828, "loss": 1.0679, "step": 563840 }, { "epoch": 42.319249530956846, "grad_norm": 0.7859613299369812, "learning_rate": 0.002828, "loss": 1.0709, "step": 563904 }, { "epoch": 42.32405253283302, "grad_norm": 0.6008791923522949, "learning_rate": 0.002828, "loss": 1.0693, "step": 563968 }, { "epoch": 42.32885553470919, "grad_norm": 0.7026347517967224, "learning_rate": 0.002828, "loss": 1.0731, "step": 564032 }, { "epoch": 42.33365853658537, "grad_norm": 0.569503128528595, "learning_rate": 0.002828, "loss": 1.0694, "step": 564096 }, { "epoch": 42.33846153846154, "grad_norm": 0.5854909420013428, "learning_rate": 0.002828, "loss": 1.0727, "step": 564160 }, { "epoch": 42.343264540337714, "grad_norm": 0.5679054260253906, "learning_rate": 0.002828, "loss": 1.0707, "step": 564224 }, { "epoch": 42.34806754221388, "grad_norm": 0.6394909620285034, "learning_rate": 0.002828, "loss": 1.0756, "step": 564288 }, { "epoch": 42.35287054409006, "grad_norm": 0.6406596899032593, "learning_rate": 0.002828, "loss": 1.0762, "step": 564352 }, { "epoch": 42.35767354596623, "grad_norm": 0.6130920648574829, "learning_rate": 0.002828, "loss": 1.0683, "step": 564416 }, { "epoch": 42.3624765478424, "grad_norm": 0.5922021865844727, "learning_rate": 0.002828, "loss": 1.0765, "step": 564480 }, { "epoch": 42.367279549718575, "grad_norm": 0.7147934436798096, "learning_rate": 0.002828, "loss": 1.0741, "step": 564544 }, { "epoch": 42.372082551594744, "grad_norm": 0.6006954908370972, "learning_rate": 0.002828, "loss": 1.0758, "step": 564608 }, { "epoch": 42.37688555347092, "grad_norm": 0.7138851881027222, "learning_rate": 0.002828, "loss": 1.0811, "step": 564672 }, { "epoch": 42.38168855534709, "grad_norm": 0.6090694069862366, "learning_rate": 0.002828, "loss": 1.0751, "step": 564736 }, { "epoch": 42.386491557223266, "grad_norm": 0.5797130465507507, "learning_rate": 0.002828, "loss": 1.0714, "step": 564800 }, { "epoch": 42.391294559099435, "grad_norm": 0.718805193901062, "learning_rate": 0.002828, "loss": 1.0741, "step": 564864 }, { "epoch": 42.39609756097561, "grad_norm": 0.6358516216278076, "learning_rate": 0.002828, "loss": 1.0725, "step": 564928 }, { "epoch": 42.40090056285178, "grad_norm": 0.5884746313095093, "learning_rate": 0.002828, "loss": 1.0737, "step": 564992 }, { "epoch": 42.40570356472796, "grad_norm": 0.5733670592308044, "learning_rate": 0.002828, "loss": 1.0815, "step": 565056 }, { "epoch": 42.41050656660413, "grad_norm": 0.650057852268219, "learning_rate": 0.002828, "loss": 1.0763, "step": 565120 }, { "epoch": 42.4153095684803, "grad_norm": 0.7314160466194153, "learning_rate": 0.002828, "loss": 1.0779, "step": 565184 }, { "epoch": 42.42011257035647, "grad_norm": 0.5718480348587036, "learning_rate": 0.002828, "loss": 1.0746, "step": 565248 }, { "epoch": 42.42491557223264, "grad_norm": 0.5505502820014954, "learning_rate": 0.002828, "loss": 1.0741, "step": 565312 }, { "epoch": 42.42971857410882, "grad_norm": 0.5616229772567749, "learning_rate": 0.002828, "loss": 1.0761, "step": 565376 }, { "epoch": 42.43452157598499, "grad_norm": 0.6704441905021667, "learning_rate": 0.002828, "loss": 1.0763, "step": 565440 }, { "epoch": 42.439324577861164, "grad_norm": 0.5727601051330566, "learning_rate": 0.002828, "loss": 1.0808, "step": 565504 }, { "epoch": 42.444127579737334, "grad_norm": 0.5947756767272949, "learning_rate": 0.002828, "loss": 1.0725, "step": 565568 }, { "epoch": 42.44893058161351, "grad_norm": 0.6589171290397644, "learning_rate": 0.002828, "loss": 1.0739, "step": 565632 }, { "epoch": 42.45373358348968, "grad_norm": 0.829900324344635, "learning_rate": 0.002828, "loss": 1.0781, "step": 565696 }, { "epoch": 42.458536585365856, "grad_norm": 0.7435891628265381, "learning_rate": 0.002828, "loss": 1.0774, "step": 565760 }, { "epoch": 42.463339587242025, "grad_norm": 0.633306086063385, "learning_rate": 0.002828, "loss": 1.0755, "step": 565824 }, { "epoch": 42.4681425891182, "grad_norm": 0.7354848980903625, "learning_rate": 0.002828, "loss": 1.0779, "step": 565888 }, { "epoch": 42.47294559099437, "grad_norm": 0.7187036871910095, "learning_rate": 0.002828, "loss": 1.079, "step": 565952 }, { "epoch": 42.47774859287055, "grad_norm": 0.6875394582748413, "learning_rate": 0.002828, "loss": 1.0839, "step": 566016 }, { "epoch": 42.48255159474672, "grad_norm": 0.5287431478500366, "learning_rate": 0.002828, "loss": 1.0786, "step": 566080 }, { "epoch": 42.487354596622886, "grad_norm": 0.6273989677429199, "learning_rate": 0.002828, "loss": 1.0789, "step": 566144 }, { "epoch": 42.49215759849906, "grad_norm": 0.7115692496299744, "learning_rate": 0.002828, "loss": 1.0782, "step": 566208 }, { "epoch": 42.49696060037523, "grad_norm": 0.5426573157310486, "learning_rate": 0.002828, "loss": 1.0771, "step": 566272 }, { "epoch": 42.50176360225141, "grad_norm": 0.6621776223182678, "learning_rate": 0.002828, "loss": 1.0761, "step": 566336 }, { "epoch": 42.50656660412758, "grad_norm": 0.5490167140960693, "learning_rate": 0.002828, "loss": 1.0758, "step": 566400 }, { "epoch": 42.511369606003754, "grad_norm": 0.5877586603164673, "learning_rate": 0.002828, "loss": 1.0807, "step": 566464 }, { "epoch": 42.51617260787992, "grad_norm": 0.8068798780441284, "learning_rate": 0.002828, "loss": 1.0761, "step": 566528 }, { "epoch": 42.5209756097561, "grad_norm": 0.6804494261741638, "learning_rate": 0.002828, "loss": 1.0793, "step": 566592 }, { "epoch": 42.52577861163227, "grad_norm": 0.605664074420929, "learning_rate": 0.002828, "loss": 1.0742, "step": 566656 }, { "epoch": 42.530581613508446, "grad_norm": 0.5913964509963989, "learning_rate": 0.002828, "loss": 1.0801, "step": 566720 }, { "epoch": 42.535384615384615, "grad_norm": 0.5786881446838379, "learning_rate": 0.002828, "loss": 1.0806, "step": 566784 }, { "epoch": 42.54018761726079, "grad_norm": 0.5988232493400574, "learning_rate": 0.002828, "loss": 1.0774, "step": 566848 }, { "epoch": 42.54499061913696, "grad_norm": 0.529296875, "learning_rate": 0.002828, "loss": 1.0777, "step": 566912 }, { "epoch": 42.54979362101313, "grad_norm": 0.5336984395980835, "learning_rate": 0.002828, "loss": 1.0807, "step": 566976 }, { "epoch": 42.554596622889306, "grad_norm": 0.5777416229248047, "learning_rate": 0.002828, "loss": 1.0805, "step": 567040 }, { "epoch": 42.559399624765476, "grad_norm": 0.5394503474235535, "learning_rate": 0.002828, "loss": 1.0783, "step": 567104 }, { "epoch": 42.56420262664165, "grad_norm": 0.5473426580429077, "learning_rate": 0.002828, "loss": 1.0749, "step": 567168 }, { "epoch": 42.56900562851782, "grad_norm": 0.6526885032653809, "learning_rate": 0.002828, "loss": 1.0792, "step": 567232 }, { "epoch": 42.573808630394, "grad_norm": 0.5946779251098633, "learning_rate": 0.002828, "loss": 1.0803, "step": 567296 }, { "epoch": 42.57861163227017, "grad_norm": 0.5849866271018982, "learning_rate": 0.002828, "loss": 1.0775, "step": 567360 }, { "epoch": 42.583414634146344, "grad_norm": 0.7357564568519592, "learning_rate": 0.002828, "loss": 1.0806, "step": 567424 }, { "epoch": 42.58821763602251, "grad_norm": 0.6068862080574036, "learning_rate": 0.002828, "loss": 1.0816, "step": 567488 }, { "epoch": 42.59302063789869, "grad_norm": 0.5769990086555481, "learning_rate": 0.002828, "loss": 1.0853, "step": 567552 }, { "epoch": 42.59782363977486, "grad_norm": 0.8670127391815186, "learning_rate": 0.002828, "loss": 1.0748, "step": 567616 }, { "epoch": 42.602626641651035, "grad_norm": 0.5923072695732117, "learning_rate": 0.002828, "loss": 1.0848, "step": 567680 }, { "epoch": 42.607429643527205, "grad_norm": 0.6279094815254211, "learning_rate": 0.002828, "loss": 1.0821, "step": 567744 }, { "epoch": 42.612232645403374, "grad_norm": 0.6318846344947815, "learning_rate": 0.002828, "loss": 1.0801, "step": 567808 }, { "epoch": 42.61703564727955, "grad_norm": 0.5974023342132568, "learning_rate": 0.002828, "loss": 1.0829, "step": 567872 }, { "epoch": 42.62183864915572, "grad_norm": 0.5550240278244019, "learning_rate": 0.002828, "loss": 1.0812, "step": 567936 }, { "epoch": 42.626641651031896, "grad_norm": 0.6231825351715088, "learning_rate": 0.002828, "loss": 1.0753, "step": 568000 }, { "epoch": 42.631444652908066, "grad_norm": 0.6439521908760071, "learning_rate": 0.002828, "loss": 1.0791, "step": 568064 }, { "epoch": 42.63624765478424, "grad_norm": 0.6360310912132263, "learning_rate": 0.002828, "loss": 1.0796, "step": 568128 }, { "epoch": 42.64105065666041, "grad_norm": 0.5486919283866882, "learning_rate": 0.002828, "loss": 1.083, "step": 568192 }, { "epoch": 42.64585365853659, "grad_norm": 0.6617389917373657, "learning_rate": 0.002828, "loss": 1.0778, "step": 568256 }, { "epoch": 42.65065666041276, "grad_norm": 0.6045265197753906, "learning_rate": 0.002828, "loss": 1.0819, "step": 568320 }, { "epoch": 42.65545966228893, "grad_norm": 0.6709670424461365, "learning_rate": 0.002828, "loss": 1.0834, "step": 568384 }, { "epoch": 42.6602626641651, "grad_norm": 0.6926343441009521, "learning_rate": 0.002828, "loss": 1.0829, "step": 568448 }, { "epoch": 42.66506566604128, "grad_norm": 0.6111130714416504, "learning_rate": 0.002828, "loss": 1.0837, "step": 568512 }, { "epoch": 42.66986866791745, "grad_norm": 0.5871363878250122, "learning_rate": 0.002828, "loss": 1.0829, "step": 568576 }, { "epoch": 42.67467166979362, "grad_norm": 0.567190408706665, "learning_rate": 0.002828, "loss": 1.0809, "step": 568640 }, { "epoch": 42.679474671669794, "grad_norm": 0.6697739362716675, "learning_rate": 0.002828, "loss": 1.0859, "step": 568704 }, { "epoch": 42.684277673545964, "grad_norm": 0.6716319918632507, "learning_rate": 0.002828, "loss": 1.0769, "step": 568768 }, { "epoch": 42.68908067542214, "grad_norm": 0.611808180809021, "learning_rate": 0.002828, "loss": 1.0888, "step": 568832 }, { "epoch": 42.69388367729831, "grad_norm": 0.531807005405426, "learning_rate": 0.002828, "loss": 1.0809, "step": 568896 }, { "epoch": 42.698686679174486, "grad_norm": 0.5492876768112183, "learning_rate": 0.002828, "loss": 1.08, "step": 568960 }, { "epoch": 42.703489681050655, "grad_norm": 0.5824979543685913, "learning_rate": 0.002828, "loss": 1.0874, "step": 569024 }, { "epoch": 42.70829268292683, "grad_norm": 0.617067813873291, "learning_rate": 0.002828, "loss": 1.0808, "step": 569088 }, { "epoch": 42.713095684803, "grad_norm": 0.6360995769500732, "learning_rate": 0.002828, "loss": 1.0853, "step": 569152 }, { "epoch": 42.71789868667918, "grad_norm": 0.6540601253509521, "learning_rate": 0.002828, "loss": 1.0849, "step": 569216 }, { "epoch": 42.72270168855535, "grad_norm": 0.7369455099105835, "learning_rate": 0.002828, "loss": 1.0827, "step": 569280 }, { "epoch": 42.727504690431516, "grad_norm": 0.690152108669281, "learning_rate": 0.002828, "loss": 1.0824, "step": 569344 }, { "epoch": 42.73230769230769, "grad_norm": 0.613617479801178, "learning_rate": 0.002828, "loss": 1.0839, "step": 569408 }, { "epoch": 42.73711069418386, "grad_norm": 0.6699286103248596, "learning_rate": 0.002828, "loss": 1.0859, "step": 569472 }, { "epoch": 42.74191369606004, "grad_norm": 0.6793148517608643, "learning_rate": 0.002828, "loss": 1.0866, "step": 569536 }, { "epoch": 42.74671669793621, "grad_norm": 0.6059393286705017, "learning_rate": 0.002828, "loss": 1.0796, "step": 569600 }, { "epoch": 42.751519699812384, "grad_norm": 0.58489590883255, "learning_rate": 0.002828, "loss": 1.0818, "step": 569664 }, { "epoch": 42.75632270168855, "grad_norm": 0.6535030007362366, "learning_rate": 0.002828, "loss": 1.089, "step": 569728 }, { "epoch": 42.76112570356473, "grad_norm": 0.7264353036880493, "learning_rate": 0.002828, "loss": 1.0853, "step": 569792 }, { "epoch": 42.7659287054409, "grad_norm": 0.7156565189361572, "learning_rate": 0.002828, "loss": 1.0861, "step": 569856 }, { "epoch": 42.770731707317076, "grad_norm": 0.56122887134552, "learning_rate": 0.002828, "loss": 1.0835, "step": 569920 }, { "epoch": 42.775534709193245, "grad_norm": 0.5848171710968018, "learning_rate": 0.002828, "loss": 1.0839, "step": 569984 }, { "epoch": 42.78033771106942, "grad_norm": 0.6172915697097778, "learning_rate": 0.002828, "loss": 1.0779, "step": 570048 }, { "epoch": 42.78514071294559, "grad_norm": 0.5863094329833984, "learning_rate": 0.002828, "loss": 1.0871, "step": 570112 }, { "epoch": 42.78994371482176, "grad_norm": 0.5800777673721313, "learning_rate": 0.002828, "loss": 1.0828, "step": 570176 }, { "epoch": 42.79474671669794, "grad_norm": 0.6153944134712219, "learning_rate": 0.002828, "loss": 1.0826, "step": 570240 }, { "epoch": 42.799549718574106, "grad_norm": 0.6796879768371582, "learning_rate": 0.002828, "loss": 1.0891, "step": 570304 }, { "epoch": 42.80435272045028, "grad_norm": 0.688765287399292, "learning_rate": 0.002828, "loss": 1.0849, "step": 570368 }, { "epoch": 42.80915572232645, "grad_norm": 0.6226803660392761, "learning_rate": 0.002828, "loss": 1.0852, "step": 570432 }, { "epoch": 42.81395872420263, "grad_norm": 0.5279849171638489, "learning_rate": 0.002828, "loss": 1.0821, "step": 570496 }, { "epoch": 42.8187617260788, "grad_norm": 0.7052308320999146, "learning_rate": 0.002828, "loss": 1.0825, "step": 570560 }, { "epoch": 42.823564727954974, "grad_norm": 0.5396923422813416, "learning_rate": 0.002828, "loss": 1.0828, "step": 570624 }, { "epoch": 42.82836772983114, "grad_norm": 0.6663107872009277, "learning_rate": 0.002828, "loss": 1.087, "step": 570688 }, { "epoch": 42.83317073170732, "grad_norm": 0.4901922941207886, "learning_rate": 0.002828, "loss": 1.0942, "step": 570752 }, { "epoch": 42.83797373358349, "grad_norm": 0.5605421662330627, "learning_rate": 0.002828, "loss": 1.0865, "step": 570816 }, { "epoch": 42.842776735459665, "grad_norm": 0.7057970762252808, "learning_rate": 0.002828, "loss": 1.0844, "step": 570880 }, { "epoch": 42.847579737335835, "grad_norm": 0.5384121537208557, "learning_rate": 0.002828, "loss": 1.0857, "step": 570944 }, { "epoch": 42.852382739212004, "grad_norm": 0.6306588649749756, "learning_rate": 0.002828, "loss": 1.0856, "step": 571008 }, { "epoch": 42.85718574108818, "grad_norm": 0.5484234094619751, "learning_rate": 0.002828, "loss": 1.0833, "step": 571072 }, { "epoch": 42.86198874296435, "grad_norm": 0.7848870754241943, "learning_rate": 0.002828, "loss": 1.0875, "step": 571136 }, { "epoch": 42.866791744840526, "grad_norm": 0.6511049270629883, "learning_rate": 0.002828, "loss": 1.091, "step": 571200 }, { "epoch": 42.871594746716696, "grad_norm": 0.6501409411430359, "learning_rate": 0.002828, "loss": 1.0861, "step": 571264 }, { "epoch": 42.87639774859287, "grad_norm": 0.6331605315208435, "learning_rate": 0.002828, "loss": 1.0844, "step": 571328 }, { "epoch": 42.88120075046904, "grad_norm": 0.7007723450660706, "learning_rate": 0.002828, "loss": 1.0881, "step": 571392 }, { "epoch": 42.88600375234522, "grad_norm": 0.6939073801040649, "learning_rate": 0.002828, "loss": 1.082, "step": 571456 }, { "epoch": 42.89080675422139, "grad_norm": 0.6270707249641418, "learning_rate": 0.002828, "loss": 1.0808, "step": 571520 }, { "epoch": 42.89560975609756, "grad_norm": 0.7300127148628235, "learning_rate": 0.002828, "loss": 1.0849, "step": 571584 }, { "epoch": 42.90041275797373, "grad_norm": 0.5953959822654724, "learning_rate": 0.002828, "loss": 1.0864, "step": 571648 }, { "epoch": 42.90521575984991, "grad_norm": 0.6020718216896057, "learning_rate": 0.002828, "loss": 1.0895, "step": 571712 }, { "epoch": 42.91001876172608, "grad_norm": 0.6176451444625854, "learning_rate": 0.002828, "loss": 1.0872, "step": 571776 }, { "epoch": 42.91482176360225, "grad_norm": 0.7385719418525696, "learning_rate": 0.002828, "loss": 1.0854, "step": 571840 }, { "epoch": 42.919624765478424, "grad_norm": 0.591037929058075, "learning_rate": 0.002828, "loss": 1.083, "step": 571904 }, { "epoch": 42.924427767354594, "grad_norm": 0.750694751739502, "learning_rate": 0.002828, "loss": 1.0846, "step": 571968 }, { "epoch": 42.92923076923077, "grad_norm": 0.7448388934135437, "learning_rate": 0.002828, "loss": 1.0909, "step": 572032 }, { "epoch": 42.93403377110694, "grad_norm": 0.5910031795501709, "learning_rate": 0.002828, "loss": 1.0899, "step": 572096 }, { "epoch": 42.938836772983116, "grad_norm": 0.652076244354248, "learning_rate": 0.002828, "loss": 1.0898, "step": 572160 }, { "epoch": 42.943639774859285, "grad_norm": 0.5189456939697266, "learning_rate": 0.002828, "loss": 1.0874, "step": 572224 }, { "epoch": 42.94844277673546, "grad_norm": 0.8578477501869202, "learning_rate": 0.002828, "loss": 1.0873, "step": 572288 }, { "epoch": 42.95324577861163, "grad_norm": 0.5538474321365356, "learning_rate": 0.002828, "loss": 1.0907, "step": 572352 }, { "epoch": 42.95804878048781, "grad_norm": 0.6879174709320068, "learning_rate": 0.002828, "loss": 1.0847, "step": 572416 }, { "epoch": 42.96285178236398, "grad_norm": 0.5544927716255188, "learning_rate": 0.002828, "loss": 1.086, "step": 572480 }, { "epoch": 42.96765478424015, "grad_norm": 0.57198166847229, "learning_rate": 0.002828, "loss": 1.0837, "step": 572544 }, { "epoch": 42.97245778611632, "grad_norm": 0.6882445216178894, "learning_rate": 0.002828, "loss": 1.0904, "step": 572608 }, { "epoch": 42.97726078799249, "grad_norm": 0.5579478740692139, "learning_rate": 0.002828, "loss": 1.0857, "step": 572672 }, { "epoch": 42.98206378986867, "grad_norm": 0.7191713452339172, "learning_rate": 0.002828, "loss": 1.0855, "step": 572736 }, { "epoch": 42.98686679174484, "grad_norm": 0.6459416151046753, "learning_rate": 0.002828, "loss": 1.0915, "step": 572800 }, { "epoch": 42.991669793621014, "grad_norm": 0.5977568030357361, "learning_rate": 0.002828, "loss": 1.0839, "step": 572864 }, { "epoch": 42.99647279549718, "grad_norm": 0.6233012676239014, "learning_rate": 0.002828, "loss": 1.0823, "step": 572928 }, { "epoch": 43.00127579737336, "grad_norm": 0.5843372344970703, "learning_rate": 0.002828, "loss": 1.0782, "step": 572992 }, { "epoch": 43.00607879924953, "grad_norm": 0.5951461791992188, "learning_rate": 0.002828, "loss": 1.0502, "step": 573056 }, { "epoch": 43.010881801125706, "grad_norm": 0.7060664296150208, "learning_rate": 0.002828, "loss": 1.0491, "step": 573120 }, { "epoch": 43.015684803001875, "grad_norm": 0.6400389075279236, "learning_rate": 0.002828, "loss": 1.0487, "step": 573184 }, { "epoch": 43.02048780487805, "grad_norm": 0.7333444952964783, "learning_rate": 0.002828, "loss": 1.051, "step": 573248 }, { "epoch": 43.02529080675422, "grad_norm": 0.7291139960289001, "learning_rate": 0.002828, "loss": 1.0521, "step": 573312 }, { "epoch": 43.0300938086304, "grad_norm": 0.5190329551696777, "learning_rate": 0.002828, "loss": 1.0572, "step": 573376 }, { "epoch": 43.03489681050657, "grad_norm": 0.7910537719726562, "learning_rate": 0.002828, "loss": 1.0555, "step": 573440 }, { "epoch": 43.039699812382736, "grad_norm": 0.7930744886398315, "learning_rate": 0.002828, "loss": 1.0526, "step": 573504 }, { "epoch": 43.04450281425891, "grad_norm": 0.5120797157287598, "learning_rate": 0.002828, "loss": 1.0537, "step": 573568 }, { "epoch": 43.04930581613508, "grad_norm": 0.6616923809051514, "learning_rate": 0.002828, "loss": 1.0574, "step": 573632 }, { "epoch": 43.05410881801126, "grad_norm": 0.6355065703392029, "learning_rate": 0.002828, "loss": 1.0548, "step": 573696 }, { "epoch": 43.05891181988743, "grad_norm": 0.5768610835075378, "learning_rate": 0.002828, "loss": 1.0571, "step": 573760 }, { "epoch": 43.063714821763604, "grad_norm": 0.782618522644043, "learning_rate": 0.002828, "loss": 1.0526, "step": 573824 }, { "epoch": 43.06851782363977, "grad_norm": 0.5912759304046631, "learning_rate": 0.002828, "loss": 1.0564, "step": 573888 }, { "epoch": 43.07332082551595, "grad_norm": 0.6178007125854492, "learning_rate": 0.002828, "loss": 1.0585, "step": 573952 }, { "epoch": 43.07812382739212, "grad_norm": 0.597594678401947, "learning_rate": 0.002828, "loss": 1.0582, "step": 574016 }, { "epoch": 43.082926829268295, "grad_norm": 0.5913254618644714, "learning_rate": 0.002828, "loss": 1.0552, "step": 574080 }, { "epoch": 43.087729831144465, "grad_norm": 0.691646933555603, "learning_rate": 0.002828, "loss": 1.055, "step": 574144 }, { "epoch": 43.09253283302064, "grad_norm": 0.5682713389396667, "learning_rate": 0.002828, "loss": 1.0642, "step": 574208 }, { "epoch": 43.09733583489681, "grad_norm": 0.6842798590660095, "learning_rate": 0.002828, "loss": 1.0599, "step": 574272 }, { "epoch": 43.10213883677298, "grad_norm": 0.584943950176239, "learning_rate": 0.002828, "loss": 1.0569, "step": 574336 }, { "epoch": 43.106941838649156, "grad_norm": 0.4885464608669281, "learning_rate": 0.002828, "loss": 1.0596, "step": 574400 }, { "epoch": 43.111744840525326, "grad_norm": 0.5184704065322876, "learning_rate": 0.002828, "loss": 1.0612, "step": 574464 }, { "epoch": 43.1165478424015, "grad_norm": 0.7813940644264221, "learning_rate": 0.002828, "loss": 1.0629, "step": 574528 }, { "epoch": 43.12135084427767, "grad_norm": 0.6903042793273926, "learning_rate": 0.002828, "loss": 1.0614, "step": 574592 }, { "epoch": 43.12615384615385, "grad_norm": 0.5398491621017456, "learning_rate": 0.002828, "loss": 1.058, "step": 574656 }, { "epoch": 43.13095684803002, "grad_norm": 0.6805057525634766, "learning_rate": 0.002828, "loss": 1.0587, "step": 574720 }, { "epoch": 43.135759849906194, "grad_norm": 0.7092175483703613, "learning_rate": 0.002828, "loss": 1.0644, "step": 574784 }, { "epoch": 43.14056285178236, "grad_norm": 0.6952199935913086, "learning_rate": 0.002828, "loss": 1.0601, "step": 574848 }, { "epoch": 43.14536585365854, "grad_norm": 0.6295602321624756, "learning_rate": 0.002828, "loss": 1.0563, "step": 574912 }, { "epoch": 43.15016885553471, "grad_norm": 0.4714643657207489, "learning_rate": 0.002828, "loss": 1.0581, "step": 574976 }, { "epoch": 43.154971857410885, "grad_norm": 0.5860217809677124, "learning_rate": 0.002828, "loss": 1.0595, "step": 575040 }, { "epoch": 43.159774859287054, "grad_norm": 0.5973192453384399, "learning_rate": 0.002828, "loss": 1.0611, "step": 575104 }, { "epoch": 43.164577861163224, "grad_norm": 0.692756712436676, "learning_rate": 0.002828, "loss": 1.0639, "step": 575168 }, { "epoch": 43.1693808630394, "grad_norm": 0.6680248379707336, "learning_rate": 0.002828, "loss": 1.0607, "step": 575232 }, { "epoch": 43.17418386491557, "grad_norm": 0.8391013741493225, "learning_rate": 0.002828, "loss": 1.0617, "step": 575296 }, { "epoch": 43.178986866791746, "grad_norm": 0.7862709164619446, "learning_rate": 0.002828, "loss": 1.0632, "step": 575360 }, { "epoch": 43.183789868667915, "grad_norm": 0.6694145202636719, "learning_rate": 0.002828, "loss": 1.0604, "step": 575424 }, { "epoch": 43.18859287054409, "grad_norm": 0.5881288051605225, "learning_rate": 0.002828, "loss": 1.0608, "step": 575488 }, { "epoch": 43.19339587242026, "grad_norm": 0.6086690425872803, "learning_rate": 0.002828, "loss": 1.0658, "step": 575552 }, { "epoch": 43.19819887429644, "grad_norm": 0.5388079285621643, "learning_rate": 0.002828, "loss": 1.0613, "step": 575616 }, { "epoch": 43.20300187617261, "grad_norm": 0.5871914625167847, "learning_rate": 0.002828, "loss": 1.0581, "step": 575680 }, { "epoch": 43.20780487804878, "grad_norm": 1.0138866901397705, "learning_rate": 0.002828, "loss": 1.0594, "step": 575744 }, { "epoch": 43.21260787992495, "grad_norm": 0.8155320286750793, "learning_rate": 0.002828, "loss": 1.0638, "step": 575808 }, { "epoch": 43.21741088180113, "grad_norm": 0.5958095788955688, "learning_rate": 0.002828, "loss": 1.0669, "step": 575872 }, { "epoch": 43.2222138836773, "grad_norm": 0.7571079730987549, "learning_rate": 0.002828, "loss": 1.0629, "step": 575936 }, { "epoch": 43.22701688555347, "grad_norm": 0.5348535776138306, "learning_rate": 0.002828, "loss": 1.0587, "step": 576000 }, { "epoch": 43.231819887429644, "grad_norm": 0.5872513651847839, "learning_rate": 0.002828, "loss": 1.0663, "step": 576064 }, { "epoch": 43.23662288930581, "grad_norm": 0.5647656917572021, "learning_rate": 0.002828, "loss": 1.0622, "step": 576128 }, { "epoch": 43.24142589118199, "grad_norm": 0.5898438692092896, "learning_rate": 0.002828, "loss": 1.066, "step": 576192 }, { "epoch": 43.24622889305816, "grad_norm": 0.590237557888031, "learning_rate": 0.002828, "loss": 1.0615, "step": 576256 }, { "epoch": 43.251031894934336, "grad_norm": 0.7051697373390198, "learning_rate": 0.002828, "loss": 1.0634, "step": 576320 }, { "epoch": 43.255834896810505, "grad_norm": 0.6399511098861694, "learning_rate": 0.002828, "loss": 1.0649, "step": 576384 }, { "epoch": 43.26063789868668, "grad_norm": 0.6372509598731995, "learning_rate": 0.002828, "loss": 1.0607, "step": 576448 }, { "epoch": 43.26544090056285, "grad_norm": 0.5469287037849426, "learning_rate": 0.002828, "loss": 1.0645, "step": 576512 }, { "epoch": 43.27024390243903, "grad_norm": 0.5622618198394775, "learning_rate": 0.002828, "loss": 1.0631, "step": 576576 }, { "epoch": 43.2750469043152, "grad_norm": 0.6582178473472595, "learning_rate": 0.002828, "loss": 1.0637, "step": 576640 }, { "epoch": 43.27984990619137, "grad_norm": 0.5956276655197144, "learning_rate": 0.002828, "loss": 1.0637, "step": 576704 }, { "epoch": 43.28465290806754, "grad_norm": 0.7692244052886963, "learning_rate": 0.002828, "loss": 1.0628, "step": 576768 }, { "epoch": 43.28945590994371, "grad_norm": 0.6546646356582642, "learning_rate": 0.002828, "loss": 1.061, "step": 576832 }, { "epoch": 43.29425891181989, "grad_norm": 0.6064482927322388, "learning_rate": 0.002828, "loss": 1.0622, "step": 576896 }, { "epoch": 43.29906191369606, "grad_norm": 0.7595977783203125, "learning_rate": 0.002828, "loss": 1.0584, "step": 576960 }, { "epoch": 43.303864915572234, "grad_norm": 0.6731221675872803, "learning_rate": 0.002828, "loss": 1.0669, "step": 577024 }, { "epoch": 43.3086679174484, "grad_norm": 0.6657878756523132, "learning_rate": 0.002828, "loss": 1.066, "step": 577088 }, { "epoch": 43.31347091932458, "grad_norm": 0.5610875487327576, "learning_rate": 0.002828, "loss": 1.0667, "step": 577152 }, { "epoch": 43.31827392120075, "grad_norm": 0.6538919806480408, "learning_rate": 0.002828, "loss": 1.0663, "step": 577216 }, { "epoch": 43.323076923076925, "grad_norm": 0.763543426990509, "learning_rate": 0.002828, "loss": 1.0694, "step": 577280 }, { "epoch": 43.327879924953095, "grad_norm": 0.6737272143363953, "learning_rate": 0.002828, "loss": 1.065, "step": 577344 }, { "epoch": 43.33268292682927, "grad_norm": 0.6752422451972961, "learning_rate": 0.002828, "loss": 1.0697, "step": 577408 }, { "epoch": 43.33748592870544, "grad_norm": 0.7812511920928955, "learning_rate": 0.002828, "loss": 1.0639, "step": 577472 }, { "epoch": 43.34228893058162, "grad_norm": 0.6621730327606201, "learning_rate": 0.002828, "loss": 1.0697, "step": 577536 }, { "epoch": 43.347091932457786, "grad_norm": 0.697150707244873, "learning_rate": 0.002828, "loss": 1.0702, "step": 577600 }, { "epoch": 43.351894934333956, "grad_norm": 0.5495653748512268, "learning_rate": 0.002828, "loss": 1.0665, "step": 577664 }, { "epoch": 43.35669793621013, "grad_norm": 0.6246775984764099, "learning_rate": 0.002828, "loss": 1.0644, "step": 577728 }, { "epoch": 43.3615009380863, "grad_norm": 0.6434612274169922, "learning_rate": 0.002828, "loss": 1.069, "step": 577792 }, { "epoch": 43.36630393996248, "grad_norm": 0.8342929482460022, "learning_rate": 0.002828, "loss": 1.0649, "step": 577856 }, { "epoch": 43.37110694183865, "grad_norm": 0.7098569273948669, "learning_rate": 0.002828, "loss": 1.0673, "step": 577920 }, { "epoch": 43.375909943714824, "grad_norm": 0.6844133138656616, "learning_rate": 0.002828, "loss": 1.0622, "step": 577984 }, { "epoch": 43.38071294559099, "grad_norm": 0.6284855008125305, "learning_rate": 0.002828, "loss": 1.0678, "step": 578048 }, { "epoch": 43.38551594746717, "grad_norm": 0.5995723605155945, "learning_rate": 0.002828, "loss": 1.0661, "step": 578112 }, { "epoch": 43.39031894934334, "grad_norm": 0.5386275053024292, "learning_rate": 0.002828, "loss": 1.0683, "step": 578176 }, { "epoch": 43.395121951219515, "grad_norm": 0.8105739951133728, "learning_rate": 0.002828, "loss": 1.0637, "step": 578240 }, { "epoch": 43.399924953095685, "grad_norm": 0.5679133534431458, "learning_rate": 0.002828, "loss": 1.0674, "step": 578304 }, { "epoch": 43.40472795497186, "grad_norm": 0.6953145861625671, "learning_rate": 0.002828, "loss": 1.0727, "step": 578368 }, { "epoch": 43.40953095684803, "grad_norm": 0.5408474802970886, "learning_rate": 0.002828, "loss": 1.073, "step": 578432 }, { "epoch": 43.4143339587242, "grad_norm": 0.5147654414176941, "learning_rate": 0.002828, "loss": 1.0679, "step": 578496 }, { "epoch": 43.419136960600376, "grad_norm": 0.5925143957138062, "learning_rate": 0.002828, "loss": 1.0722, "step": 578560 }, { "epoch": 43.423939962476545, "grad_norm": 0.5820292234420776, "learning_rate": 0.002828, "loss": 1.0686, "step": 578624 }, { "epoch": 43.42874296435272, "grad_norm": 0.6305953860282898, "learning_rate": 0.002828, "loss": 1.0691, "step": 578688 }, { "epoch": 43.43354596622889, "grad_norm": 0.6557329893112183, "learning_rate": 0.002828, "loss": 1.0656, "step": 578752 }, { "epoch": 43.43834896810507, "grad_norm": 0.6649267673492432, "learning_rate": 0.002828, "loss": 1.077, "step": 578816 }, { "epoch": 43.44315196998124, "grad_norm": 0.7626684904098511, "learning_rate": 0.002828, "loss": 1.0657, "step": 578880 }, { "epoch": 43.44795497185741, "grad_norm": 0.6433974504470825, "learning_rate": 0.002828, "loss": 1.0776, "step": 578944 }, { "epoch": 43.45275797373358, "grad_norm": 0.7001895904541016, "learning_rate": 0.002828, "loss": 1.0702, "step": 579008 }, { "epoch": 43.45756097560976, "grad_norm": 0.7059563994407654, "learning_rate": 0.002828, "loss": 1.0674, "step": 579072 }, { "epoch": 43.46236397748593, "grad_norm": 0.6160516738891602, "learning_rate": 0.002828, "loss": 1.0698, "step": 579136 }, { "epoch": 43.4671669793621, "grad_norm": 0.6591466665267944, "learning_rate": 0.002828, "loss": 1.0658, "step": 579200 }, { "epoch": 43.471969981238274, "grad_norm": 0.6705019474029541, "learning_rate": 0.002828, "loss": 1.0735, "step": 579264 }, { "epoch": 43.476772983114444, "grad_norm": 0.6403300762176514, "learning_rate": 0.002828, "loss": 1.0678, "step": 579328 }, { "epoch": 43.48157598499062, "grad_norm": 0.613906979560852, "learning_rate": 0.002828, "loss": 1.0704, "step": 579392 }, { "epoch": 43.48637898686679, "grad_norm": 0.6602962017059326, "learning_rate": 0.002828, "loss": 1.0747, "step": 579456 }, { "epoch": 43.491181988742966, "grad_norm": 0.6226909160614014, "learning_rate": 0.002828, "loss": 1.0704, "step": 579520 }, { "epoch": 43.495984990619135, "grad_norm": 0.6018249988555908, "learning_rate": 0.002828, "loss": 1.0696, "step": 579584 }, { "epoch": 43.50078799249531, "grad_norm": 0.5919440388679504, "learning_rate": 0.002828, "loss": 1.0725, "step": 579648 }, { "epoch": 43.50559099437148, "grad_norm": 10.90912914276123, "learning_rate": 0.002828, "loss": 1.0727, "step": 579712 }, { "epoch": 43.51039399624766, "grad_norm": 0.6816866397857666, "learning_rate": 0.002828, "loss": 1.0742, "step": 579776 }, { "epoch": 43.51519699812383, "grad_norm": 0.5978453159332275, "learning_rate": 0.002828, "loss": 1.0799, "step": 579840 }, { "epoch": 43.52, "grad_norm": 0.6959922909736633, "learning_rate": 0.002828, "loss": 1.0714, "step": 579904 }, { "epoch": 43.52480300187617, "grad_norm": 0.5744678378105164, "learning_rate": 0.002828, "loss": 1.0667, "step": 579968 }, { "epoch": 43.52960600375234, "grad_norm": 0.6247631907463074, "learning_rate": 0.002828, "loss": 1.0722, "step": 580032 }, { "epoch": 43.53440900562852, "grad_norm": 0.7486253976821899, "learning_rate": 0.002828, "loss": 1.0726, "step": 580096 }, { "epoch": 43.53921200750469, "grad_norm": 0.6821879744529724, "learning_rate": 0.002828, "loss": 1.071, "step": 580160 }, { "epoch": 43.544015009380864, "grad_norm": 0.5762830972671509, "learning_rate": 0.002828, "loss": 1.0728, "step": 580224 }, { "epoch": 43.54881801125703, "grad_norm": 0.5583580732345581, "learning_rate": 0.002828, "loss": 1.0672, "step": 580288 }, { "epoch": 43.55362101313321, "grad_norm": 0.5891516804695129, "learning_rate": 0.002828, "loss": 1.0694, "step": 580352 }, { "epoch": 43.55842401500938, "grad_norm": 0.7095462679862976, "learning_rate": 0.002828, "loss": 1.0733, "step": 580416 }, { "epoch": 43.563227016885556, "grad_norm": 0.5964438915252686, "learning_rate": 0.002828, "loss": 1.0733, "step": 580480 }, { "epoch": 43.568030018761725, "grad_norm": 0.6678001284599304, "learning_rate": 0.002828, "loss": 1.0711, "step": 580544 }, { "epoch": 43.5728330206379, "grad_norm": 0.5241270065307617, "learning_rate": 0.002828, "loss": 1.0736, "step": 580608 }, { "epoch": 43.57763602251407, "grad_norm": 0.5979251265525818, "learning_rate": 0.002828, "loss": 1.0711, "step": 580672 }, { "epoch": 43.58243902439025, "grad_norm": 0.5383949279785156, "learning_rate": 0.002828, "loss": 1.0789, "step": 580736 }, { "epoch": 43.587242026266416, "grad_norm": 0.5068698525428772, "learning_rate": 0.002828, "loss": 1.0756, "step": 580800 }, { "epoch": 43.592045028142586, "grad_norm": 0.6026465892791748, "learning_rate": 0.002828, "loss": 1.0743, "step": 580864 }, { "epoch": 43.59684803001876, "grad_norm": 0.7454937696456909, "learning_rate": 0.002828, "loss": 1.08, "step": 580928 }, { "epoch": 43.60165103189493, "grad_norm": 0.7301528453826904, "learning_rate": 0.002828, "loss": 1.0736, "step": 580992 }, { "epoch": 43.60645403377111, "grad_norm": 0.5982566475868225, "learning_rate": 0.002828, "loss": 1.0758, "step": 581056 }, { "epoch": 43.61125703564728, "grad_norm": 0.6397075653076172, "learning_rate": 0.002828, "loss": 1.0818, "step": 581120 }, { "epoch": 43.616060037523454, "grad_norm": 0.6900129914283752, "learning_rate": 0.002828, "loss": 1.079, "step": 581184 }, { "epoch": 43.62086303939962, "grad_norm": 0.6174712181091309, "learning_rate": 0.002828, "loss": 1.0774, "step": 581248 }, { "epoch": 43.6256660412758, "grad_norm": 0.6319519281387329, "learning_rate": 0.002828, "loss": 1.0751, "step": 581312 }, { "epoch": 43.63046904315197, "grad_norm": 0.5982273817062378, "learning_rate": 0.002828, "loss": 1.0732, "step": 581376 }, { "epoch": 43.635272045028145, "grad_norm": 0.6787490248680115, "learning_rate": 0.002828, "loss": 1.077, "step": 581440 }, { "epoch": 43.640075046904315, "grad_norm": 0.6518996357917786, "learning_rate": 0.002828, "loss": 1.0745, "step": 581504 }, { "epoch": 43.64487804878049, "grad_norm": 0.6295955777168274, "learning_rate": 0.002828, "loss": 1.0759, "step": 581568 }, { "epoch": 43.64968105065666, "grad_norm": 0.5694141983985901, "learning_rate": 0.002828, "loss": 1.076, "step": 581632 }, { "epoch": 43.65448405253283, "grad_norm": 0.6541298627853394, "learning_rate": 0.002828, "loss": 1.0776, "step": 581696 }, { "epoch": 43.659287054409006, "grad_norm": 0.5805025696754456, "learning_rate": 0.002828, "loss": 1.0752, "step": 581760 }, { "epoch": 43.664090056285175, "grad_norm": 0.5690178275108337, "learning_rate": 0.002828, "loss": 1.0786, "step": 581824 }, { "epoch": 43.66889305816135, "grad_norm": 0.8229773044586182, "learning_rate": 0.002828, "loss": 1.0703, "step": 581888 }, { "epoch": 43.67369606003752, "grad_norm": 0.7628452181816101, "learning_rate": 0.002828, "loss": 1.0792, "step": 581952 }, { "epoch": 43.6784990619137, "grad_norm": 0.5986396074295044, "learning_rate": 0.002828, "loss": 1.0772, "step": 582016 }, { "epoch": 43.68330206378987, "grad_norm": 0.632666826248169, "learning_rate": 0.002828, "loss": 1.0767, "step": 582080 }, { "epoch": 43.68810506566604, "grad_norm": 0.6015210151672363, "learning_rate": 0.002828, "loss": 1.071, "step": 582144 }, { "epoch": 43.69290806754221, "grad_norm": 0.6710034608840942, "learning_rate": 0.002828, "loss": 1.0756, "step": 582208 }, { "epoch": 43.69771106941839, "grad_norm": 0.5373445153236389, "learning_rate": 0.002828, "loss": 1.0787, "step": 582272 }, { "epoch": 43.70251407129456, "grad_norm": 0.7191179990768433, "learning_rate": 0.002828, "loss": 1.0803, "step": 582336 }, { "epoch": 43.707317073170735, "grad_norm": 0.8347383141517639, "learning_rate": 0.002828, "loss": 1.0688, "step": 582400 }, { "epoch": 43.712120075046904, "grad_norm": 0.5719173550605774, "learning_rate": 0.002828, "loss": 1.0793, "step": 582464 }, { "epoch": 43.716923076923074, "grad_norm": 0.6953842639923096, "learning_rate": 0.002828, "loss": 1.0763, "step": 582528 }, { "epoch": 43.72172607879925, "grad_norm": 0.6387263536453247, "learning_rate": 0.002828, "loss": 1.0747, "step": 582592 }, { "epoch": 43.72652908067542, "grad_norm": 0.6053597331047058, "learning_rate": 0.002828, "loss": 1.079, "step": 582656 }, { "epoch": 43.731332082551596, "grad_norm": 0.7519330978393555, "learning_rate": 0.002828, "loss": 1.076, "step": 582720 }, { "epoch": 43.736135084427765, "grad_norm": 0.5185713171958923, "learning_rate": 0.002828, "loss": 1.0813, "step": 582784 }, { "epoch": 43.74093808630394, "grad_norm": 0.4951317310333252, "learning_rate": 0.002828, "loss": 1.0802, "step": 582848 }, { "epoch": 43.74574108818011, "grad_norm": 0.5165672302246094, "learning_rate": 0.002828, "loss": 1.0713, "step": 582912 }, { "epoch": 43.75054409005629, "grad_norm": 0.6204631924629211, "learning_rate": 0.002828, "loss": 1.0694, "step": 582976 }, { "epoch": 43.75534709193246, "grad_norm": 0.5312516689300537, "learning_rate": 0.002828, "loss": 1.0771, "step": 583040 }, { "epoch": 43.76015009380863, "grad_norm": 0.6661979556083679, "learning_rate": 0.002828, "loss": 1.0804, "step": 583104 }, { "epoch": 43.7649530956848, "grad_norm": 0.5828707218170166, "learning_rate": 0.002828, "loss": 1.0827, "step": 583168 }, { "epoch": 43.76975609756098, "grad_norm": 0.5881485939025879, "learning_rate": 0.002828, "loss": 1.0765, "step": 583232 }, { "epoch": 43.77455909943715, "grad_norm": 0.6520361304283142, "learning_rate": 0.002828, "loss": 1.0808, "step": 583296 }, { "epoch": 43.77936210131332, "grad_norm": 0.7659959197044373, "learning_rate": 0.002828, "loss": 1.0765, "step": 583360 }, { "epoch": 43.784165103189494, "grad_norm": 0.4846315085887909, "learning_rate": 0.002828, "loss": 1.0789, "step": 583424 }, { "epoch": 43.78896810506566, "grad_norm": 0.5142455101013184, "learning_rate": 0.002828, "loss": 1.0785, "step": 583488 }, { "epoch": 43.79377110694184, "grad_norm": 0.6952672600746155, "learning_rate": 0.002828, "loss": 1.0783, "step": 583552 }, { "epoch": 43.79857410881801, "grad_norm": 0.7225334048271179, "learning_rate": 0.002828, "loss": 1.0759, "step": 583616 }, { "epoch": 43.803377110694186, "grad_norm": 1.8566055297851562, "learning_rate": 0.002828, "loss": 1.0816, "step": 583680 }, { "epoch": 43.808180112570355, "grad_norm": 0.5677652359008789, "learning_rate": 0.002828, "loss": 1.0816, "step": 583744 }, { "epoch": 43.81298311444653, "grad_norm": 0.7882415056228638, "learning_rate": 0.002828, "loss": 1.0761, "step": 583808 }, { "epoch": 43.8177861163227, "grad_norm": 0.7066271901130676, "learning_rate": 0.002828, "loss": 1.0751, "step": 583872 }, { "epoch": 43.82258911819888, "grad_norm": 0.6859885454177856, "learning_rate": 0.002828, "loss": 1.0749, "step": 583936 }, { "epoch": 43.82739212007505, "grad_norm": 0.5205126404762268, "learning_rate": 0.002828, "loss": 1.0768, "step": 584000 }, { "epoch": 43.83219512195122, "grad_norm": 0.5213777422904968, "learning_rate": 0.002828, "loss": 1.0791, "step": 584064 }, { "epoch": 43.83699812382739, "grad_norm": 0.6113591194152832, "learning_rate": 0.002828, "loss": 1.0808, "step": 584128 }, { "epoch": 43.84180112570356, "grad_norm": 0.7442314624786377, "learning_rate": 0.002828, "loss": 1.0756, "step": 584192 }, { "epoch": 43.84660412757974, "grad_norm": 0.7046990990638733, "learning_rate": 0.002828, "loss": 1.0815, "step": 584256 }, { "epoch": 43.85140712945591, "grad_norm": 0.5735657215118408, "learning_rate": 0.002828, "loss": 1.0778, "step": 584320 }, { "epoch": 43.856210131332084, "grad_norm": 0.5720263719558716, "learning_rate": 0.002828, "loss": 1.0824, "step": 584384 }, { "epoch": 43.86101313320825, "grad_norm": 0.7149404883384705, "learning_rate": 0.002828, "loss": 1.0763, "step": 584448 }, { "epoch": 43.86581613508443, "grad_norm": 0.6320396065711975, "learning_rate": 0.002828, "loss": 1.0862, "step": 584512 }, { "epoch": 43.8706191369606, "grad_norm": 0.6746490001678467, "learning_rate": 0.002828, "loss": 1.0848, "step": 584576 }, { "epoch": 43.875422138836775, "grad_norm": 0.5930066108703613, "learning_rate": 0.002828, "loss": 1.0818, "step": 584640 }, { "epoch": 43.880225140712945, "grad_norm": 0.6815347671508789, "learning_rate": 0.002828, "loss": 1.0772, "step": 584704 }, { "epoch": 43.88502814258912, "grad_norm": 0.5609988570213318, "learning_rate": 0.002828, "loss": 1.0778, "step": 584768 }, { "epoch": 43.88983114446529, "grad_norm": 0.6229777336120605, "learning_rate": 0.002828, "loss": 1.0802, "step": 584832 }, { "epoch": 43.89463414634147, "grad_norm": 0.5853741765022278, "learning_rate": 0.002828, "loss": 1.0754, "step": 584896 }, { "epoch": 43.899437148217636, "grad_norm": 0.8227654695510864, "learning_rate": 0.002828, "loss": 1.0778, "step": 584960 }, { "epoch": 43.904240150093806, "grad_norm": 0.5244843363761902, "learning_rate": 0.002828, "loss": 1.078, "step": 585024 }, { "epoch": 43.90904315196998, "grad_norm": 0.6417165398597717, "learning_rate": 0.002828, "loss": 1.0807, "step": 585088 }, { "epoch": 43.91384615384615, "grad_norm": 0.7093722820281982, "learning_rate": 0.002828, "loss": 1.0742, "step": 585152 }, { "epoch": 43.91864915572233, "grad_norm": 0.6699938774108887, "learning_rate": 0.002828, "loss": 1.0833, "step": 585216 }, { "epoch": 43.9234521575985, "grad_norm": 0.6436761021614075, "learning_rate": 0.002828, "loss": 1.0748, "step": 585280 }, { "epoch": 43.92825515947467, "grad_norm": 0.7439795136451721, "learning_rate": 0.002828, "loss": 1.08, "step": 585344 }, { "epoch": 43.93305816135084, "grad_norm": 0.5588270425796509, "learning_rate": 0.002828, "loss": 1.0853, "step": 585408 }, { "epoch": 43.93786116322702, "grad_norm": 0.7395601868629456, "learning_rate": 0.002828, "loss": 1.0813, "step": 585472 }, { "epoch": 43.94266416510319, "grad_norm": 0.6798150539398193, "learning_rate": 0.002828, "loss": 1.082, "step": 585536 }, { "epoch": 43.947467166979365, "grad_norm": 0.6237505078315735, "learning_rate": 0.002828, "loss": 1.0837, "step": 585600 }, { "epoch": 43.952270168855534, "grad_norm": 0.6804483532905579, "learning_rate": 0.002828, "loss": 1.0781, "step": 585664 }, { "epoch": 43.957073170731704, "grad_norm": 0.6526710987091064, "learning_rate": 0.002828, "loss": 1.0844, "step": 585728 }, { "epoch": 43.96187617260788, "grad_norm": 0.7000369429588318, "learning_rate": 0.002828, "loss": 1.0795, "step": 585792 }, { "epoch": 43.96667917448405, "grad_norm": 0.6432533264160156, "learning_rate": 0.002828, "loss": 1.0808, "step": 585856 }, { "epoch": 43.971482176360226, "grad_norm": 0.4752485156059265, "learning_rate": 0.002828, "loss": 1.0787, "step": 585920 }, { "epoch": 43.976285178236395, "grad_norm": 0.5716511607170105, "learning_rate": 0.002828, "loss": 1.081, "step": 585984 }, { "epoch": 43.98108818011257, "grad_norm": 0.5871129631996155, "learning_rate": 0.002828, "loss": 1.0887, "step": 586048 }, { "epoch": 43.98589118198874, "grad_norm": 0.5385404229164124, "learning_rate": 0.002828, "loss": 1.0806, "step": 586112 }, { "epoch": 43.99069418386492, "grad_norm": 0.6556265354156494, "learning_rate": 0.002828, "loss": 1.0862, "step": 586176 }, { "epoch": 43.99549718574109, "grad_norm": 0.547139585018158, "learning_rate": 0.002828, "loss": 1.0825, "step": 586240 }, { "epoch": 44.00030018761726, "grad_norm": 0.6162726283073425, "learning_rate": 0.002828, "loss": 1.0855, "step": 586304 }, { "epoch": 44.00510318949343, "grad_norm": 0.6631591320037842, "learning_rate": 0.002828, "loss": 1.0443, "step": 586368 }, { "epoch": 44.00990619136961, "grad_norm": 0.6567460894584656, "learning_rate": 0.002828, "loss": 1.0485, "step": 586432 }, { "epoch": 44.01470919324578, "grad_norm": 0.6258338093757629, "learning_rate": 0.002828, "loss": 1.0489, "step": 586496 }, { "epoch": 44.01951219512195, "grad_norm": 0.6194806694984436, "learning_rate": 0.002828, "loss": 1.0441, "step": 586560 }, { "epoch": 44.024315196998124, "grad_norm": 0.6310887932777405, "learning_rate": 0.002828, "loss": 1.0494, "step": 586624 }, { "epoch": 44.02911819887429, "grad_norm": 0.590854287147522, "learning_rate": 0.002828, "loss": 1.048, "step": 586688 }, { "epoch": 44.03392120075047, "grad_norm": 0.5728686451911926, "learning_rate": 0.002828, "loss": 1.0507, "step": 586752 }, { "epoch": 44.03872420262664, "grad_norm": 0.6099998354911804, "learning_rate": 0.002828, "loss": 1.047, "step": 586816 }, { "epoch": 44.043527204502816, "grad_norm": 0.5928614139556885, "learning_rate": 0.002828, "loss": 1.0467, "step": 586880 }, { "epoch": 44.048330206378985, "grad_norm": 0.4924786686897278, "learning_rate": 0.002828, "loss": 1.046, "step": 586944 }, { "epoch": 44.05313320825516, "grad_norm": 0.6266118884086609, "learning_rate": 0.002828, "loss": 1.0441, "step": 587008 }, { "epoch": 44.05793621013133, "grad_norm": 0.6437507271766663, "learning_rate": 0.002828, "loss": 1.0523, "step": 587072 }, { "epoch": 44.06273921200751, "grad_norm": 0.6502929329872131, "learning_rate": 0.002828, "loss": 1.0534, "step": 587136 }, { "epoch": 44.06754221388368, "grad_norm": 0.5448206067085266, "learning_rate": 0.002828, "loss": 1.0495, "step": 587200 }, { "epoch": 44.07234521575985, "grad_norm": 0.6379390954971313, "learning_rate": 0.002828, "loss": 1.0465, "step": 587264 }, { "epoch": 44.07714821763602, "grad_norm": 0.6880658268928528, "learning_rate": 0.002828, "loss": 1.0496, "step": 587328 }, { "epoch": 44.08195121951219, "grad_norm": 0.6843789219856262, "learning_rate": 0.002828, "loss": 1.0494, "step": 587392 }, { "epoch": 44.08675422138837, "grad_norm": 0.6257761716842651, "learning_rate": 0.002828, "loss": 1.0552, "step": 587456 }, { "epoch": 44.09155722326454, "grad_norm": 0.5892012119293213, "learning_rate": 0.002828, "loss": 1.0548, "step": 587520 }, { "epoch": 44.096360225140714, "grad_norm": 0.5341951251029968, "learning_rate": 0.002828, "loss": 1.0527, "step": 587584 }, { "epoch": 44.10116322701688, "grad_norm": 0.5494747161865234, "learning_rate": 0.002828, "loss": 1.0548, "step": 587648 }, { "epoch": 44.10596622889306, "grad_norm": 0.6075941324234009, "learning_rate": 0.002828, "loss": 1.0527, "step": 587712 }, { "epoch": 44.11076923076923, "grad_norm": 0.5817174315452576, "learning_rate": 0.002828, "loss": 1.0448, "step": 587776 }, { "epoch": 44.115572232645405, "grad_norm": 0.593499481678009, "learning_rate": 0.002828, "loss": 1.0499, "step": 587840 }, { "epoch": 44.120375234521575, "grad_norm": 0.7568128108978271, "learning_rate": 0.002828, "loss": 1.0579, "step": 587904 }, { "epoch": 44.12517823639775, "grad_norm": 0.6690353751182556, "learning_rate": 0.002828, "loss": 1.0514, "step": 587968 }, { "epoch": 44.12998123827392, "grad_norm": 0.6533127427101135, "learning_rate": 0.002828, "loss": 1.0552, "step": 588032 }, { "epoch": 44.1347842401501, "grad_norm": 0.6924563646316528, "learning_rate": 0.002828, "loss": 1.0515, "step": 588096 }, { "epoch": 44.139587242026266, "grad_norm": 0.5739848613739014, "learning_rate": 0.002828, "loss": 1.0506, "step": 588160 }, { "epoch": 44.144390243902436, "grad_norm": 0.5882086157798767, "learning_rate": 0.002828, "loss": 1.0541, "step": 588224 }, { "epoch": 44.14919324577861, "grad_norm": 0.5814594626426697, "learning_rate": 0.002828, "loss": 1.0537, "step": 588288 }, { "epoch": 44.15399624765478, "grad_norm": 0.5676882266998291, "learning_rate": 0.002828, "loss": 1.0546, "step": 588352 }, { "epoch": 44.15879924953096, "grad_norm": 0.5608579516410828, "learning_rate": 0.002828, "loss": 1.0525, "step": 588416 }, { "epoch": 44.16360225140713, "grad_norm": 0.6067665219306946, "learning_rate": 0.002828, "loss": 1.0546, "step": 588480 }, { "epoch": 44.168405253283304, "grad_norm": 0.6528283953666687, "learning_rate": 0.002828, "loss": 1.0599, "step": 588544 }, { "epoch": 44.17320825515947, "grad_norm": 0.6116089820861816, "learning_rate": 0.002828, "loss": 1.0542, "step": 588608 }, { "epoch": 44.17801125703565, "grad_norm": 0.5617401599884033, "learning_rate": 0.002828, "loss": 1.0509, "step": 588672 }, { "epoch": 44.18281425891182, "grad_norm": 0.624115526676178, "learning_rate": 0.002828, "loss": 1.0551, "step": 588736 }, { "epoch": 44.187617260787995, "grad_norm": 0.6719458699226379, "learning_rate": 0.002828, "loss": 1.057, "step": 588800 }, { "epoch": 44.192420262664164, "grad_norm": 0.6795137524604797, "learning_rate": 0.002828, "loss": 1.0526, "step": 588864 }, { "epoch": 44.19722326454034, "grad_norm": 0.8369677066802979, "learning_rate": 0.002828, "loss": 1.058, "step": 588928 }, { "epoch": 44.20202626641651, "grad_norm": 0.6024190187454224, "learning_rate": 0.002828, "loss": 1.0584, "step": 588992 }, { "epoch": 44.20682926829268, "grad_norm": 0.6553812623023987, "learning_rate": 0.002828, "loss": 1.0631, "step": 589056 }, { "epoch": 44.211632270168856, "grad_norm": 0.7060786485671997, "learning_rate": 0.002828, "loss": 1.0599, "step": 589120 }, { "epoch": 44.216435272045025, "grad_norm": 0.7535731792449951, "learning_rate": 0.002828, "loss": 1.0558, "step": 589184 }, { "epoch": 44.2212382739212, "grad_norm": 0.6029979586601257, "learning_rate": 0.002828, "loss": 1.0588, "step": 589248 }, { "epoch": 44.22604127579737, "grad_norm": 0.6775466799736023, "learning_rate": 0.002828, "loss": 1.0511, "step": 589312 }, { "epoch": 44.23084427767355, "grad_norm": 0.5644216537475586, "learning_rate": 0.002828, "loss": 1.0551, "step": 589376 }, { "epoch": 44.23564727954972, "grad_norm": 0.55975741147995, "learning_rate": 0.002828, "loss": 1.0575, "step": 589440 }, { "epoch": 44.24045028142589, "grad_norm": 0.4965690076351166, "learning_rate": 0.002828, "loss": 1.0561, "step": 589504 }, { "epoch": 44.24525328330206, "grad_norm": 0.6054225564002991, "learning_rate": 0.002828, "loss": 1.0546, "step": 589568 }, { "epoch": 44.25005628517824, "grad_norm": 0.591658890247345, "learning_rate": 0.002828, "loss": 1.0581, "step": 589632 }, { "epoch": 44.25485928705441, "grad_norm": 0.7469866871833801, "learning_rate": 0.002828, "loss": 1.0621, "step": 589696 }, { "epoch": 44.259662288930585, "grad_norm": 0.5662860870361328, "learning_rate": 0.002828, "loss": 1.0586, "step": 589760 }, { "epoch": 44.264465290806754, "grad_norm": 0.6618179082870483, "learning_rate": 0.002828, "loss": 1.0568, "step": 589824 }, { "epoch": 44.26926829268292, "grad_norm": 0.6655414700508118, "learning_rate": 0.002828, "loss": 1.0598, "step": 589888 }, { "epoch": 44.2740712945591, "grad_norm": 0.5782995223999023, "learning_rate": 0.002828, "loss": 1.0534, "step": 589952 }, { "epoch": 44.27887429643527, "grad_norm": 0.5280641317367554, "learning_rate": 0.002828, "loss": 1.0596, "step": 590016 }, { "epoch": 44.283677298311446, "grad_norm": 0.7310107946395874, "learning_rate": 0.002828, "loss": 1.058, "step": 590080 }, { "epoch": 44.288480300187615, "grad_norm": 0.6584124565124512, "learning_rate": 0.002828, "loss": 1.0576, "step": 590144 }, { "epoch": 44.29328330206379, "grad_norm": 0.5985362529754639, "learning_rate": 0.002828, "loss": 1.0604, "step": 590208 }, { "epoch": 44.29808630393996, "grad_norm": 0.6973876357078552, "learning_rate": 0.002828, "loss": 1.0635, "step": 590272 }, { "epoch": 44.30288930581614, "grad_norm": 0.542786717414856, "learning_rate": 0.002828, "loss": 1.0587, "step": 590336 }, { "epoch": 44.30769230769231, "grad_norm": 0.5430316925048828, "learning_rate": 0.002828, "loss": 1.0616, "step": 590400 }, { "epoch": 44.31249530956848, "grad_norm": 0.6455052495002747, "learning_rate": 0.002828, "loss": 1.0537, "step": 590464 }, { "epoch": 44.31729831144465, "grad_norm": 0.6836392283439636, "learning_rate": 0.002828, "loss": 1.0507, "step": 590528 }, { "epoch": 44.32210131332083, "grad_norm": 0.6295143365859985, "learning_rate": 0.002828, "loss": 1.0554, "step": 590592 }, { "epoch": 44.326904315197, "grad_norm": 0.742729127407074, "learning_rate": 0.002828, "loss": 1.0575, "step": 590656 }, { "epoch": 44.33170731707317, "grad_norm": 0.6139900088310242, "learning_rate": 0.002828, "loss": 1.0603, "step": 590720 }, { "epoch": 44.336510318949344, "grad_norm": 0.5415821075439453, "learning_rate": 0.002828, "loss": 1.0553, "step": 590784 }, { "epoch": 44.34131332082551, "grad_norm": 0.5520257949829102, "learning_rate": 0.002828, "loss": 1.0552, "step": 590848 }, { "epoch": 44.34611632270169, "grad_norm": 0.5664125084877014, "learning_rate": 0.002828, "loss": 1.0595, "step": 590912 }, { "epoch": 44.35091932457786, "grad_norm": 0.5885429382324219, "learning_rate": 0.002828, "loss": 1.0568, "step": 590976 }, { "epoch": 44.355722326454035, "grad_norm": 0.7497702836990356, "learning_rate": 0.002828, "loss": 1.056, "step": 591040 }, { "epoch": 44.360525328330205, "grad_norm": 0.6161512732505798, "learning_rate": 0.002828, "loss": 1.0547, "step": 591104 }, { "epoch": 44.36532833020638, "grad_norm": 0.6655434370040894, "learning_rate": 0.002828, "loss": 1.0586, "step": 591168 }, { "epoch": 44.37013133208255, "grad_norm": 0.5859283208847046, "learning_rate": 0.002828, "loss": 1.06, "step": 591232 }, { "epoch": 44.37493433395873, "grad_norm": 0.7172648906707764, "learning_rate": 0.002828, "loss": 1.0526, "step": 591296 }, { "epoch": 44.379737335834896, "grad_norm": 0.6022630333900452, "learning_rate": 0.002828, "loss": 1.055, "step": 591360 }, { "epoch": 44.38454033771107, "grad_norm": 0.6173017024993896, "learning_rate": 0.002828, "loss": 1.0574, "step": 591424 }, { "epoch": 44.38934333958724, "grad_norm": 0.7868699431419373, "learning_rate": 0.002828, "loss": 1.0516, "step": 591488 }, { "epoch": 44.39414634146341, "grad_norm": 0.559699535369873, "learning_rate": 0.002828, "loss": 1.0522, "step": 591552 }, { "epoch": 44.39894934333959, "grad_norm": 0.7050228714942932, "learning_rate": 0.002828, "loss": 1.0564, "step": 591616 }, { "epoch": 44.40375234521576, "grad_norm": 0.6284798383712769, "learning_rate": 0.002828, "loss": 1.0459, "step": 591680 }, { "epoch": 44.408555347091934, "grad_norm": 0.5507165789604187, "learning_rate": 0.002828, "loss": 1.0461, "step": 591744 }, { "epoch": 44.4133583489681, "grad_norm": 0.6204633712768555, "learning_rate": 0.002828, "loss": 1.0416, "step": 591808 }, { "epoch": 44.41816135084428, "grad_norm": 0.5714849829673767, "learning_rate": 0.002828, "loss": 1.0496, "step": 591872 }, { "epoch": 44.42296435272045, "grad_norm": 0.6399840712547302, "learning_rate": 0.002828, "loss": 1.0458, "step": 591936 }, { "epoch": 44.427767354596625, "grad_norm": 0.5837627053260803, "learning_rate": 0.002828, "loss": 1.0512, "step": 592000 }, { "epoch": 44.432570356472795, "grad_norm": 0.6572296023368835, "learning_rate": 0.002828, "loss": 1.0509, "step": 592064 }, { "epoch": 44.43737335834897, "grad_norm": 0.6775990724563599, "learning_rate": 0.002828, "loss": 1.0559, "step": 592128 }, { "epoch": 44.44217636022514, "grad_norm": 0.6193767189979553, "learning_rate": 0.002828, "loss": 1.0516, "step": 592192 }, { "epoch": 44.44697936210132, "grad_norm": 0.6084091663360596, "learning_rate": 0.002828, "loss": 1.0495, "step": 592256 }, { "epoch": 44.451782363977486, "grad_norm": 0.6361521482467651, "learning_rate": 0.002828, "loss": 1.0467, "step": 592320 }, { "epoch": 44.456585365853655, "grad_norm": 0.6244654655456543, "learning_rate": 0.002828, "loss": 1.0485, "step": 592384 }, { "epoch": 44.46138836772983, "grad_norm": 0.6915056109428406, "learning_rate": 0.002828, "loss": 1.049, "step": 592448 }, { "epoch": 44.466191369606, "grad_norm": 0.5671323537826538, "learning_rate": 0.002828, "loss": 1.0476, "step": 592512 }, { "epoch": 44.47099437148218, "grad_norm": 0.6313081979751587, "learning_rate": 0.002828, "loss": 1.054, "step": 592576 }, { "epoch": 44.47579737335835, "grad_norm": 0.5822582840919495, "learning_rate": 0.002828, "loss": 1.0507, "step": 592640 }, { "epoch": 44.48060037523452, "grad_norm": 0.6180717945098877, "learning_rate": 0.002828, "loss": 1.0482, "step": 592704 }, { "epoch": 44.48540337711069, "grad_norm": 0.7753960490226746, "learning_rate": 0.002828, "loss": 1.0501, "step": 592768 }, { "epoch": 44.49020637898687, "grad_norm": 0.887162983417511, "learning_rate": 0.002828, "loss": 1.0539, "step": 592832 }, { "epoch": 44.49500938086304, "grad_norm": 0.6842669248580933, "learning_rate": 0.002828, "loss": 1.0504, "step": 592896 }, { "epoch": 44.499812382739215, "grad_norm": 0.5897248983383179, "learning_rate": 0.002828, "loss": 1.0542, "step": 592960 }, { "epoch": 44.504615384615384, "grad_norm": 0.6817046999931335, "learning_rate": 0.002828, "loss": 1.0452, "step": 593024 }, { "epoch": 44.50941838649156, "grad_norm": 0.6712552905082703, "learning_rate": 0.002828, "loss": 1.0497, "step": 593088 }, { "epoch": 44.51422138836773, "grad_norm": 0.7757641673088074, "learning_rate": 0.002828, "loss": 1.0498, "step": 593152 }, { "epoch": 44.5190243902439, "grad_norm": 0.6338831186294556, "learning_rate": 0.002828, "loss": 1.0529, "step": 593216 }, { "epoch": 44.523827392120076, "grad_norm": 0.5562050342559814, "learning_rate": 0.002828, "loss": 1.0446, "step": 593280 }, { "epoch": 44.528630393996245, "grad_norm": 0.6536921262741089, "learning_rate": 0.002828, "loss": 1.0569, "step": 593344 }, { "epoch": 44.53343339587242, "grad_norm": 0.5887206792831421, "learning_rate": 0.002828, "loss": 1.0521, "step": 593408 }, { "epoch": 44.53823639774859, "grad_norm": 0.7711880803108215, "learning_rate": 0.002828, "loss": 1.0535, "step": 593472 }, { "epoch": 44.54303939962477, "grad_norm": 0.604606032371521, "learning_rate": 0.002828, "loss": 1.0512, "step": 593536 }, { "epoch": 44.54784240150094, "grad_norm": 0.5851953029632568, "learning_rate": 0.002828, "loss": 1.049, "step": 593600 }, { "epoch": 44.55264540337711, "grad_norm": 0.8617910146713257, "learning_rate": 0.002828, "loss": 1.0514, "step": 593664 }, { "epoch": 44.55744840525328, "grad_norm": 0.6344445943832397, "learning_rate": 0.002828, "loss": 1.0521, "step": 593728 }, { "epoch": 44.56225140712946, "grad_norm": 0.6127328276634216, "learning_rate": 0.002828, "loss": 1.0516, "step": 593792 }, { "epoch": 44.56705440900563, "grad_norm": 0.6657578945159912, "learning_rate": 0.002828, "loss": 1.0553, "step": 593856 }, { "epoch": 44.571857410881805, "grad_norm": 0.7788225412368774, "learning_rate": 0.002828, "loss": 1.0466, "step": 593920 }, { "epoch": 44.576660412757974, "grad_norm": 0.5609893798828125, "learning_rate": 0.002828, "loss": 1.0561, "step": 593984 }, { "epoch": 44.58146341463414, "grad_norm": 0.6387379765510559, "learning_rate": 0.002828, "loss": 1.0482, "step": 594048 }, { "epoch": 44.58626641651032, "grad_norm": 0.6570412516593933, "learning_rate": 0.002828, "loss": 1.0516, "step": 594112 }, { "epoch": 44.59106941838649, "grad_norm": 0.494945228099823, "learning_rate": 0.002828, "loss": 1.0541, "step": 594176 }, { "epoch": 44.595872420262666, "grad_norm": 0.6139020323753357, "learning_rate": 0.002828, "loss": 1.0536, "step": 594240 }, { "epoch": 44.600675422138835, "grad_norm": 0.7307867407798767, "learning_rate": 0.002828, "loss": 1.0527, "step": 594304 }, { "epoch": 44.60547842401501, "grad_norm": 0.6578233242034912, "learning_rate": 0.002828, "loss": 1.0518, "step": 594368 }, { "epoch": 44.61028142589118, "grad_norm": 0.6268816590309143, "learning_rate": 0.002828, "loss": 1.0562, "step": 594432 }, { "epoch": 44.61508442776736, "grad_norm": 0.6375176310539246, "learning_rate": 0.002828, "loss": 1.0543, "step": 594496 }, { "epoch": 44.619887429643526, "grad_norm": 0.5896298885345459, "learning_rate": 0.002828, "loss": 1.0482, "step": 594560 }, { "epoch": 44.6246904315197, "grad_norm": 0.6761197447776794, "learning_rate": 0.002828, "loss": 1.0599, "step": 594624 }, { "epoch": 44.62949343339587, "grad_norm": 0.5964671969413757, "learning_rate": 0.002828, "loss": 1.0609, "step": 594688 }, { "epoch": 44.63429643527205, "grad_norm": 0.6479743123054504, "learning_rate": 0.002828, "loss": 1.0531, "step": 594752 }, { "epoch": 44.63909943714822, "grad_norm": 0.6290108561515808, "learning_rate": 0.002828, "loss": 1.0567, "step": 594816 }, { "epoch": 44.64390243902439, "grad_norm": 0.7283443808555603, "learning_rate": 0.002828, "loss": 1.0541, "step": 594880 }, { "epoch": 44.648705440900564, "grad_norm": 0.6581125855445862, "learning_rate": 0.002828, "loss": 1.0503, "step": 594944 }, { "epoch": 44.65350844277673, "grad_norm": 0.6930689811706543, "learning_rate": 0.002828, "loss": 1.0572, "step": 595008 }, { "epoch": 44.65831144465291, "grad_norm": 0.5703624486923218, "learning_rate": 0.002828, "loss": 1.0597, "step": 595072 }, { "epoch": 44.66311444652908, "grad_norm": 0.7384238243103027, "learning_rate": 0.002828, "loss": 1.0528, "step": 595136 }, { "epoch": 44.667917448405255, "grad_norm": 0.6679426431655884, "learning_rate": 0.002828, "loss": 1.0638, "step": 595200 }, { "epoch": 44.672720450281425, "grad_norm": 0.5475023984909058, "learning_rate": 0.002828, "loss": 1.0582, "step": 595264 }, { "epoch": 44.6775234521576, "grad_norm": 0.5508772730827332, "learning_rate": 0.002828, "loss": 1.0594, "step": 595328 }, { "epoch": 44.68232645403377, "grad_norm": 0.6432110071182251, "learning_rate": 0.002828, "loss": 1.0586, "step": 595392 }, { "epoch": 44.68712945590995, "grad_norm": 0.5725296139717102, "learning_rate": 0.002828, "loss": 1.0557, "step": 595456 }, { "epoch": 44.691932457786116, "grad_norm": 0.679014265537262, "learning_rate": 0.002828, "loss": 1.0536, "step": 595520 }, { "epoch": 44.696735459662285, "grad_norm": 0.7023448944091797, "learning_rate": 0.002828, "loss": 1.0614, "step": 595584 }, { "epoch": 44.70153846153846, "grad_norm": 0.563921332359314, "learning_rate": 0.002828, "loss": 1.0599, "step": 595648 }, { "epoch": 44.70634146341463, "grad_norm": 0.5842373967170715, "learning_rate": 0.002828, "loss": 1.0508, "step": 595712 }, { "epoch": 44.71114446529081, "grad_norm": 0.5589472651481628, "learning_rate": 0.002828, "loss": 1.0557, "step": 595776 }, { "epoch": 44.71594746716698, "grad_norm": 0.6475051045417786, "learning_rate": 0.002828, "loss": 1.0557, "step": 595840 }, { "epoch": 44.72075046904315, "grad_norm": 0.5432165861129761, "learning_rate": 0.002828, "loss": 1.057, "step": 595904 }, { "epoch": 44.72555347091932, "grad_norm": 0.5137358903884888, "learning_rate": 0.002828, "loss": 1.062, "step": 595968 }, { "epoch": 44.7303564727955, "grad_norm": 0.6988866925239563, "learning_rate": 0.002828, "loss": 1.0518, "step": 596032 }, { "epoch": 44.73515947467167, "grad_norm": 0.7478548884391785, "learning_rate": 0.002828, "loss": 1.0593, "step": 596096 }, { "epoch": 44.739962476547845, "grad_norm": 0.4760184586048126, "learning_rate": 0.002828, "loss": 1.0603, "step": 596160 }, { "epoch": 44.744765478424014, "grad_norm": 0.7488265633583069, "learning_rate": 0.002828, "loss": 1.0586, "step": 596224 }, { "epoch": 44.74956848030019, "grad_norm": 0.6295368671417236, "learning_rate": 0.002828, "loss": 1.0557, "step": 596288 }, { "epoch": 44.75437148217636, "grad_norm": 0.4992520809173584, "learning_rate": 0.002828, "loss": 1.0539, "step": 596352 }, { "epoch": 44.75917448405253, "grad_norm": 0.6316255927085876, "learning_rate": 0.002828, "loss": 1.0576, "step": 596416 }, { "epoch": 44.763977485928706, "grad_norm": 0.6836703419685364, "learning_rate": 0.002828, "loss": 1.0535, "step": 596480 }, { "epoch": 44.768780487804875, "grad_norm": 0.5981588959693909, "learning_rate": 0.002828, "loss": 1.0597, "step": 596544 }, { "epoch": 44.77358348968105, "grad_norm": 0.8249488472938538, "learning_rate": 0.002828, "loss": 1.0532, "step": 596608 }, { "epoch": 44.77838649155722, "grad_norm": 0.6853210926055908, "learning_rate": 0.002828, "loss": 1.0572, "step": 596672 }, { "epoch": 44.7831894934334, "grad_norm": 0.5512163043022156, "learning_rate": 0.002828, "loss": 1.0565, "step": 596736 }, { "epoch": 44.78799249530957, "grad_norm": 0.6343268156051636, "learning_rate": 0.002828, "loss": 1.0494, "step": 596800 }, { "epoch": 44.79279549718574, "grad_norm": 0.6317381262779236, "learning_rate": 0.002828, "loss": 1.0577, "step": 596864 }, { "epoch": 44.79759849906191, "grad_norm": 0.6074554324150085, "learning_rate": 0.002828, "loss": 1.0542, "step": 596928 }, { "epoch": 44.80240150093809, "grad_norm": 0.5829271078109741, "learning_rate": 0.002828, "loss": 1.0538, "step": 596992 }, { "epoch": 44.80720450281426, "grad_norm": 0.5427216291427612, "learning_rate": 0.002828, "loss": 1.0613, "step": 597056 }, { "epoch": 44.812007504690435, "grad_norm": 0.7172414660453796, "learning_rate": 0.002828, "loss": 1.0536, "step": 597120 }, { "epoch": 44.816810506566604, "grad_norm": 0.5971596837043762, "learning_rate": 0.002828, "loss": 1.0618, "step": 597184 }, { "epoch": 44.82161350844277, "grad_norm": 0.5736137628555298, "learning_rate": 0.002828, "loss": 1.0551, "step": 597248 }, { "epoch": 44.82641651031895, "grad_norm": 0.7229673266410828, "learning_rate": 0.002828, "loss": 1.0606, "step": 597312 }, { "epoch": 44.83121951219512, "grad_norm": 0.79429030418396, "learning_rate": 0.002828, "loss": 1.0533, "step": 597376 }, { "epoch": 44.836022514071296, "grad_norm": 0.5500268340110779, "learning_rate": 0.002828, "loss": 1.0581, "step": 597440 }, { "epoch": 44.840825515947465, "grad_norm": 0.6283418536186218, "learning_rate": 0.002828, "loss": 1.0618, "step": 597504 }, { "epoch": 44.84562851782364, "grad_norm": 0.5786862969398499, "learning_rate": 0.002828, "loss": 1.0523, "step": 597568 }, { "epoch": 44.85043151969981, "grad_norm": 0.6707375645637512, "learning_rate": 0.002828, "loss": 1.0596, "step": 597632 }, { "epoch": 44.85523452157599, "grad_norm": 0.6113284826278687, "learning_rate": 0.002828, "loss": 1.0573, "step": 597696 }, { "epoch": 44.86003752345216, "grad_norm": 0.7945263385772705, "learning_rate": 0.002828, "loss": 1.0632, "step": 597760 }, { "epoch": 44.86484052532833, "grad_norm": 0.8358811736106873, "learning_rate": 0.002828, "loss": 1.0602, "step": 597824 }, { "epoch": 44.8696435272045, "grad_norm": 0.5964508652687073, "learning_rate": 0.002828, "loss": 1.057, "step": 597888 }, { "epoch": 44.87444652908068, "grad_norm": 0.6710835099220276, "learning_rate": 0.002828, "loss": 1.0595, "step": 597952 }, { "epoch": 44.87924953095685, "grad_norm": 0.6213023066520691, "learning_rate": 0.002828, "loss": 1.057, "step": 598016 }, { "epoch": 44.88405253283302, "grad_norm": 0.6238622665405273, "learning_rate": 0.002828, "loss": 1.0514, "step": 598080 }, { "epoch": 44.888855534709194, "grad_norm": 0.7188548445701599, "learning_rate": 0.002828, "loss": 1.0622, "step": 598144 }, { "epoch": 44.89365853658536, "grad_norm": 0.5461704730987549, "learning_rate": 0.002828, "loss": 1.0625, "step": 598208 }, { "epoch": 44.89846153846154, "grad_norm": 0.5917800068855286, "learning_rate": 0.002828, "loss": 1.0594, "step": 598272 }, { "epoch": 44.90326454033771, "grad_norm": 0.5917359590530396, "learning_rate": 0.002828, "loss": 1.0653, "step": 598336 }, { "epoch": 44.908067542213885, "grad_norm": 0.7558372020721436, "learning_rate": 0.002828, "loss": 1.0642, "step": 598400 }, { "epoch": 44.912870544090055, "grad_norm": 0.5604377388954163, "learning_rate": 0.002828, "loss": 1.0558, "step": 598464 }, { "epoch": 44.91767354596623, "grad_norm": 0.6405630111694336, "learning_rate": 0.002828, "loss": 1.0626, "step": 598528 }, { "epoch": 44.9224765478424, "grad_norm": 0.6783610582351685, "learning_rate": 0.002828, "loss": 1.0597, "step": 598592 }, { "epoch": 44.92727954971858, "grad_norm": 0.6869999766349792, "learning_rate": 0.002828, "loss": 1.0595, "step": 598656 }, { "epoch": 44.932082551594746, "grad_norm": 0.5955403447151184, "learning_rate": 0.002828, "loss": 1.058, "step": 598720 }, { "epoch": 44.93688555347092, "grad_norm": 0.5020884275436401, "learning_rate": 0.002828, "loss": 1.0592, "step": 598784 }, { "epoch": 44.94168855534709, "grad_norm": 0.6526339054107666, "learning_rate": 0.002828, "loss": 1.0546, "step": 598848 }, { "epoch": 44.94649155722326, "grad_norm": 0.7076884508132935, "learning_rate": 0.002828, "loss": 1.0607, "step": 598912 }, { "epoch": 44.95129455909944, "grad_norm": 0.7215730547904968, "learning_rate": 0.002828, "loss": 1.0632, "step": 598976 }, { "epoch": 44.95609756097561, "grad_norm": 0.6171411871910095, "learning_rate": 0.002828, "loss": 1.0619, "step": 599040 }, { "epoch": 44.96090056285178, "grad_norm": 0.5962241291999817, "learning_rate": 0.002828, "loss": 1.0596, "step": 599104 }, { "epoch": 44.96570356472795, "grad_norm": 0.5820527672767639, "learning_rate": 0.002828, "loss": 1.0633, "step": 599168 }, { "epoch": 44.97050656660413, "grad_norm": 0.5785463452339172, "learning_rate": 0.002828, "loss": 1.0584, "step": 599232 }, { "epoch": 44.9753095684803, "grad_norm": 0.6721450090408325, "learning_rate": 0.002828, "loss": 1.0603, "step": 599296 }, { "epoch": 44.980112570356475, "grad_norm": 0.5723146200180054, "learning_rate": 0.002828, "loss": 1.0643, "step": 599360 }, { "epoch": 44.984915572232644, "grad_norm": 0.5571084022521973, "learning_rate": 0.002828, "loss": 1.0574, "step": 599424 }, { "epoch": 44.98971857410882, "grad_norm": 0.6663368344306946, "learning_rate": 0.002828, "loss": 1.0591, "step": 599488 }, { "epoch": 44.99452157598499, "grad_norm": 0.6615773439407349, "learning_rate": 0.002828, "loss": 1.0612, "step": 599552 }, { "epoch": 44.99932457786117, "grad_norm": 0.6352889537811279, "learning_rate": 0.002828, "loss": 1.0617, "step": 599616 }, { "epoch": 45.004127579737336, "grad_norm": 0.6930376291275024, "learning_rate": 0.002828, "loss": 1.0447, "step": 599680 }, { "epoch": 45.008930581613505, "grad_norm": 0.6235330104827881, "learning_rate": 0.002828, "loss": 1.0442, "step": 599744 }, { "epoch": 45.01373358348968, "grad_norm": 0.6568100452423096, "learning_rate": 0.002828, "loss": 1.0478, "step": 599808 }, { "epoch": 45.01853658536585, "grad_norm": 0.6526299118995667, "learning_rate": 0.002828, "loss": 1.0404, "step": 599872 }, { "epoch": 45.02333958724203, "grad_norm": 0.7410742044448853, "learning_rate": 0.002828, "loss": 1.0454, "step": 599936 }, { "epoch": 45.0281425891182, "grad_norm": 0.6709450483322144, "learning_rate": 0.002828, "loss": 1.0456, "step": 600000 }, { "epoch": 45.03294559099437, "grad_norm": 0.6473307013511658, "learning_rate": 0.002828, "loss": 1.05, "step": 600064 }, { "epoch": 45.03774859287054, "grad_norm": 0.6768342852592468, "learning_rate": 0.002828, "loss": 1.0417, "step": 600128 }, { "epoch": 45.04255159474672, "grad_norm": 0.5911619067192078, "learning_rate": 0.002828, "loss": 1.0458, "step": 600192 }, { "epoch": 45.04735459662289, "grad_norm": 0.5454323291778564, "learning_rate": 0.002828, "loss": 1.0384, "step": 600256 }, { "epoch": 45.052157598499065, "grad_norm": 0.7044894099235535, "learning_rate": 0.002828, "loss": 1.0402, "step": 600320 }, { "epoch": 45.056960600375234, "grad_norm": 0.6176587343215942, "learning_rate": 0.002828, "loss": 1.0492, "step": 600384 }, { "epoch": 45.06176360225141, "grad_norm": 0.6300610303878784, "learning_rate": 0.002828, "loss": 1.044, "step": 600448 }, { "epoch": 45.06656660412758, "grad_norm": 0.7107115387916565, "learning_rate": 0.002828, "loss": 1.0383, "step": 600512 }, { "epoch": 45.07136960600375, "grad_norm": 0.6533573865890503, "learning_rate": 0.002828, "loss": 1.0458, "step": 600576 }, { "epoch": 45.076172607879926, "grad_norm": 0.6106192469596863, "learning_rate": 0.002828, "loss": 1.0454, "step": 600640 }, { "epoch": 45.080975609756095, "grad_norm": 0.7222683429718018, "learning_rate": 0.002828, "loss": 1.0459, "step": 600704 }, { "epoch": 45.08577861163227, "grad_norm": 0.6331077814102173, "learning_rate": 0.002828, "loss": 1.0471, "step": 600768 }, { "epoch": 45.09058161350844, "grad_norm": 0.5737351179122925, "learning_rate": 0.002828, "loss": 1.0424, "step": 600832 }, { "epoch": 45.09538461538462, "grad_norm": 0.6387905478477478, "learning_rate": 0.002828, "loss": 1.0538, "step": 600896 }, { "epoch": 45.10018761726079, "grad_norm": 0.558758556842804, "learning_rate": 0.002828, "loss": 1.05, "step": 600960 }, { "epoch": 45.10499061913696, "grad_norm": 0.6962189078330994, "learning_rate": 0.002828, "loss": 1.0503, "step": 601024 }, { "epoch": 45.10979362101313, "grad_norm": 0.6257447600364685, "learning_rate": 0.002828, "loss": 1.0464, "step": 601088 }, { "epoch": 45.11459662288931, "grad_norm": 0.6099736094474792, "learning_rate": 0.002828, "loss": 1.0466, "step": 601152 }, { "epoch": 45.11939962476548, "grad_norm": 0.6403970718383789, "learning_rate": 0.002828, "loss": 1.0487, "step": 601216 }, { "epoch": 45.124202626641654, "grad_norm": 0.8435704708099365, "learning_rate": 0.002828, "loss": 1.0434, "step": 601280 }, { "epoch": 45.129005628517824, "grad_norm": 0.6793568134307861, "learning_rate": 0.002828, "loss": 1.0476, "step": 601344 }, { "epoch": 45.13380863039399, "grad_norm": 0.5509681701660156, "learning_rate": 0.002828, "loss": 1.0538, "step": 601408 }, { "epoch": 45.13861163227017, "grad_norm": 0.5556575655937195, "learning_rate": 0.002828, "loss": 1.0492, "step": 601472 }, { "epoch": 45.14341463414634, "grad_norm": 0.6454841494560242, "learning_rate": 0.002828, "loss": 1.0442, "step": 601536 }, { "epoch": 45.148217636022515, "grad_norm": 0.5697574019432068, "learning_rate": 0.002828, "loss": 1.0418, "step": 601600 }, { "epoch": 45.153020637898685, "grad_norm": 0.8331282734870911, "learning_rate": 0.002828, "loss": 1.0482, "step": 601664 }, { "epoch": 45.15782363977486, "grad_norm": 0.6625939011573792, "learning_rate": 0.002828, "loss": 1.0433, "step": 601728 }, { "epoch": 45.16262664165103, "grad_norm": 0.7174499034881592, "learning_rate": 0.002828, "loss": 1.05, "step": 601792 }, { "epoch": 45.16742964352721, "grad_norm": 0.7669702768325806, "learning_rate": 0.002828, "loss": 1.0489, "step": 601856 }, { "epoch": 45.172232645403376, "grad_norm": 0.6364301443099976, "learning_rate": 0.002828, "loss": 1.0477, "step": 601920 }, { "epoch": 45.17703564727955, "grad_norm": 0.5847406387329102, "learning_rate": 0.002828, "loss": 1.057, "step": 601984 }, { "epoch": 45.18183864915572, "grad_norm": 0.6891112923622131, "learning_rate": 0.002828, "loss": 1.0542, "step": 602048 }, { "epoch": 45.1866416510319, "grad_norm": 0.559404194355011, "learning_rate": 0.002828, "loss": 1.051, "step": 602112 }, { "epoch": 45.19144465290807, "grad_norm": 0.6273938417434692, "learning_rate": 0.002828, "loss": 1.0541, "step": 602176 }, { "epoch": 45.19624765478424, "grad_norm": 0.6550655364990234, "learning_rate": 0.002828, "loss": 1.0464, "step": 602240 }, { "epoch": 45.20105065666041, "grad_norm": 0.6907812356948853, "learning_rate": 0.002828, "loss": 1.0524, "step": 602304 }, { "epoch": 45.20585365853658, "grad_norm": 0.5884630084037781, "learning_rate": 0.002828, "loss": 1.0524, "step": 602368 }, { "epoch": 45.21065666041276, "grad_norm": 0.5913063287734985, "learning_rate": 0.002828, "loss": 1.053, "step": 602432 }, { "epoch": 45.21545966228893, "grad_norm": 0.6427793502807617, "learning_rate": 0.002828, "loss": 1.0524, "step": 602496 }, { "epoch": 45.220262664165105, "grad_norm": 0.5619731545448303, "learning_rate": 0.002828, "loss": 1.0514, "step": 602560 }, { "epoch": 45.225065666041274, "grad_norm": 0.5781331062316895, "learning_rate": 0.002828, "loss": 1.0546, "step": 602624 }, { "epoch": 45.22986866791745, "grad_norm": 0.5728752613067627, "learning_rate": 0.002828, "loss": 1.0498, "step": 602688 }, { "epoch": 45.23467166979362, "grad_norm": 0.5659903883934021, "learning_rate": 0.002828, "loss": 1.0552, "step": 602752 }, { "epoch": 45.2394746716698, "grad_norm": 0.6563093066215515, "learning_rate": 0.002828, "loss": 1.0511, "step": 602816 }, { "epoch": 45.244277673545966, "grad_norm": 0.5482872724533081, "learning_rate": 0.002828, "loss": 1.0545, "step": 602880 }, { "epoch": 45.249080675422135, "grad_norm": 0.5136545300483704, "learning_rate": 0.002828, "loss": 1.0573, "step": 602944 }, { "epoch": 45.25388367729831, "grad_norm": 0.5944323539733887, "learning_rate": 0.002828, "loss": 1.0615, "step": 603008 }, { "epoch": 45.25868667917448, "grad_norm": 0.8258227109909058, "learning_rate": 0.002828, "loss": 1.0524, "step": 603072 }, { "epoch": 45.26348968105066, "grad_norm": 0.68766850233078, "learning_rate": 0.002828, "loss": 1.0561, "step": 603136 }, { "epoch": 45.26829268292683, "grad_norm": 0.5555733442306519, "learning_rate": 0.002828, "loss": 1.0525, "step": 603200 }, { "epoch": 45.273095684803, "grad_norm": 0.7993525266647339, "learning_rate": 0.002828, "loss": 1.0562, "step": 603264 }, { "epoch": 45.27789868667917, "grad_norm": 0.6797704696655273, "learning_rate": 0.002828, "loss": 1.053, "step": 603328 }, { "epoch": 45.28270168855535, "grad_norm": 0.8095536231994629, "learning_rate": 0.002828, "loss": 1.0536, "step": 603392 }, { "epoch": 45.28750469043152, "grad_norm": 0.6387770175933838, "learning_rate": 0.002828, "loss": 1.0532, "step": 603456 }, { "epoch": 45.292307692307695, "grad_norm": 0.7661316990852356, "learning_rate": 0.002828, "loss": 1.0567, "step": 603520 }, { "epoch": 45.297110694183864, "grad_norm": 0.6616008877754211, "learning_rate": 0.002828, "loss": 1.0502, "step": 603584 }, { "epoch": 45.30191369606004, "grad_norm": 0.6984626054763794, "learning_rate": 0.002828, "loss": 1.049, "step": 603648 }, { "epoch": 45.30671669793621, "grad_norm": 0.6384255886077881, "learning_rate": 0.002828, "loss": 1.058, "step": 603712 }, { "epoch": 45.31151969981238, "grad_norm": 0.6473172307014465, "learning_rate": 0.002828, "loss": 1.0551, "step": 603776 }, { "epoch": 45.316322701688556, "grad_norm": 0.7297527194023132, "learning_rate": 0.002828, "loss": 1.0615, "step": 603840 }, { "epoch": 45.321125703564725, "grad_norm": 0.6076101064682007, "learning_rate": 0.002828, "loss": 1.0537, "step": 603904 }, { "epoch": 45.3259287054409, "grad_norm": 0.6504645347595215, "learning_rate": 0.002828, "loss": 1.054, "step": 603968 }, { "epoch": 45.33073170731707, "grad_norm": 0.7188733220100403, "learning_rate": 0.002828, "loss": 1.0501, "step": 604032 }, { "epoch": 45.33553470919325, "grad_norm": 0.6785242557525635, "learning_rate": 0.002828, "loss": 1.0502, "step": 604096 }, { "epoch": 45.34033771106942, "grad_norm": 0.7085686326026917, "learning_rate": 0.002828, "loss": 1.0577, "step": 604160 }, { "epoch": 45.34514071294559, "grad_norm": 0.5494343638420105, "learning_rate": 0.002828, "loss": 1.0569, "step": 604224 }, { "epoch": 45.34994371482176, "grad_norm": 0.6185113787651062, "learning_rate": 0.002828, "loss": 1.0554, "step": 604288 }, { "epoch": 45.35474671669794, "grad_norm": 0.6287076473236084, "learning_rate": 0.002828, "loss": 1.059, "step": 604352 }, { "epoch": 45.35954971857411, "grad_norm": 0.6744497418403625, "learning_rate": 0.002828, "loss": 1.0573, "step": 604416 }, { "epoch": 45.364352720450285, "grad_norm": 0.6540379524230957, "learning_rate": 0.002828, "loss": 1.0519, "step": 604480 }, { "epoch": 45.369155722326454, "grad_norm": 0.648766040802002, "learning_rate": 0.002828, "loss": 1.0582, "step": 604544 }, { "epoch": 45.37395872420262, "grad_norm": 0.7529312968254089, "learning_rate": 0.002828, "loss": 1.0562, "step": 604608 }, { "epoch": 45.3787617260788, "grad_norm": 0.6116892695426941, "learning_rate": 0.002828, "loss": 1.0572, "step": 604672 }, { "epoch": 45.38356472795497, "grad_norm": 0.6726174354553223, "learning_rate": 0.002828, "loss": 1.0555, "step": 604736 }, { "epoch": 45.388367729831145, "grad_norm": 0.5719484090805054, "learning_rate": 0.002828, "loss": 1.0567, "step": 604800 }, { "epoch": 45.393170731707315, "grad_norm": 0.7066259980201721, "learning_rate": 0.002828, "loss": 1.0599, "step": 604864 }, { "epoch": 45.39797373358349, "grad_norm": 0.6202705502510071, "learning_rate": 0.002828, "loss": 1.0587, "step": 604928 }, { "epoch": 45.40277673545966, "grad_norm": 0.8698914051055908, "learning_rate": 0.002828, "loss": 1.0569, "step": 604992 }, { "epoch": 45.40757973733584, "grad_norm": 0.6540534496307373, "learning_rate": 0.002828, "loss": 1.0578, "step": 605056 }, { "epoch": 45.412382739212006, "grad_norm": 0.5570470094680786, "learning_rate": 0.002828, "loss": 1.0609, "step": 605120 }, { "epoch": 45.41718574108818, "grad_norm": 0.7164893746376038, "learning_rate": 0.002828, "loss": 1.0548, "step": 605184 }, { "epoch": 45.42198874296435, "grad_norm": 0.5541973114013672, "learning_rate": 0.002828, "loss": 1.0545, "step": 605248 }, { "epoch": 45.42679174484053, "grad_norm": 0.7188869118690491, "learning_rate": 0.002828, "loss": 1.0525, "step": 605312 }, { "epoch": 45.4315947467167, "grad_norm": 0.6505350470542908, "learning_rate": 0.002828, "loss": 1.0601, "step": 605376 }, { "epoch": 45.43639774859287, "grad_norm": 0.5822009444236755, "learning_rate": 0.002828, "loss": 1.0615, "step": 605440 }, { "epoch": 45.441200750469044, "grad_norm": 0.7058424353599548, "learning_rate": 0.002828, "loss": 1.0595, "step": 605504 }, { "epoch": 45.44600375234521, "grad_norm": 0.5275965929031372, "learning_rate": 0.002828, "loss": 1.0624, "step": 605568 }, { "epoch": 45.45080675422139, "grad_norm": 0.5741568207740784, "learning_rate": 0.002828, "loss": 1.0561, "step": 605632 }, { "epoch": 45.45560975609756, "grad_norm": 0.6492908596992493, "learning_rate": 0.002828, "loss": 1.063, "step": 605696 }, { "epoch": 45.460412757973735, "grad_norm": 0.5441504716873169, "learning_rate": 0.002828, "loss": 1.0581, "step": 605760 }, { "epoch": 45.465215759849904, "grad_norm": 0.6401174664497375, "learning_rate": 0.002828, "loss": 1.0585, "step": 605824 }, { "epoch": 45.47001876172608, "grad_norm": 0.6390235424041748, "learning_rate": 0.002828, "loss": 1.0655, "step": 605888 }, { "epoch": 45.47482176360225, "grad_norm": 0.6424334049224854, "learning_rate": 0.002828, "loss": 1.0606, "step": 605952 }, { "epoch": 45.47962476547843, "grad_norm": 0.5485963821411133, "learning_rate": 0.002828, "loss": 1.0646, "step": 606016 }, { "epoch": 45.484427767354596, "grad_norm": 0.8131200075149536, "learning_rate": 0.002828, "loss": 1.058, "step": 606080 }, { "epoch": 45.48923076923077, "grad_norm": 0.7325833439826965, "learning_rate": 0.002828, "loss": 1.0633, "step": 606144 }, { "epoch": 45.49403377110694, "grad_norm": 0.6808172464370728, "learning_rate": 0.002828, "loss": 1.0572, "step": 606208 }, { "epoch": 45.49883677298311, "grad_norm": 0.7559350728988647, "learning_rate": 0.002828, "loss": 1.0633, "step": 606272 }, { "epoch": 45.50363977485929, "grad_norm": 0.723712146282196, "learning_rate": 0.002828, "loss": 1.0573, "step": 606336 }, { "epoch": 45.50844277673546, "grad_norm": 0.7043648958206177, "learning_rate": 0.002828, "loss": 1.059, "step": 606400 }, { "epoch": 45.51324577861163, "grad_norm": 0.630072832107544, "learning_rate": 0.002828, "loss": 1.0598, "step": 606464 }, { "epoch": 45.5180487804878, "grad_norm": 0.6712070107460022, "learning_rate": 0.002828, "loss": 1.0591, "step": 606528 }, { "epoch": 45.52285178236398, "grad_norm": 0.6040600538253784, "learning_rate": 0.002828, "loss": 1.0606, "step": 606592 }, { "epoch": 45.52765478424015, "grad_norm": 0.6480143666267395, "learning_rate": 0.002828, "loss": 1.0629, "step": 606656 }, { "epoch": 45.532457786116325, "grad_norm": 0.5866736769676208, "learning_rate": 0.002828, "loss": 1.0608, "step": 606720 }, { "epoch": 45.537260787992494, "grad_norm": 0.5952351093292236, "learning_rate": 0.002828, "loss": 1.0573, "step": 606784 }, { "epoch": 45.54206378986867, "grad_norm": 0.6766398549079895, "learning_rate": 0.002828, "loss": 1.0595, "step": 606848 }, { "epoch": 45.54686679174484, "grad_norm": 0.71608966588974, "learning_rate": 0.002828, "loss": 1.0592, "step": 606912 }, { "epoch": 45.551669793621016, "grad_norm": 0.7299851179122925, "learning_rate": 0.002828, "loss": 1.0594, "step": 606976 }, { "epoch": 45.556472795497186, "grad_norm": 0.6860955953598022, "learning_rate": 0.002828, "loss": 1.0664, "step": 607040 }, { "epoch": 45.561275797373355, "grad_norm": 0.5690156817436218, "learning_rate": 0.002828, "loss": 1.0674, "step": 607104 }, { "epoch": 45.56607879924953, "grad_norm": 0.7102258801460266, "learning_rate": 0.002828, "loss": 1.0603, "step": 607168 }, { "epoch": 45.5708818011257, "grad_norm": 0.5700670480728149, "learning_rate": 0.002828, "loss": 1.0582, "step": 607232 }, { "epoch": 45.57568480300188, "grad_norm": 0.628460168838501, "learning_rate": 0.002828, "loss": 1.0626, "step": 607296 }, { "epoch": 45.58048780487805, "grad_norm": 0.5459248423576355, "learning_rate": 0.002828, "loss": 1.0644, "step": 607360 }, { "epoch": 45.58529080675422, "grad_norm": 0.7937043905258179, "learning_rate": 0.002828, "loss": 1.0617, "step": 607424 }, { "epoch": 45.59009380863039, "grad_norm": 0.8268114924430847, "learning_rate": 0.002828, "loss": 1.0699, "step": 607488 }, { "epoch": 45.59489681050657, "grad_norm": 0.5842559337615967, "learning_rate": 0.002828, "loss": 1.0617, "step": 607552 }, { "epoch": 45.59969981238274, "grad_norm": 0.5671669840812683, "learning_rate": 0.002828, "loss": 1.0591, "step": 607616 }, { "epoch": 45.604502814258915, "grad_norm": 0.7660974264144897, "learning_rate": 0.002828, "loss": 1.0595, "step": 607680 }, { "epoch": 45.609305816135084, "grad_norm": 0.6504864692687988, "learning_rate": 0.002828, "loss": 1.0621, "step": 607744 }, { "epoch": 45.61410881801126, "grad_norm": 0.590907633304596, "learning_rate": 0.002828, "loss": 1.0661, "step": 607808 }, { "epoch": 45.61891181988743, "grad_norm": 0.7128926515579224, "learning_rate": 0.002828, "loss": 1.0601, "step": 607872 }, { "epoch": 45.6237148217636, "grad_norm": 0.643826961517334, "learning_rate": 0.002828, "loss": 1.0661, "step": 607936 }, { "epoch": 45.628517823639775, "grad_norm": 0.6559561491012573, "learning_rate": 0.002828, "loss": 1.0649, "step": 608000 }, { "epoch": 45.633320825515945, "grad_norm": 0.6678355932235718, "learning_rate": 0.002828, "loss": 1.0637, "step": 608064 }, { "epoch": 45.63812382739212, "grad_norm": 0.5866308808326721, "learning_rate": 0.002828, "loss": 1.0636, "step": 608128 }, { "epoch": 45.64292682926829, "grad_norm": 0.4977904260158539, "learning_rate": 0.002828, "loss": 1.064, "step": 608192 }, { "epoch": 45.64772983114447, "grad_norm": 0.6667592525482178, "learning_rate": 0.002828, "loss": 1.0693, "step": 608256 }, { "epoch": 45.652532833020636, "grad_norm": 0.571887195110321, "learning_rate": 0.002828, "loss": 1.0597, "step": 608320 }, { "epoch": 45.65733583489681, "grad_norm": 0.7032132744789124, "learning_rate": 0.002828, "loss": 1.0719, "step": 608384 }, { "epoch": 45.66213883677298, "grad_norm": 0.5857048630714417, "learning_rate": 0.002828, "loss": 1.0604, "step": 608448 }, { "epoch": 45.66694183864916, "grad_norm": 0.5976815223693848, "learning_rate": 0.002828, "loss": 1.0618, "step": 608512 }, { "epoch": 45.67174484052533, "grad_norm": 0.59645015001297, "learning_rate": 0.002828, "loss": 1.0643, "step": 608576 }, { "epoch": 45.676547842401504, "grad_norm": 0.6468033790588379, "learning_rate": 0.002828, "loss": 1.0637, "step": 608640 }, { "epoch": 45.681350844277674, "grad_norm": 0.5100387334823608, "learning_rate": 0.002828, "loss": 1.0612, "step": 608704 }, { "epoch": 45.68615384615384, "grad_norm": 0.6013880968093872, "learning_rate": 0.002828, "loss": 1.0661, "step": 608768 }, { "epoch": 45.69095684803002, "grad_norm": 0.7553115487098694, "learning_rate": 0.002828, "loss": 1.0637, "step": 608832 }, { "epoch": 45.69575984990619, "grad_norm": 0.7213432788848877, "learning_rate": 0.002828, "loss": 1.0638, "step": 608896 }, { "epoch": 45.700562851782365, "grad_norm": 0.7093005776405334, "learning_rate": 0.002828, "loss": 1.0617, "step": 608960 }, { "epoch": 45.705365853658535, "grad_norm": 0.5940427780151367, "learning_rate": 0.002828, "loss": 1.0667, "step": 609024 }, { "epoch": 45.71016885553471, "grad_norm": 0.5454390048980713, "learning_rate": 0.002828, "loss": 1.0621, "step": 609088 }, { "epoch": 45.71497185741088, "grad_norm": 0.6252509951591492, "learning_rate": 0.002828, "loss": 1.0669, "step": 609152 }, { "epoch": 45.71977485928706, "grad_norm": 0.7278851270675659, "learning_rate": 0.002828, "loss": 1.0644, "step": 609216 }, { "epoch": 45.724577861163226, "grad_norm": 0.6815802454948425, "learning_rate": 0.002828, "loss": 1.0657, "step": 609280 }, { "epoch": 45.7293808630394, "grad_norm": 0.6935381889343262, "learning_rate": 0.002828, "loss": 1.0701, "step": 609344 }, { "epoch": 45.73418386491557, "grad_norm": 0.5729061365127563, "learning_rate": 0.002828, "loss": 1.0662, "step": 609408 }, { "epoch": 45.73898686679175, "grad_norm": 0.6517598628997803, "learning_rate": 0.002828, "loss": 1.0643, "step": 609472 }, { "epoch": 45.74378986866792, "grad_norm": 0.6475855112075806, "learning_rate": 0.002828, "loss": 1.0675, "step": 609536 }, { "epoch": 45.74859287054409, "grad_norm": 0.7667732834815979, "learning_rate": 0.002828, "loss": 1.0653, "step": 609600 }, { "epoch": 45.75339587242026, "grad_norm": 0.782156229019165, "learning_rate": 0.002828, "loss": 1.062, "step": 609664 }, { "epoch": 45.75819887429643, "grad_norm": 0.6261060237884521, "learning_rate": 0.002828, "loss": 1.0675, "step": 609728 }, { "epoch": 45.76300187617261, "grad_norm": 0.5714720487594604, "learning_rate": 0.002828, "loss": 1.0648, "step": 609792 }, { "epoch": 45.76780487804878, "grad_norm": 0.7018872499465942, "learning_rate": 0.002828, "loss": 1.0621, "step": 609856 }, { "epoch": 45.772607879924955, "grad_norm": 0.5345858931541443, "learning_rate": 0.002828, "loss": 1.0628, "step": 609920 }, { "epoch": 45.777410881801124, "grad_norm": 0.728009819984436, "learning_rate": 0.002828, "loss": 1.0657, "step": 609984 }, { "epoch": 45.7822138836773, "grad_norm": 0.5718938112258911, "learning_rate": 0.002828, "loss": 1.0606, "step": 610048 }, { "epoch": 45.78701688555347, "grad_norm": 0.533854603767395, "learning_rate": 0.002828, "loss": 1.0623, "step": 610112 }, { "epoch": 45.79181988742965, "grad_norm": 0.5917041301727295, "learning_rate": 0.002828, "loss": 1.0697, "step": 610176 }, { "epoch": 45.796622889305816, "grad_norm": 0.7023472785949707, "learning_rate": 0.002828, "loss": 1.0664, "step": 610240 }, { "epoch": 45.80142589118199, "grad_norm": 0.5526425838470459, "learning_rate": 0.002828, "loss": 1.0667, "step": 610304 }, { "epoch": 45.80622889305816, "grad_norm": 0.7164841294288635, "learning_rate": 0.002828, "loss": 1.0693, "step": 610368 }, { "epoch": 45.81103189493433, "grad_norm": 0.6742995381355286, "learning_rate": 0.002828, "loss": 1.0676, "step": 610432 }, { "epoch": 45.81583489681051, "grad_norm": 0.5527735948562622, "learning_rate": 0.002828, "loss": 1.0698, "step": 610496 }, { "epoch": 45.82063789868668, "grad_norm": 0.6233502626419067, "learning_rate": 0.002828, "loss": 1.065, "step": 610560 }, { "epoch": 45.82544090056285, "grad_norm": 0.5897585153579712, "learning_rate": 0.002828, "loss": 1.0648, "step": 610624 }, { "epoch": 45.83024390243902, "grad_norm": 0.6187131404876709, "learning_rate": 0.002828, "loss": 1.0725, "step": 610688 }, { "epoch": 45.8350469043152, "grad_norm": 0.7255659699440002, "learning_rate": 0.002828, "loss": 1.0625, "step": 610752 }, { "epoch": 45.83984990619137, "grad_norm": 0.9766494631767273, "learning_rate": 0.002828, "loss": 1.0674, "step": 610816 }, { "epoch": 45.844652908067545, "grad_norm": 0.6275309324264526, "learning_rate": 0.002828, "loss": 1.0621, "step": 610880 }, { "epoch": 45.849455909943714, "grad_norm": 0.6769102811813354, "learning_rate": 0.002828, "loss": 1.0727, "step": 610944 }, { "epoch": 45.85425891181989, "grad_norm": 0.5380213260650635, "learning_rate": 0.002828, "loss": 1.0707, "step": 611008 }, { "epoch": 45.85906191369606, "grad_norm": 0.7997820377349854, "learning_rate": 0.002828, "loss": 1.0693, "step": 611072 }, { "epoch": 45.863864915572236, "grad_norm": 0.5203222632408142, "learning_rate": 0.002828, "loss": 1.0704, "step": 611136 }, { "epoch": 45.868667917448406, "grad_norm": 0.6101234555244446, "learning_rate": 0.002828, "loss": 1.0677, "step": 611200 }, { "epoch": 45.873470919324575, "grad_norm": 0.5174065232276917, "learning_rate": 0.002828, "loss": 1.0717, "step": 611264 }, { "epoch": 45.87827392120075, "grad_norm": 0.6431335210800171, "learning_rate": 0.002828, "loss": 1.069, "step": 611328 }, { "epoch": 45.88307692307692, "grad_norm": 0.5777249932289124, "learning_rate": 0.002828, "loss": 1.0649, "step": 611392 }, { "epoch": 45.8878799249531, "grad_norm": 0.5198315978050232, "learning_rate": 0.002828, "loss": 1.0688, "step": 611456 }, { "epoch": 45.892682926829266, "grad_norm": 0.7460737824440002, "learning_rate": 0.002828, "loss": 1.0706, "step": 611520 }, { "epoch": 45.89748592870544, "grad_norm": 0.6131591796875, "learning_rate": 0.002828, "loss": 1.0666, "step": 611584 }, { "epoch": 45.90228893058161, "grad_norm": 0.7207826972007751, "learning_rate": 0.002828, "loss": 1.0662, "step": 611648 }, { "epoch": 45.90709193245779, "grad_norm": 0.5071738362312317, "learning_rate": 0.002828, "loss": 1.0707, "step": 611712 }, { "epoch": 45.91189493433396, "grad_norm": 0.638817310333252, "learning_rate": 0.002828, "loss": 1.0696, "step": 611776 }, { "epoch": 45.916697936210134, "grad_norm": 0.5947349071502686, "learning_rate": 0.002828, "loss": 1.0707, "step": 611840 }, { "epoch": 45.921500938086304, "grad_norm": 0.7027119994163513, "learning_rate": 0.002828, "loss": 1.0681, "step": 611904 }, { "epoch": 45.92630393996247, "grad_norm": 0.667210042476654, "learning_rate": 0.002828, "loss": 1.0658, "step": 611968 }, { "epoch": 45.93110694183865, "grad_norm": 0.7699005603790283, "learning_rate": 0.002828, "loss": 1.0635, "step": 612032 }, { "epoch": 45.93590994371482, "grad_norm": 0.7679214477539062, "learning_rate": 0.002828, "loss": 1.0624, "step": 612096 }, { "epoch": 45.940712945590995, "grad_norm": 0.5503223538398743, "learning_rate": 0.002828, "loss": 1.0688, "step": 612160 }, { "epoch": 45.945515947467165, "grad_norm": 0.7197269201278687, "learning_rate": 0.002828, "loss": 1.0723, "step": 612224 }, { "epoch": 45.95031894934334, "grad_norm": 0.8216593861579895, "learning_rate": 0.002828, "loss": 1.07, "step": 612288 }, { "epoch": 45.95512195121951, "grad_norm": 0.7016892433166504, "learning_rate": 0.002828, "loss": 1.0717, "step": 612352 }, { "epoch": 45.95992495309569, "grad_norm": 0.568098783493042, "learning_rate": 0.002828, "loss": 1.0734, "step": 612416 }, { "epoch": 45.964727954971856, "grad_norm": 0.7623087167739868, "learning_rate": 0.002828, "loss": 1.07, "step": 612480 }, { "epoch": 45.96953095684803, "grad_norm": 0.6430860161781311, "learning_rate": 0.002828, "loss": 1.0715, "step": 612544 }, { "epoch": 45.9743339587242, "grad_norm": 0.7953311800956726, "learning_rate": 0.002828, "loss": 1.0693, "step": 612608 }, { "epoch": 45.97913696060038, "grad_norm": 0.555494487285614, "learning_rate": 0.002828, "loss": 1.0678, "step": 612672 }, { "epoch": 45.98393996247655, "grad_norm": 0.5385116934776306, "learning_rate": 0.002828, "loss": 1.0696, "step": 612736 }, { "epoch": 45.98874296435272, "grad_norm": 0.5468719601631165, "learning_rate": 0.002828, "loss": 1.073, "step": 612800 }, { "epoch": 45.99354596622889, "grad_norm": 0.5610455870628357, "learning_rate": 0.002828, "loss": 1.0707, "step": 612864 }, { "epoch": 45.99834896810506, "grad_norm": 0.7394833564758301, "learning_rate": 0.002828, "loss": 1.0752, "step": 612928 }, { "epoch": 46.00315196998124, "grad_norm": 0.529012143611908, "learning_rate": 0.002828, "loss": 1.037, "step": 612992 }, { "epoch": 46.00795497185741, "grad_norm": 0.7306380271911621, "learning_rate": 0.002828, "loss": 1.042, "step": 613056 }, { "epoch": 46.012757973733585, "grad_norm": 0.5341888070106506, "learning_rate": 0.002828, "loss": 1.036, "step": 613120 }, { "epoch": 46.017560975609754, "grad_norm": 0.6116080284118652, "learning_rate": 0.002828, "loss": 1.0346, "step": 613184 }, { "epoch": 46.02236397748593, "grad_norm": 0.6336046457290649, "learning_rate": 0.002828, "loss": 1.0421, "step": 613248 }, { "epoch": 46.0271669793621, "grad_norm": 0.8078818917274475, "learning_rate": 0.002828, "loss": 1.0385, "step": 613312 }, { "epoch": 46.03196998123828, "grad_norm": 0.6536026000976562, "learning_rate": 0.002828, "loss": 1.0373, "step": 613376 }, { "epoch": 46.036772983114446, "grad_norm": 0.6699345111846924, "learning_rate": 0.002828, "loss": 1.0352, "step": 613440 }, { "epoch": 46.04157598499062, "grad_norm": 0.7356686592102051, "learning_rate": 0.002828, "loss": 1.0307, "step": 613504 }, { "epoch": 46.04637898686679, "grad_norm": 0.5745231509208679, "learning_rate": 0.002828, "loss": 1.0375, "step": 613568 }, { "epoch": 46.05118198874296, "grad_norm": 0.5645381212234497, "learning_rate": 0.002828, "loss": 1.0389, "step": 613632 }, { "epoch": 46.05598499061914, "grad_norm": 0.6182840466499329, "learning_rate": 0.002828, "loss": 1.0426, "step": 613696 }, { "epoch": 46.06078799249531, "grad_norm": 0.5951291918754578, "learning_rate": 0.002828, "loss": 1.0327, "step": 613760 }, { "epoch": 46.06559099437148, "grad_norm": 0.7547361850738525, "learning_rate": 0.002828, "loss": 1.035, "step": 613824 }, { "epoch": 46.07039399624765, "grad_norm": 0.6684466004371643, "learning_rate": 0.002828, "loss": 1.0391, "step": 613888 }, { "epoch": 46.07519699812383, "grad_norm": 0.8261184692382812, "learning_rate": 0.002828, "loss": 1.0406, "step": 613952 }, { "epoch": 46.08, "grad_norm": 0.6962255239486694, "learning_rate": 0.002828, "loss": 1.0452, "step": 614016 }, { "epoch": 46.084803001876175, "grad_norm": 0.594768762588501, "learning_rate": 0.002828, "loss": 1.0394, "step": 614080 }, { "epoch": 46.089606003752344, "grad_norm": 0.6519895792007446, "learning_rate": 0.002828, "loss": 1.0406, "step": 614144 }, { "epoch": 46.09440900562852, "grad_norm": 0.605659008026123, "learning_rate": 0.002828, "loss": 1.0396, "step": 614208 }, { "epoch": 46.09921200750469, "grad_norm": 0.798794150352478, "learning_rate": 0.002828, "loss": 1.0375, "step": 614272 }, { "epoch": 46.104015009380866, "grad_norm": 0.6516727209091187, "learning_rate": 0.002828, "loss": 1.0366, "step": 614336 }, { "epoch": 46.108818011257036, "grad_norm": 0.5554437637329102, "learning_rate": 0.002828, "loss": 1.0429, "step": 614400 }, { "epoch": 46.113621013133205, "grad_norm": 0.7370342016220093, "learning_rate": 0.002828, "loss": 1.044, "step": 614464 }, { "epoch": 46.11842401500938, "grad_norm": 0.5281388163566589, "learning_rate": 0.002828, "loss": 1.038, "step": 614528 }, { "epoch": 46.12322701688555, "grad_norm": 0.6068486571311951, "learning_rate": 0.002828, "loss": 1.043, "step": 614592 }, { "epoch": 46.12803001876173, "grad_norm": 0.5642166137695312, "learning_rate": 0.002828, "loss": 1.0401, "step": 614656 }, { "epoch": 46.1328330206379, "grad_norm": 0.7300256490707397, "learning_rate": 0.002828, "loss": 1.0428, "step": 614720 }, { "epoch": 46.13763602251407, "grad_norm": 0.5526905655860901, "learning_rate": 0.002828, "loss": 1.0444, "step": 614784 }, { "epoch": 46.14243902439024, "grad_norm": 0.5648698806762695, "learning_rate": 0.002828, "loss": 1.0403, "step": 614848 }, { "epoch": 46.14724202626642, "grad_norm": 0.5436872839927673, "learning_rate": 0.002828, "loss": 1.0417, "step": 614912 }, { "epoch": 46.15204502814259, "grad_norm": 0.7099076509475708, "learning_rate": 0.002828, "loss": 1.0422, "step": 614976 }, { "epoch": 46.156848030018764, "grad_norm": 0.668596088886261, "learning_rate": 0.002828, "loss": 1.0413, "step": 615040 }, { "epoch": 46.161651031894934, "grad_norm": 0.6142210960388184, "learning_rate": 0.002828, "loss": 1.0408, "step": 615104 }, { "epoch": 46.16645403377111, "grad_norm": 0.5688113570213318, "learning_rate": 0.002828, "loss": 1.04, "step": 615168 }, { "epoch": 46.17125703564728, "grad_norm": 0.5963445901870728, "learning_rate": 0.002828, "loss": 1.0417, "step": 615232 }, { "epoch": 46.17606003752345, "grad_norm": 0.6413314938545227, "learning_rate": 0.002828, "loss": 1.0467, "step": 615296 }, { "epoch": 46.180863039399625, "grad_norm": 0.729892909526825, "learning_rate": 0.002828, "loss": 1.0436, "step": 615360 }, { "epoch": 46.185666041275795, "grad_norm": 0.5900475382804871, "learning_rate": 0.002828, "loss": 1.0513, "step": 615424 }, { "epoch": 46.19046904315197, "grad_norm": 0.5937836766242981, "learning_rate": 0.002828, "loss": 1.0463, "step": 615488 }, { "epoch": 46.19527204502814, "grad_norm": 0.5667184591293335, "learning_rate": 0.002828, "loss": 1.044, "step": 615552 }, { "epoch": 46.20007504690432, "grad_norm": 0.7023417949676514, "learning_rate": 0.002828, "loss": 1.0459, "step": 615616 }, { "epoch": 46.204878048780486, "grad_norm": 0.6091719269752502, "learning_rate": 0.002828, "loss": 1.0431, "step": 615680 }, { "epoch": 46.20968105065666, "grad_norm": 0.6026754379272461, "learning_rate": 0.002828, "loss": 1.0404, "step": 615744 }, { "epoch": 46.21448405253283, "grad_norm": 0.7550497651100159, "learning_rate": 0.002828, "loss": 1.0404, "step": 615808 }, { "epoch": 46.21928705440901, "grad_norm": 0.6541439294815063, "learning_rate": 0.002828, "loss": 1.0424, "step": 615872 }, { "epoch": 46.22409005628518, "grad_norm": 0.678283154964447, "learning_rate": 0.002828, "loss": 1.0498, "step": 615936 }, { "epoch": 46.228893058161354, "grad_norm": 0.8214104771614075, "learning_rate": 0.002828, "loss": 1.0465, "step": 616000 }, { "epoch": 46.23369606003752, "grad_norm": 0.629660427570343, "learning_rate": 0.002828, "loss": 1.0469, "step": 616064 }, { "epoch": 46.23849906191369, "grad_norm": 0.5089216232299805, "learning_rate": 0.002828, "loss": 1.0481, "step": 616128 }, { "epoch": 46.24330206378987, "grad_norm": 0.6166875958442688, "learning_rate": 0.002828, "loss": 1.044, "step": 616192 }, { "epoch": 46.24810506566604, "grad_norm": 0.6610133051872253, "learning_rate": 0.002828, "loss": 1.0462, "step": 616256 }, { "epoch": 46.252908067542215, "grad_norm": 0.6949422359466553, "learning_rate": 0.002828, "loss": 1.0411, "step": 616320 }, { "epoch": 46.257711069418384, "grad_norm": 0.6649113297462463, "learning_rate": 0.002828, "loss": 1.0463, "step": 616384 }, { "epoch": 46.26251407129456, "grad_norm": 0.7349320650100708, "learning_rate": 0.002828, "loss": 1.0439, "step": 616448 }, { "epoch": 46.26731707317073, "grad_norm": 0.6438102722167969, "learning_rate": 0.002828, "loss": 1.0438, "step": 616512 }, { "epoch": 46.27212007504691, "grad_norm": 0.688700258731842, "learning_rate": 0.002828, "loss": 1.0475, "step": 616576 }, { "epoch": 46.276923076923076, "grad_norm": 0.49075180292129517, "learning_rate": 0.002828, "loss": 1.0411, "step": 616640 }, { "epoch": 46.28172607879925, "grad_norm": 0.4977070987224579, "learning_rate": 0.002828, "loss": 1.0468, "step": 616704 }, { "epoch": 46.28652908067542, "grad_norm": 0.6536993980407715, "learning_rate": 0.002828, "loss": 1.0524, "step": 616768 }, { "epoch": 46.2913320825516, "grad_norm": 0.5656940340995789, "learning_rate": 0.002828, "loss": 1.0464, "step": 616832 }, { "epoch": 46.29613508442777, "grad_norm": 0.7046701908111572, "learning_rate": 0.002828, "loss": 1.0495, "step": 616896 }, { "epoch": 46.30093808630394, "grad_norm": 0.7004103660583496, "learning_rate": 0.002828, "loss": 1.0455, "step": 616960 }, { "epoch": 46.30574108818011, "grad_norm": 0.6272410154342651, "learning_rate": 0.002828, "loss": 1.0444, "step": 617024 }, { "epoch": 46.31054409005628, "grad_norm": 0.6549649834632874, "learning_rate": 0.002828, "loss": 1.0468, "step": 617088 }, { "epoch": 46.31534709193246, "grad_norm": 0.6230428218841553, "learning_rate": 0.002828, "loss": 1.0448, "step": 617152 }, { "epoch": 46.32015009380863, "grad_norm": 0.576143741607666, "learning_rate": 0.002828, "loss": 1.0445, "step": 617216 }, { "epoch": 46.324953095684805, "grad_norm": 0.6095119118690491, "learning_rate": 0.002828, "loss": 1.0525, "step": 617280 }, { "epoch": 46.329756097560974, "grad_norm": 0.7142927050590515, "learning_rate": 0.002828, "loss": 1.0497, "step": 617344 }, { "epoch": 46.33455909943715, "grad_norm": 0.6191550493240356, "learning_rate": 0.002828, "loss": 1.0512, "step": 617408 }, { "epoch": 46.33936210131332, "grad_norm": 0.681013286113739, "learning_rate": 0.002828, "loss": 1.0467, "step": 617472 }, { "epoch": 46.344165103189496, "grad_norm": 0.5898663401603699, "learning_rate": 0.002828, "loss": 1.0524, "step": 617536 }, { "epoch": 46.348968105065666, "grad_norm": 0.7214446067810059, "learning_rate": 0.002828, "loss": 1.0506, "step": 617600 }, { "epoch": 46.35377110694184, "grad_norm": 0.6058822274208069, "learning_rate": 0.002828, "loss": 1.0548, "step": 617664 }, { "epoch": 46.35857410881801, "grad_norm": 0.6141964197158813, "learning_rate": 0.002828, "loss": 1.0531, "step": 617728 }, { "epoch": 46.36337711069418, "grad_norm": 0.5327640175819397, "learning_rate": 0.002828, "loss": 1.0462, "step": 617792 }, { "epoch": 46.36818011257036, "grad_norm": 0.7058151364326477, "learning_rate": 0.002828, "loss": 1.0535, "step": 617856 }, { "epoch": 46.37298311444653, "grad_norm": 0.46748828887939453, "learning_rate": 0.002828, "loss": 1.0495, "step": 617920 }, { "epoch": 46.3777861163227, "grad_norm": 0.5134317874908447, "learning_rate": 0.002828, "loss": 1.0479, "step": 617984 }, { "epoch": 46.38258911819887, "grad_norm": 0.799745500087738, "learning_rate": 0.002828, "loss": 1.0535, "step": 618048 }, { "epoch": 46.38739212007505, "grad_norm": 0.5732191801071167, "learning_rate": 0.002828, "loss": 1.0489, "step": 618112 }, { "epoch": 46.39219512195122, "grad_norm": 0.5947842597961426, "learning_rate": 0.002828, "loss": 1.0524, "step": 618176 }, { "epoch": 46.396998123827395, "grad_norm": 0.5764126181602478, "learning_rate": 0.002828, "loss": 1.0514, "step": 618240 }, { "epoch": 46.401801125703564, "grad_norm": 0.4855901300907135, "learning_rate": 0.002828, "loss": 1.0534, "step": 618304 }, { "epoch": 46.40660412757974, "grad_norm": 0.6275175213813782, "learning_rate": 0.002828, "loss": 1.0553, "step": 618368 }, { "epoch": 46.41140712945591, "grad_norm": 0.6019368171691895, "learning_rate": 0.002828, "loss": 1.0534, "step": 618432 }, { "epoch": 46.416210131332086, "grad_norm": 0.531459391117096, "learning_rate": 0.002828, "loss": 1.0519, "step": 618496 }, { "epoch": 46.421013133208255, "grad_norm": 0.5653847455978394, "learning_rate": 0.002828, "loss": 1.055, "step": 618560 }, { "epoch": 46.425816135084425, "grad_norm": 0.6705509424209595, "learning_rate": 0.002828, "loss": 1.0548, "step": 618624 }, { "epoch": 46.4306191369606, "grad_norm": 0.6440606117248535, "learning_rate": 0.002828, "loss": 1.0522, "step": 618688 }, { "epoch": 46.43542213883677, "grad_norm": 0.6682266592979431, "learning_rate": 0.002828, "loss": 1.0521, "step": 618752 }, { "epoch": 46.44022514071295, "grad_norm": 0.6298613548278809, "learning_rate": 0.002828, "loss": 1.0512, "step": 618816 }, { "epoch": 46.445028142589116, "grad_norm": 0.776021420955658, "learning_rate": 0.002828, "loss": 1.0534, "step": 618880 }, { "epoch": 46.44983114446529, "grad_norm": 0.5949306488037109, "learning_rate": 0.002828, "loss": 1.0521, "step": 618944 }, { "epoch": 46.45463414634146, "grad_norm": 0.6268242597579956, "learning_rate": 0.002828, "loss": 1.0556, "step": 619008 }, { "epoch": 46.45943714821764, "grad_norm": 0.5667731761932373, "learning_rate": 0.002828, "loss": 1.0518, "step": 619072 }, { "epoch": 46.46424015009381, "grad_norm": 0.5637649297714233, "learning_rate": 0.002828, "loss": 1.052, "step": 619136 }, { "epoch": 46.469043151969984, "grad_norm": 0.6626964211463928, "learning_rate": 0.002828, "loss": 1.0514, "step": 619200 }, { "epoch": 46.473846153846154, "grad_norm": 0.8213684558868408, "learning_rate": 0.002828, "loss": 1.0564, "step": 619264 }, { "epoch": 46.47864915572232, "grad_norm": 0.7015941739082336, "learning_rate": 0.002828, "loss": 1.0542, "step": 619328 }, { "epoch": 46.4834521575985, "grad_norm": 0.6902109980583191, "learning_rate": 0.002828, "loss": 1.0554, "step": 619392 }, { "epoch": 46.48825515947467, "grad_norm": 0.6375514268875122, "learning_rate": 0.002828, "loss": 1.0525, "step": 619456 }, { "epoch": 46.493058161350845, "grad_norm": 0.6744531989097595, "learning_rate": 0.002828, "loss": 1.0577, "step": 619520 }, { "epoch": 46.497861163227014, "grad_norm": 0.6031131148338318, "learning_rate": 0.002828, "loss": 1.0535, "step": 619584 }, { "epoch": 46.50266416510319, "grad_norm": 0.5269550681114197, "learning_rate": 0.002828, "loss": 1.0565, "step": 619648 }, { "epoch": 46.50746716697936, "grad_norm": 0.5548364520072937, "learning_rate": 0.002828, "loss": 1.0579, "step": 619712 }, { "epoch": 46.51227016885554, "grad_norm": 0.6689755320549011, "learning_rate": 0.002828, "loss": 1.0569, "step": 619776 }, { "epoch": 46.517073170731706, "grad_norm": 0.5496863722801208, "learning_rate": 0.002828, "loss": 1.0548, "step": 619840 }, { "epoch": 46.52187617260788, "grad_norm": 0.5853809714317322, "learning_rate": 0.002828, "loss": 1.051, "step": 619904 }, { "epoch": 46.52667917448405, "grad_norm": 0.6034921407699585, "learning_rate": 0.002828, "loss": 1.0507, "step": 619968 }, { "epoch": 46.53148217636023, "grad_norm": 0.6705649495124817, "learning_rate": 0.002828, "loss": 1.0521, "step": 620032 }, { "epoch": 46.5362851782364, "grad_norm": 0.5398948192596436, "learning_rate": 0.002828, "loss": 1.0468, "step": 620096 }, { "epoch": 46.54108818011257, "grad_norm": 0.5841556787490845, "learning_rate": 0.002828, "loss": 1.0577, "step": 620160 }, { "epoch": 46.54589118198874, "grad_norm": 0.6120521426200867, "learning_rate": 0.002828, "loss": 1.0484, "step": 620224 }, { "epoch": 46.55069418386491, "grad_norm": 0.6358661651611328, "learning_rate": 0.002828, "loss": 1.0567, "step": 620288 }, { "epoch": 46.55549718574109, "grad_norm": 0.7871175408363342, "learning_rate": 0.002828, "loss": 1.054, "step": 620352 }, { "epoch": 46.56030018761726, "grad_norm": 0.6011291742324829, "learning_rate": 0.002828, "loss": 1.0511, "step": 620416 }, { "epoch": 46.565103189493435, "grad_norm": 0.7039676308631897, "learning_rate": 0.002828, "loss": 1.0543, "step": 620480 }, { "epoch": 46.569906191369604, "grad_norm": 0.7038636207580566, "learning_rate": 0.002828, "loss": 1.0553, "step": 620544 }, { "epoch": 46.57470919324578, "grad_norm": 0.6768410205841064, "learning_rate": 0.002828, "loss": 1.0512, "step": 620608 }, { "epoch": 46.57951219512195, "grad_norm": 0.6707714796066284, "learning_rate": 0.002828, "loss": 1.0508, "step": 620672 }, { "epoch": 46.584315196998126, "grad_norm": 0.678458034992218, "learning_rate": 0.002828, "loss": 1.0575, "step": 620736 }, { "epoch": 46.589118198874296, "grad_norm": 0.6657276749610901, "learning_rate": 0.002828, "loss": 1.057, "step": 620800 }, { "epoch": 46.59392120075047, "grad_norm": 0.6960893273353577, "learning_rate": 0.002828, "loss": 1.0588, "step": 620864 }, { "epoch": 46.59872420262664, "grad_norm": 0.6260180473327637, "learning_rate": 0.002828, "loss": 1.0576, "step": 620928 }, { "epoch": 46.60352720450281, "grad_norm": 0.5803308486938477, "learning_rate": 0.002828, "loss": 1.0544, "step": 620992 }, { "epoch": 46.60833020637899, "grad_norm": 0.563851535320282, "learning_rate": 0.002828, "loss": 1.0559, "step": 621056 }, { "epoch": 46.61313320825516, "grad_norm": 0.6698217988014221, "learning_rate": 0.002828, "loss": 1.0547, "step": 621120 }, { "epoch": 46.61793621013133, "grad_norm": 0.670448899269104, "learning_rate": 0.002828, "loss": 1.0593, "step": 621184 }, { "epoch": 46.6227392120075, "grad_norm": 0.5835123658180237, "learning_rate": 0.002828, "loss": 1.0567, "step": 621248 }, { "epoch": 46.62754221388368, "grad_norm": 0.6441203951835632, "learning_rate": 0.002828, "loss": 1.0545, "step": 621312 }, { "epoch": 46.63234521575985, "grad_norm": 0.666936457157135, "learning_rate": 0.002828, "loss": 1.0577, "step": 621376 }, { "epoch": 46.637148217636025, "grad_norm": 0.6317396759986877, "learning_rate": 0.002828, "loss": 1.0607, "step": 621440 }, { "epoch": 46.641951219512194, "grad_norm": 0.5824635028839111, "learning_rate": 0.002828, "loss": 1.0611, "step": 621504 }, { "epoch": 46.64675422138837, "grad_norm": 0.6023385524749756, "learning_rate": 0.002828, "loss": 1.0541, "step": 621568 }, { "epoch": 46.65155722326454, "grad_norm": 0.6010758876800537, "learning_rate": 0.002828, "loss": 1.0565, "step": 621632 }, { "epoch": 46.656360225140716, "grad_norm": 0.6052330732345581, "learning_rate": 0.002828, "loss": 1.0539, "step": 621696 }, { "epoch": 46.661163227016885, "grad_norm": 0.6076634526252747, "learning_rate": 0.002828, "loss": 1.0563, "step": 621760 }, { "epoch": 46.665966228893055, "grad_norm": 0.6403579711914062, "learning_rate": 0.002828, "loss": 1.0607, "step": 621824 }, { "epoch": 46.67076923076923, "grad_norm": 0.6364386081695557, "learning_rate": 0.002828, "loss": 1.0626, "step": 621888 }, { "epoch": 46.6755722326454, "grad_norm": 0.6403948068618774, "learning_rate": 0.002828, "loss": 1.0538, "step": 621952 }, { "epoch": 46.68037523452158, "grad_norm": 0.48538199067115784, "learning_rate": 0.002828, "loss": 1.0576, "step": 622016 }, { "epoch": 46.685178236397746, "grad_norm": 0.6486366391181946, "learning_rate": 0.002828, "loss": 1.0551, "step": 622080 }, { "epoch": 46.68998123827392, "grad_norm": 0.650784969329834, "learning_rate": 0.002828, "loss": 1.0575, "step": 622144 }, { "epoch": 46.69478424015009, "grad_norm": 0.7481862902641296, "learning_rate": 0.002828, "loss": 1.0558, "step": 622208 }, { "epoch": 46.69958724202627, "grad_norm": 0.6436601281166077, "learning_rate": 0.002828, "loss": 1.0596, "step": 622272 }, { "epoch": 46.70439024390244, "grad_norm": 0.6628262996673584, "learning_rate": 0.002828, "loss": 1.0623, "step": 622336 }, { "epoch": 46.709193245778614, "grad_norm": 0.680276095867157, "learning_rate": 0.002828, "loss": 1.0563, "step": 622400 }, { "epoch": 46.713996247654784, "grad_norm": 0.6068404912948608, "learning_rate": 0.002828, "loss": 1.0491, "step": 622464 }, { "epoch": 46.71879924953096, "grad_norm": 0.5882270932197571, "learning_rate": 0.002828, "loss": 1.06, "step": 622528 }, { "epoch": 46.72360225140713, "grad_norm": 0.6210886240005493, "learning_rate": 0.002828, "loss": 1.0512, "step": 622592 }, { "epoch": 46.7284052532833, "grad_norm": 0.5923880934715271, "learning_rate": 0.002828, "loss": 1.0589, "step": 622656 }, { "epoch": 46.733208255159475, "grad_norm": 0.6892109513282776, "learning_rate": 0.002828, "loss": 1.0578, "step": 622720 }, { "epoch": 46.738011257035645, "grad_norm": 0.6397147178649902, "learning_rate": 0.002828, "loss": 1.0595, "step": 622784 }, { "epoch": 46.74281425891182, "grad_norm": 0.6659414172172546, "learning_rate": 0.002828, "loss": 1.0609, "step": 622848 }, { "epoch": 46.74761726078799, "grad_norm": 0.7534034252166748, "learning_rate": 0.002828, "loss": 1.0582, "step": 622912 }, { "epoch": 46.75242026266417, "grad_norm": 0.6626340746879578, "learning_rate": 0.002828, "loss": 1.0608, "step": 622976 }, { "epoch": 46.757223264540336, "grad_norm": 0.6399165391921997, "learning_rate": 0.002828, "loss": 1.0617, "step": 623040 }, { "epoch": 46.76202626641651, "grad_norm": 0.5546966195106506, "learning_rate": 0.002828, "loss": 1.0594, "step": 623104 }, { "epoch": 46.76682926829268, "grad_norm": 0.6276941299438477, "learning_rate": 0.002828, "loss": 1.0606, "step": 623168 }, { "epoch": 46.77163227016886, "grad_norm": 0.6073575615882874, "learning_rate": 0.002828, "loss": 1.0576, "step": 623232 }, { "epoch": 46.77643527204503, "grad_norm": 0.688251793384552, "learning_rate": 0.002828, "loss": 1.0573, "step": 623296 }, { "epoch": 46.781238273921204, "grad_norm": 0.6676405668258667, "learning_rate": 0.002828, "loss": 1.0561, "step": 623360 }, { "epoch": 46.78604127579737, "grad_norm": 0.578522264957428, "learning_rate": 0.002828, "loss": 1.0615, "step": 623424 }, { "epoch": 46.79084427767354, "grad_norm": 0.6574742197990417, "learning_rate": 0.002828, "loss": 1.0592, "step": 623488 }, { "epoch": 46.79564727954972, "grad_norm": 0.7181845307350159, "learning_rate": 0.002828, "loss": 1.066, "step": 623552 }, { "epoch": 46.80045028142589, "grad_norm": 0.5383839011192322, "learning_rate": 0.002828, "loss": 1.056, "step": 623616 }, { "epoch": 46.805253283302065, "grad_norm": 0.5833193063735962, "learning_rate": 0.002828, "loss": 1.0584, "step": 623680 }, { "epoch": 46.810056285178234, "grad_norm": 0.6272100210189819, "learning_rate": 0.002828, "loss": 1.0568, "step": 623744 }, { "epoch": 46.81485928705441, "grad_norm": 0.5736045241355896, "learning_rate": 0.002828, "loss": 1.0598, "step": 623808 }, { "epoch": 46.81966228893058, "grad_norm": 0.8262968063354492, "learning_rate": 0.002828, "loss": 1.0644, "step": 623872 }, { "epoch": 46.82446529080676, "grad_norm": 0.6368528604507446, "learning_rate": 0.002828, "loss": 1.0644, "step": 623936 }, { "epoch": 46.829268292682926, "grad_norm": 0.6689131259918213, "learning_rate": 0.002828, "loss": 1.06, "step": 624000 }, { "epoch": 46.8340712945591, "grad_norm": 0.5683980584144592, "learning_rate": 0.002828, "loss": 1.0604, "step": 624064 }, { "epoch": 46.83887429643527, "grad_norm": 0.5765275955200195, "learning_rate": 0.002828, "loss": 1.0654, "step": 624128 }, { "epoch": 46.84367729831145, "grad_norm": 0.5761871933937073, "learning_rate": 0.002828, "loss": 1.0644, "step": 624192 }, { "epoch": 46.84848030018762, "grad_norm": 0.5934112071990967, "learning_rate": 0.002828, "loss": 1.0633, "step": 624256 }, { "epoch": 46.85328330206379, "grad_norm": 0.521324634552002, "learning_rate": 0.002828, "loss": 1.0687, "step": 624320 }, { "epoch": 46.85808630393996, "grad_norm": 0.6639213562011719, "learning_rate": 0.002828, "loss": 1.0542, "step": 624384 }, { "epoch": 46.86288930581613, "grad_norm": 0.6850979328155518, "learning_rate": 0.002828, "loss": 1.0618, "step": 624448 }, { "epoch": 46.86769230769231, "grad_norm": 0.6221501231193542, "learning_rate": 0.002828, "loss": 1.0626, "step": 624512 }, { "epoch": 46.87249530956848, "grad_norm": 0.6977725028991699, "learning_rate": 0.002828, "loss": 1.0613, "step": 624576 }, { "epoch": 46.877298311444655, "grad_norm": 0.6750850081443787, "learning_rate": 0.002828, "loss": 1.0663, "step": 624640 }, { "epoch": 46.882101313320824, "grad_norm": 0.6050744652748108, "learning_rate": 0.002828, "loss": 1.0634, "step": 624704 }, { "epoch": 46.886904315197, "grad_norm": 0.7393004298210144, "learning_rate": 0.002828, "loss": 1.0625, "step": 624768 }, { "epoch": 46.89170731707317, "grad_norm": 0.669156551361084, "learning_rate": 0.002828, "loss": 1.0646, "step": 624832 }, { "epoch": 46.896510318949346, "grad_norm": 0.5985300540924072, "learning_rate": 0.002828, "loss": 1.0641, "step": 624896 }, { "epoch": 46.901313320825516, "grad_norm": 0.5303430557250977, "learning_rate": 0.002828, "loss": 1.0651, "step": 624960 }, { "epoch": 46.90611632270169, "grad_norm": 0.5656881332397461, "learning_rate": 0.002828, "loss": 1.0672, "step": 625024 }, { "epoch": 46.91091932457786, "grad_norm": 0.6489809155464172, "learning_rate": 0.002828, "loss": 1.0628, "step": 625088 }, { "epoch": 46.91572232645403, "grad_norm": 0.5730379223823547, "learning_rate": 0.002828, "loss": 1.0618, "step": 625152 }, { "epoch": 46.92052532833021, "grad_norm": 0.6608423590660095, "learning_rate": 0.002828, "loss": 1.0624, "step": 625216 }, { "epoch": 46.925328330206376, "grad_norm": 0.4727746248245239, "learning_rate": 0.002828, "loss": 1.0652, "step": 625280 }, { "epoch": 46.93013133208255, "grad_norm": 0.6244372129440308, "learning_rate": 0.002828, "loss": 1.0659, "step": 625344 }, { "epoch": 46.93493433395872, "grad_norm": 0.7188080549240112, "learning_rate": 0.002828, "loss": 1.0652, "step": 625408 }, { "epoch": 46.9397373358349, "grad_norm": 0.62159663438797, "learning_rate": 0.002828, "loss": 1.0667, "step": 625472 }, { "epoch": 46.94454033771107, "grad_norm": 0.6020349264144897, "learning_rate": 0.002828, "loss": 1.0609, "step": 625536 }, { "epoch": 46.949343339587244, "grad_norm": 0.7522795796394348, "learning_rate": 0.002828, "loss": 1.0665, "step": 625600 }, { "epoch": 46.954146341463414, "grad_norm": 0.6020083427429199, "learning_rate": 0.002828, "loss": 1.0613, "step": 625664 }, { "epoch": 46.95894934333959, "grad_norm": 0.6429307460784912, "learning_rate": 0.002828, "loss": 1.0667, "step": 625728 }, { "epoch": 46.96375234521576, "grad_norm": 0.5400229692459106, "learning_rate": 0.002828, "loss": 1.0607, "step": 625792 }, { "epoch": 46.968555347091936, "grad_norm": 0.5039812326431274, "learning_rate": 0.002828, "loss": 1.0663, "step": 625856 }, { "epoch": 46.973358348968105, "grad_norm": 0.5142475366592407, "learning_rate": 0.002828, "loss": 1.0598, "step": 625920 }, { "epoch": 46.978161350844275, "grad_norm": 0.6738988161087036, "learning_rate": 0.002828, "loss": 1.0629, "step": 625984 }, { "epoch": 46.98296435272045, "grad_norm": 0.5502144694328308, "learning_rate": 0.002828, "loss": 1.0555, "step": 626048 }, { "epoch": 46.98776735459662, "grad_norm": 0.5779480934143066, "learning_rate": 0.002828, "loss": 1.0604, "step": 626112 }, { "epoch": 46.9925703564728, "grad_norm": 0.809315025806427, "learning_rate": 0.002828, "loss": 1.0589, "step": 626176 }, { "epoch": 46.997373358348966, "grad_norm": 0.7586283087730408, "learning_rate": 0.002828, "loss": 1.0606, "step": 626240 }, { "epoch": 47.00217636022514, "grad_norm": 0.5281000733375549, "learning_rate": 0.002828, "loss": 1.0499, "step": 626304 }, { "epoch": 47.00697936210131, "grad_norm": 0.6618357300758362, "learning_rate": 0.002828, "loss": 1.026, "step": 626368 }, { "epoch": 47.01178236397749, "grad_norm": 0.6292099356651306, "learning_rate": 0.002828, "loss": 1.0272, "step": 626432 }, { "epoch": 47.01658536585366, "grad_norm": 0.6515204906463623, "learning_rate": 0.002828, "loss": 1.0305, "step": 626496 }, { "epoch": 47.021388367729834, "grad_norm": 0.7430369257926941, "learning_rate": 0.002828, "loss": 1.0339, "step": 626560 }, { "epoch": 47.026191369606, "grad_norm": 0.6072025895118713, "learning_rate": 0.002828, "loss": 1.0312, "step": 626624 }, { "epoch": 47.03099437148218, "grad_norm": 0.5867000222206116, "learning_rate": 0.002828, "loss": 1.0357, "step": 626688 }, { "epoch": 47.03579737335835, "grad_norm": 0.6820483207702637, "learning_rate": 0.002828, "loss": 1.03, "step": 626752 }, { "epoch": 47.04060037523452, "grad_norm": 0.528099536895752, "learning_rate": 0.002828, "loss": 1.0369, "step": 626816 }, { "epoch": 47.045403377110695, "grad_norm": 0.7703579664230347, "learning_rate": 0.002828, "loss": 1.0248, "step": 626880 }, { "epoch": 47.050206378986864, "grad_norm": 0.6891136765480042, "learning_rate": 0.002828, "loss": 1.0322, "step": 626944 }, { "epoch": 47.05500938086304, "grad_norm": 0.6598283648490906, "learning_rate": 0.002828, "loss": 1.0344, "step": 627008 }, { "epoch": 47.05981238273921, "grad_norm": 0.7029429078102112, "learning_rate": 0.002828, "loss": 1.0324, "step": 627072 }, { "epoch": 47.06461538461539, "grad_norm": 0.6200289130210876, "learning_rate": 0.002828, "loss": 1.0355, "step": 627136 }, { "epoch": 47.069418386491556, "grad_norm": 0.7782018780708313, "learning_rate": 0.002828, "loss": 1.0301, "step": 627200 }, { "epoch": 47.07422138836773, "grad_norm": 0.5827069282531738, "learning_rate": 0.002828, "loss": 1.0322, "step": 627264 }, { "epoch": 47.0790243902439, "grad_norm": 0.6420133113861084, "learning_rate": 0.002828, "loss": 1.0348, "step": 627328 }, { "epoch": 47.08382739212008, "grad_norm": 0.6323050856590271, "learning_rate": 0.002828, "loss": 1.0299, "step": 627392 }, { "epoch": 47.08863039399625, "grad_norm": 0.6225181221961975, "learning_rate": 0.002828, "loss": 1.0318, "step": 627456 }, { "epoch": 47.09343339587242, "grad_norm": 0.5773589611053467, "learning_rate": 0.002828, "loss": 1.0342, "step": 627520 }, { "epoch": 47.09823639774859, "grad_norm": 0.6008696556091309, "learning_rate": 0.002828, "loss": 1.0334, "step": 627584 }, { "epoch": 47.10303939962476, "grad_norm": 0.5864893198013306, "learning_rate": 0.002828, "loss": 1.0331, "step": 627648 }, { "epoch": 47.10784240150094, "grad_norm": 0.6275175213813782, "learning_rate": 0.002828, "loss": 1.0303, "step": 627712 }, { "epoch": 47.11264540337711, "grad_norm": 0.5716657638549805, "learning_rate": 0.002828, "loss": 1.033, "step": 627776 }, { "epoch": 47.117448405253285, "grad_norm": 0.5904713273048401, "learning_rate": 0.002828, "loss": 1.0367, "step": 627840 }, { "epoch": 47.122251407129454, "grad_norm": 0.6342942714691162, "learning_rate": 0.002828, "loss": 1.0411, "step": 627904 }, { "epoch": 47.12705440900563, "grad_norm": 0.5893776416778564, "learning_rate": 0.002828, "loss": 1.0379, "step": 627968 }, { "epoch": 47.1318574108818, "grad_norm": 0.6401630640029907, "learning_rate": 0.002828, "loss": 1.0359, "step": 628032 }, { "epoch": 47.136660412757976, "grad_norm": 0.7856540083885193, "learning_rate": 0.002828, "loss": 1.0388, "step": 628096 }, { "epoch": 47.141463414634146, "grad_norm": 0.5844777822494507, "learning_rate": 0.002828, "loss": 1.0367, "step": 628160 }, { "epoch": 47.14626641651032, "grad_norm": 0.7515383958816528, "learning_rate": 0.002828, "loss": 1.0368, "step": 628224 }, { "epoch": 47.15106941838649, "grad_norm": 0.59804368019104, "learning_rate": 0.002828, "loss": 1.0379, "step": 628288 }, { "epoch": 47.15587242026266, "grad_norm": 0.5939375162124634, "learning_rate": 0.002828, "loss": 1.0351, "step": 628352 }, { "epoch": 47.16067542213884, "grad_norm": 0.5455921292304993, "learning_rate": 0.002828, "loss": 1.0341, "step": 628416 }, { "epoch": 47.16547842401501, "grad_norm": 0.6681742668151855, "learning_rate": 0.002828, "loss": 1.0352, "step": 628480 }, { "epoch": 47.17028142589118, "grad_norm": 0.6716008186340332, "learning_rate": 0.002828, "loss": 1.0383, "step": 628544 }, { "epoch": 47.17508442776735, "grad_norm": 0.6943315863609314, "learning_rate": 0.002828, "loss": 1.0372, "step": 628608 }, { "epoch": 47.17988742964353, "grad_norm": 0.6753778457641602, "learning_rate": 0.002828, "loss": 1.0434, "step": 628672 }, { "epoch": 47.1846904315197, "grad_norm": 0.7257936000823975, "learning_rate": 0.002828, "loss": 1.0382, "step": 628736 }, { "epoch": 47.189493433395874, "grad_norm": 0.6734530329704285, "learning_rate": 0.002828, "loss": 1.0364, "step": 628800 }, { "epoch": 47.194296435272044, "grad_norm": 0.7898691892623901, "learning_rate": 0.002828, "loss": 1.0407, "step": 628864 }, { "epoch": 47.19909943714822, "grad_norm": 0.6546264886856079, "learning_rate": 0.002828, "loss": 1.0377, "step": 628928 }, { "epoch": 47.20390243902439, "grad_norm": 0.5156117677688599, "learning_rate": 0.002828, "loss": 1.041, "step": 628992 }, { "epoch": 47.208705440900566, "grad_norm": 0.6237413883209229, "learning_rate": 0.002828, "loss": 1.0378, "step": 629056 }, { "epoch": 47.213508442776735, "grad_norm": 0.7476790547370911, "learning_rate": 0.002828, "loss": 1.0435, "step": 629120 }, { "epoch": 47.218311444652905, "grad_norm": 0.643621027469635, "learning_rate": 0.002828, "loss": 1.0387, "step": 629184 }, { "epoch": 47.22311444652908, "grad_norm": 0.7706505060195923, "learning_rate": 0.002828, "loss": 1.0363, "step": 629248 }, { "epoch": 47.22791744840525, "grad_norm": 0.582042932510376, "learning_rate": 0.002828, "loss": 1.0398, "step": 629312 }, { "epoch": 47.23272045028143, "grad_norm": 0.6429329514503479, "learning_rate": 0.002828, "loss": 1.0377, "step": 629376 }, { "epoch": 47.237523452157596, "grad_norm": 0.5040108561515808, "learning_rate": 0.002828, "loss": 1.0448, "step": 629440 }, { "epoch": 47.24232645403377, "grad_norm": 0.7274630069732666, "learning_rate": 0.002828, "loss": 1.0394, "step": 629504 }, { "epoch": 47.24712945590994, "grad_norm": 0.6705653071403503, "learning_rate": 0.002828, "loss": 1.0376, "step": 629568 }, { "epoch": 47.25193245778612, "grad_norm": 0.6059752106666565, "learning_rate": 0.002828, "loss": 1.0384, "step": 629632 }, { "epoch": 47.25673545966229, "grad_norm": 0.5783113241195679, "learning_rate": 0.002828, "loss": 1.0454, "step": 629696 }, { "epoch": 47.261538461538464, "grad_norm": 0.505687415599823, "learning_rate": 0.002828, "loss": 1.0363, "step": 629760 }, { "epoch": 47.26634146341463, "grad_norm": 0.5358524918556213, "learning_rate": 0.002828, "loss": 1.0411, "step": 629824 }, { "epoch": 47.27114446529081, "grad_norm": 0.8537362813949585, "learning_rate": 0.002828, "loss": 1.0409, "step": 629888 }, { "epoch": 47.27594746716698, "grad_norm": 0.5460085868835449, "learning_rate": 0.002828, "loss": 1.0437, "step": 629952 }, { "epoch": 47.28075046904315, "grad_norm": 0.788359522819519, "learning_rate": 0.002828, "loss": 1.0475, "step": 630016 }, { "epoch": 47.285553470919325, "grad_norm": 0.5909889340400696, "learning_rate": 0.002828, "loss": 1.0402, "step": 630080 }, { "epoch": 47.290356472795494, "grad_norm": 0.6027221083641052, "learning_rate": 0.002828, "loss": 1.0388, "step": 630144 }, { "epoch": 47.29515947467167, "grad_norm": 0.6677124500274658, "learning_rate": 0.002828, "loss": 1.0417, "step": 630208 }, { "epoch": 47.29996247654784, "grad_norm": 0.6280375719070435, "learning_rate": 0.002828, "loss": 1.0443, "step": 630272 }, { "epoch": 47.30476547842402, "grad_norm": 0.5601394176483154, "learning_rate": 0.002828, "loss": 1.0407, "step": 630336 }, { "epoch": 47.309568480300186, "grad_norm": 0.7195338606834412, "learning_rate": 0.002828, "loss": 1.0456, "step": 630400 }, { "epoch": 47.31437148217636, "grad_norm": 0.6633152961730957, "learning_rate": 0.002828, "loss": 1.047, "step": 630464 }, { "epoch": 47.31917448405253, "grad_norm": 0.595317542552948, "learning_rate": 0.002828, "loss": 1.0443, "step": 630528 }, { "epoch": 47.32397748592871, "grad_norm": 0.6574602723121643, "learning_rate": 0.002828, "loss": 1.0383, "step": 630592 }, { "epoch": 47.32878048780488, "grad_norm": 0.5694827437400818, "learning_rate": 0.002828, "loss": 1.0408, "step": 630656 }, { "epoch": 47.333583489681054, "grad_norm": 0.5001234412193298, "learning_rate": 0.002828, "loss": 1.041, "step": 630720 }, { "epoch": 47.33838649155722, "grad_norm": 0.5457995533943176, "learning_rate": 0.002828, "loss": 1.0394, "step": 630784 }, { "epoch": 47.34318949343339, "grad_norm": 0.6352452039718628, "learning_rate": 0.002828, "loss": 1.042, "step": 630848 }, { "epoch": 47.34799249530957, "grad_norm": 0.565528154373169, "learning_rate": 0.002828, "loss": 1.0451, "step": 630912 }, { "epoch": 47.35279549718574, "grad_norm": 0.5789456963539124, "learning_rate": 0.002828, "loss": 1.0471, "step": 630976 }, { "epoch": 47.357598499061915, "grad_norm": 0.7093526124954224, "learning_rate": 0.002828, "loss": 1.0432, "step": 631040 }, { "epoch": 47.362401500938084, "grad_norm": 0.5827301144599915, "learning_rate": 0.002828, "loss": 1.0422, "step": 631104 }, { "epoch": 47.36720450281426, "grad_norm": 0.5498403906822205, "learning_rate": 0.002828, "loss": 1.0393, "step": 631168 }, { "epoch": 47.37200750469043, "grad_norm": 0.7173840403556824, "learning_rate": 0.002828, "loss": 1.046, "step": 631232 }, { "epoch": 47.376810506566606, "grad_norm": 0.6041868329048157, "learning_rate": 0.002828, "loss": 1.042, "step": 631296 }, { "epoch": 47.381613508442776, "grad_norm": 0.6391956806182861, "learning_rate": 0.002828, "loss": 1.0457, "step": 631360 }, { "epoch": 47.38641651031895, "grad_norm": 0.5060358047485352, "learning_rate": 0.002828, "loss": 1.0422, "step": 631424 }, { "epoch": 47.39121951219512, "grad_norm": 0.6481066346168518, "learning_rate": 0.002828, "loss": 1.0487, "step": 631488 }, { "epoch": 47.3960225140713, "grad_norm": 0.6985580325126648, "learning_rate": 0.002828, "loss": 1.0431, "step": 631552 }, { "epoch": 47.40082551594747, "grad_norm": 0.7035173773765564, "learning_rate": 0.002828, "loss": 1.0418, "step": 631616 }, { "epoch": 47.40562851782364, "grad_norm": 0.6084043383598328, "learning_rate": 0.002828, "loss": 1.0437, "step": 631680 }, { "epoch": 47.41043151969981, "grad_norm": 0.6706401705741882, "learning_rate": 0.002828, "loss": 1.0426, "step": 631744 }, { "epoch": 47.41523452157598, "grad_norm": 0.7065619230270386, "learning_rate": 0.002828, "loss": 1.0459, "step": 631808 }, { "epoch": 47.42003752345216, "grad_norm": 0.6161830425262451, "learning_rate": 0.002828, "loss": 1.0483, "step": 631872 }, { "epoch": 47.42484052532833, "grad_norm": 0.6157974600791931, "learning_rate": 0.002828, "loss": 1.0434, "step": 631936 }, { "epoch": 47.429643527204504, "grad_norm": 0.782438337802887, "learning_rate": 0.002828, "loss": 1.0485, "step": 632000 }, { "epoch": 47.434446529080674, "grad_norm": 0.6360306739807129, "learning_rate": 0.002828, "loss": 1.0463, "step": 632064 }, { "epoch": 47.43924953095685, "grad_norm": 0.6451125741004944, "learning_rate": 0.002828, "loss": 1.0484, "step": 632128 }, { "epoch": 47.44405253283302, "grad_norm": 0.7209149599075317, "learning_rate": 0.002828, "loss": 1.0443, "step": 632192 }, { "epoch": 47.448855534709196, "grad_norm": 0.5689561367034912, "learning_rate": 0.002828, "loss": 1.0479, "step": 632256 }, { "epoch": 47.453658536585365, "grad_norm": 0.632634699344635, "learning_rate": 0.002828, "loss": 1.0456, "step": 632320 }, { "epoch": 47.45846153846154, "grad_norm": 0.6760092377662659, "learning_rate": 0.002828, "loss": 1.0422, "step": 632384 }, { "epoch": 47.46326454033771, "grad_norm": 0.563585638999939, "learning_rate": 0.002828, "loss": 1.0476, "step": 632448 }, { "epoch": 47.46806754221388, "grad_norm": 0.6286882162094116, "learning_rate": 0.002828, "loss": 1.0466, "step": 632512 }, { "epoch": 47.47287054409006, "grad_norm": 0.7374217510223389, "learning_rate": 0.002828, "loss": 1.0463, "step": 632576 }, { "epoch": 47.477673545966226, "grad_norm": 0.5990324020385742, "learning_rate": 0.002828, "loss": 1.0424, "step": 632640 }, { "epoch": 47.4824765478424, "grad_norm": 0.5541861653327942, "learning_rate": 0.002828, "loss": 1.0487, "step": 632704 }, { "epoch": 47.48727954971857, "grad_norm": 0.49984487891197205, "learning_rate": 0.002828, "loss": 1.0469, "step": 632768 }, { "epoch": 47.49208255159475, "grad_norm": 0.7705683708190918, "learning_rate": 0.002828, "loss": 1.0461, "step": 632832 }, { "epoch": 47.49688555347092, "grad_norm": 0.5586681365966797, "learning_rate": 0.002828, "loss": 1.0466, "step": 632896 }, { "epoch": 47.501688555347094, "grad_norm": 0.6764875054359436, "learning_rate": 0.002828, "loss": 1.0536, "step": 632960 }, { "epoch": 47.506491557223264, "grad_norm": 0.6699135303497314, "learning_rate": 0.002828, "loss": 1.0484, "step": 633024 }, { "epoch": 47.51129455909944, "grad_norm": 0.7383409738540649, "learning_rate": 0.002828, "loss": 1.05, "step": 633088 }, { "epoch": 47.51609756097561, "grad_norm": 0.6897132396697998, "learning_rate": 0.002828, "loss": 1.0514, "step": 633152 }, { "epoch": 47.520900562851786, "grad_norm": 0.5044294595718384, "learning_rate": 0.002828, "loss": 1.0491, "step": 633216 }, { "epoch": 47.525703564727955, "grad_norm": 0.520473301410675, "learning_rate": 0.002828, "loss": 1.044, "step": 633280 }, { "epoch": 47.530506566604124, "grad_norm": 0.5634440183639526, "learning_rate": 0.002828, "loss": 1.046, "step": 633344 }, { "epoch": 47.5353095684803, "grad_norm": 0.6947305798530579, "learning_rate": 0.002828, "loss": 1.0506, "step": 633408 }, { "epoch": 47.54011257035647, "grad_norm": 0.6053649187088013, "learning_rate": 0.002828, "loss": 1.0543, "step": 633472 }, { "epoch": 47.54491557223265, "grad_norm": 0.644389271736145, "learning_rate": 0.002828, "loss": 1.0462, "step": 633536 }, { "epoch": 47.549718574108816, "grad_norm": 0.5872748494148254, "learning_rate": 0.002828, "loss": 1.0452, "step": 633600 }, { "epoch": 47.55452157598499, "grad_norm": 0.7785335779190063, "learning_rate": 0.002828, "loss": 1.0488, "step": 633664 }, { "epoch": 47.55932457786116, "grad_norm": 0.6512903571128845, "learning_rate": 0.002828, "loss": 1.0492, "step": 633728 }, { "epoch": 47.56412757973734, "grad_norm": 0.6293616890907288, "learning_rate": 0.002828, "loss": 1.0518, "step": 633792 }, { "epoch": 47.56893058161351, "grad_norm": 0.6513379216194153, "learning_rate": 0.002828, "loss": 1.0479, "step": 633856 }, { "epoch": 47.573733583489684, "grad_norm": 0.6688045859336853, "learning_rate": 0.002828, "loss": 1.0514, "step": 633920 }, { "epoch": 47.57853658536585, "grad_norm": 0.8207405209541321, "learning_rate": 0.002828, "loss": 1.0461, "step": 633984 }, { "epoch": 47.58333958724203, "grad_norm": 0.7107495069503784, "learning_rate": 0.002828, "loss": 1.0482, "step": 634048 }, { "epoch": 47.5881425891182, "grad_norm": 0.6240464448928833, "learning_rate": 0.002828, "loss": 1.0461, "step": 634112 }, { "epoch": 47.59294559099437, "grad_norm": 0.7034191489219666, "learning_rate": 0.002828, "loss": 1.0495, "step": 634176 }, { "epoch": 47.597748592870545, "grad_norm": 0.606263279914856, "learning_rate": 0.002828, "loss": 1.0536, "step": 634240 }, { "epoch": 47.602551594746714, "grad_norm": 0.6022868752479553, "learning_rate": 0.002828, "loss": 1.0513, "step": 634304 }, { "epoch": 47.60735459662289, "grad_norm": 0.9316113591194153, "learning_rate": 0.002828, "loss": 1.0488, "step": 634368 }, { "epoch": 47.61215759849906, "grad_norm": 0.5415453910827637, "learning_rate": 0.002828, "loss": 1.0522, "step": 634432 }, { "epoch": 47.616960600375236, "grad_norm": 0.6964948177337646, "learning_rate": 0.002828, "loss": 1.0508, "step": 634496 }, { "epoch": 47.621763602251406, "grad_norm": 0.6361346244812012, "learning_rate": 0.002828, "loss": 1.0522, "step": 634560 }, { "epoch": 47.62656660412758, "grad_norm": 0.778515100479126, "learning_rate": 0.002828, "loss": 1.053, "step": 634624 }, { "epoch": 47.63136960600375, "grad_norm": 0.6617556214332581, "learning_rate": 0.002828, "loss": 1.0443, "step": 634688 }, { "epoch": 47.63617260787993, "grad_norm": 0.642494261264801, "learning_rate": 0.002828, "loss": 1.0483, "step": 634752 }, { "epoch": 47.6409756097561, "grad_norm": 0.6483986973762512, "learning_rate": 0.002828, "loss": 1.0541, "step": 634816 }, { "epoch": 47.64577861163227, "grad_norm": 0.5259507894515991, "learning_rate": 0.002828, "loss": 1.0493, "step": 634880 }, { "epoch": 47.65058161350844, "grad_norm": 0.5775465369224548, "learning_rate": 0.002828, "loss": 1.0492, "step": 634944 }, { "epoch": 47.65538461538461, "grad_norm": 0.6609646677970886, "learning_rate": 0.002828, "loss": 1.0489, "step": 635008 }, { "epoch": 47.66018761726079, "grad_norm": 0.6297633647918701, "learning_rate": 0.002828, "loss": 1.0525, "step": 635072 }, { "epoch": 47.66499061913696, "grad_norm": 0.6732721924781799, "learning_rate": 0.002828, "loss": 1.0498, "step": 635136 }, { "epoch": 47.669793621013135, "grad_norm": 0.5661013722419739, "learning_rate": 0.002828, "loss": 1.0517, "step": 635200 }, { "epoch": 47.674596622889304, "grad_norm": 0.5473097562789917, "learning_rate": 0.002828, "loss": 1.0491, "step": 635264 }, { "epoch": 47.67939962476548, "grad_norm": 0.6529828906059265, "learning_rate": 0.002828, "loss": 1.0518, "step": 635328 }, { "epoch": 47.68420262664165, "grad_norm": 0.7554152607917786, "learning_rate": 0.002828, "loss": 1.0479, "step": 635392 }, { "epoch": 47.689005628517826, "grad_norm": 0.7058707475662231, "learning_rate": 0.002828, "loss": 1.0535, "step": 635456 }, { "epoch": 47.693808630393995, "grad_norm": 0.5582723021507263, "learning_rate": 0.002828, "loss": 1.0546, "step": 635520 }, { "epoch": 47.69861163227017, "grad_norm": 0.6613869667053223, "learning_rate": 0.002828, "loss": 1.0532, "step": 635584 }, { "epoch": 47.70341463414634, "grad_norm": 0.5572169423103333, "learning_rate": 0.002828, "loss": 1.0467, "step": 635648 }, { "epoch": 47.70821763602251, "grad_norm": 0.6240736246109009, "learning_rate": 0.002828, "loss": 1.0477, "step": 635712 }, { "epoch": 47.71302063789869, "grad_norm": 0.7397733926773071, "learning_rate": 0.002828, "loss": 1.0547, "step": 635776 }, { "epoch": 47.717823639774856, "grad_norm": 0.5110957026481628, "learning_rate": 0.002828, "loss": 1.0523, "step": 635840 }, { "epoch": 47.72262664165103, "grad_norm": 0.8373028039932251, "learning_rate": 0.002828, "loss": 1.0462, "step": 635904 }, { "epoch": 47.7274296435272, "grad_norm": 0.7218450903892517, "learning_rate": 0.002828, "loss": 1.057, "step": 635968 }, { "epoch": 47.73223264540338, "grad_norm": 0.6247406601905823, "learning_rate": 0.002828, "loss": 1.0528, "step": 636032 }, { "epoch": 47.73703564727955, "grad_norm": 0.720674455165863, "learning_rate": 0.002828, "loss": 1.0491, "step": 636096 }, { "epoch": 47.741838649155724, "grad_norm": 0.7045348286628723, "learning_rate": 0.002828, "loss": 1.0559, "step": 636160 }, { "epoch": 47.746641651031894, "grad_norm": 0.5386797189712524, "learning_rate": 0.002828, "loss": 1.0483, "step": 636224 }, { "epoch": 47.75144465290807, "grad_norm": 0.5467872619628906, "learning_rate": 0.002828, "loss": 1.0584, "step": 636288 }, { "epoch": 47.75624765478424, "grad_norm": 0.5429150462150574, "learning_rate": 0.002828, "loss": 1.0543, "step": 636352 }, { "epoch": 47.761050656660416, "grad_norm": 0.6549360752105713, "learning_rate": 0.002828, "loss": 1.0504, "step": 636416 }, { "epoch": 47.765853658536585, "grad_norm": 0.6696134209632874, "learning_rate": 0.002828, "loss": 1.0576, "step": 636480 }, { "epoch": 47.770656660412754, "grad_norm": 0.6573948264122009, "learning_rate": 0.002828, "loss": 1.0522, "step": 636544 }, { "epoch": 47.77545966228893, "grad_norm": 0.6131066083908081, "learning_rate": 0.002828, "loss": 1.0602, "step": 636608 }, { "epoch": 47.7802626641651, "grad_norm": 0.6040283441543579, "learning_rate": 0.002828, "loss": 1.0636, "step": 636672 }, { "epoch": 47.78506566604128, "grad_norm": 0.6244000196456909, "learning_rate": 0.002828, "loss": 1.0531, "step": 636736 }, { "epoch": 47.789868667917446, "grad_norm": 0.7662160992622375, "learning_rate": 0.002828, "loss": 1.0522, "step": 636800 }, { "epoch": 47.79467166979362, "grad_norm": 0.5642756223678589, "learning_rate": 0.002828, "loss": 1.0548, "step": 636864 }, { "epoch": 47.79947467166979, "grad_norm": 0.7008233070373535, "learning_rate": 0.002828, "loss": 1.0548, "step": 636928 }, { "epoch": 47.80427767354597, "grad_norm": 0.6942220330238342, "learning_rate": 0.002828, "loss": 1.0566, "step": 636992 }, { "epoch": 47.80908067542214, "grad_norm": 0.7647560238838196, "learning_rate": 0.002828, "loss": 1.0545, "step": 637056 }, { "epoch": 47.813883677298314, "grad_norm": 0.6823753714561462, "learning_rate": 0.002828, "loss": 1.0528, "step": 637120 }, { "epoch": 47.81868667917448, "grad_norm": 0.5762351155281067, "learning_rate": 0.002828, "loss": 1.0522, "step": 637184 }, { "epoch": 47.82348968105066, "grad_norm": 0.6646673083305359, "learning_rate": 0.002828, "loss": 1.0487, "step": 637248 }, { "epoch": 47.82829268292683, "grad_norm": 0.604059100151062, "learning_rate": 0.002828, "loss": 1.0624, "step": 637312 }, { "epoch": 47.833095684803, "grad_norm": 0.6984003186225891, "learning_rate": 0.002828, "loss": 1.0504, "step": 637376 }, { "epoch": 47.837898686679175, "grad_norm": 0.5845495462417603, "learning_rate": 0.002828, "loss": 1.0545, "step": 637440 }, { "epoch": 47.842701688555344, "grad_norm": 0.6453345417976379, "learning_rate": 0.002828, "loss": 1.0536, "step": 637504 }, { "epoch": 47.84750469043152, "grad_norm": 0.5302745699882507, "learning_rate": 0.002828, "loss": 1.0527, "step": 637568 }, { "epoch": 47.85230769230769, "grad_norm": 0.6094585061073303, "learning_rate": 0.002828, "loss": 1.055, "step": 637632 }, { "epoch": 47.857110694183866, "grad_norm": 0.5993672013282776, "learning_rate": 0.002828, "loss": 1.0542, "step": 637696 }, { "epoch": 47.861913696060036, "grad_norm": 0.6446161866188049, "learning_rate": 0.002828, "loss": 1.0516, "step": 637760 }, { "epoch": 47.86671669793621, "grad_norm": 0.6541599631309509, "learning_rate": 0.002828, "loss": 1.0557, "step": 637824 }, { "epoch": 47.87151969981238, "grad_norm": 0.6368933916091919, "learning_rate": 0.002828, "loss": 1.0582, "step": 637888 }, { "epoch": 47.87632270168856, "grad_norm": 0.7711507678031921, "learning_rate": 0.002828, "loss": 1.0542, "step": 637952 }, { "epoch": 47.88112570356473, "grad_norm": 0.7595430016517639, "learning_rate": 0.002828, "loss": 1.0588, "step": 638016 }, { "epoch": 47.885928705440904, "grad_norm": 0.5265303254127502, "learning_rate": 0.002828, "loss": 1.0572, "step": 638080 }, { "epoch": 47.89073170731707, "grad_norm": 0.7377369999885559, "learning_rate": 0.002828, "loss": 1.0622, "step": 638144 }, { "epoch": 47.89553470919324, "grad_norm": 0.6574912667274475, "learning_rate": 0.002828, "loss": 1.0527, "step": 638208 }, { "epoch": 47.90033771106942, "grad_norm": 0.6473036408424377, "learning_rate": 0.002828, "loss": 1.0571, "step": 638272 }, { "epoch": 47.90514071294559, "grad_norm": 0.6601776480674744, "learning_rate": 0.002828, "loss": 1.0571, "step": 638336 }, { "epoch": 47.909943714821765, "grad_norm": 0.6472011208534241, "learning_rate": 0.002828, "loss": 1.0577, "step": 638400 }, { "epoch": 47.914746716697934, "grad_norm": 0.701826274394989, "learning_rate": 0.002828, "loss": 1.0603, "step": 638464 }, { "epoch": 47.91954971857411, "grad_norm": 0.5442734360694885, "learning_rate": 0.002828, "loss": 1.0587, "step": 638528 }, { "epoch": 47.92435272045028, "grad_norm": 0.683586061000824, "learning_rate": 0.002828, "loss": 1.0598, "step": 638592 }, { "epoch": 47.929155722326456, "grad_norm": 0.6531975269317627, "learning_rate": 0.002828, "loss": 1.054, "step": 638656 }, { "epoch": 47.933958724202625, "grad_norm": 0.7838884592056274, "learning_rate": 0.002828, "loss": 1.0581, "step": 638720 }, { "epoch": 47.9387617260788, "grad_norm": 0.6613420248031616, "learning_rate": 0.002828, "loss": 1.0596, "step": 638784 }, { "epoch": 47.94356472795497, "grad_norm": 0.6097777485847473, "learning_rate": 0.002828, "loss": 1.0533, "step": 638848 }, { "epoch": 47.94836772983115, "grad_norm": 0.5991477370262146, "learning_rate": 0.002828, "loss": 1.0531, "step": 638912 }, { "epoch": 47.95317073170732, "grad_norm": 0.741007924079895, "learning_rate": 0.002828, "loss": 1.0585, "step": 638976 }, { "epoch": 47.957973733583486, "grad_norm": 0.6317166090011597, "learning_rate": 0.002828, "loss": 1.0596, "step": 639040 }, { "epoch": 47.96277673545966, "grad_norm": 0.6188191771507263, "learning_rate": 0.002828, "loss": 1.0558, "step": 639104 }, { "epoch": 47.96757973733583, "grad_norm": 0.7189934253692627, "learning_rate": 0.002828, "loss": 1.0571, "step": 639168 }, { "epoch": 47.97238273921201, "grad_norm": 0.6403295993804932, "learning_rate": 0.002828, "loss": 1.0533, "step": 639232 }, { "epoch": 47.97718574108818, "grad_norm": 0.6693562865257263, "learning_rate": 0.002828, "loss": 1.0531, "step": 639296 }, { "epoch": 47.981988742964354, "grad_norm": 0.545728325843811, "learning_rate": 0.002828, "loss": 1.0589, "step": 639360 }, { "epoch": 47.986791744840524, "grad_norm": 0.6077553629875183, "learning_rate": 0.002828, "loss": 1.0634, "step": 639424 }, { "epoch": 47.9915947467167, "grad_norm": 0.5200755000114441, "learning_rate": 0.002828, "loss": 1.061, "step": 639488 }, { "epoch": 47.99639774859287, "grad_norm": 0.589529812335968, "learning_rate": 0.002828, "loss": 1.0567, "step": 639552 }, { "epoch": 48.001200750469046, "grad_norm": 0.7281575202941895, "learning_rate": 0.002828, "loss": 1.0468, "step": 639616 }, { "epoch": 48.006003752345215, "grad_norm": 0.6403776407241821, "learning_rate": 0.002828, "loss": 1.0214, "step": 639680 }, { "epoch": 48.01080675422139, "grad_norm": 0.5161173939704895, "learning_rate": 0.002828, "loss": 1.0208, "step": 639744 }, { "epoch": 48.01560975609756, "grad_norm": 0.5105540752410889, "learning_rate": 0.002828, "loss": 1.0238, "step": 639808 }, { "epoch": 48.02041275797373, "grad_norm": 0.6909409761428833, "learning_rate": 0.002828, "loss": 1.02, "step": 639872 }, { "epoch": 48.02521575984991, "grad_norm": 0.6992323398590088, "learning_rate": 0.002828, "loss": 1.0245, "step": 639936 }, { "epoch": 48.030018761726076, "grad_norm": 0.6982917785644531, "learning_rate": 0.002828, "loss": 1.0298, "step": 640000 }, { "epoch": 48.03482176360225, "grad_norm": 0.7123545408248901, "learning_rate": 0.002828, "loss": 1.019, "step": 640064 }, { "epoch": 48.03962476547842, "grad_norm": 0.6596068143844604, "learning_rate": 0.002828, "loss": 1.0297, "step": 640128 }, { "epoch": 48.0444277673546, "grad_norm": 0.7315285205841064, "learning_rate": 0.002828, "loss": 1.0268, "step": 640192 }, { "epoch": 48.04923076923077, "grad_norm": 0.6878241896629333, "learning_rate": 0.002828, "loss": 1.0234, "step": 640256 }, { "epoch": 48.054033771106944, "grad_norm": 0.5641777515411377, "learning_rate": 0.002828, "loss": 1.0302, "step": 640320 }, { "epoch": 48.05883677298311, "grad_norm": 0.6245089769363403, "learning_rate": 0.002828, "loss": 1.0281, "step": 640384 }, { "epoch": 48.06363977485929, "grad_norm": 0.7929434180259705, "learning_rate": 0.002828, "loss": 1.0297, "step": 640448 }, { "epoch": 48.06844277673546, "grad_norm": 0.5667207837104797, "learning_rate": 0.002828, "loss": 1.0263, "step": 640512 }, { "epoch": 48.073245778611636, "grad_norm": 0.6004430055618286, "learning_rate": 0.002828, "loss": 1.0275, "step": 640576 }, { "epoch": 48.078048780487805, "grad_norm": 0.6360554695129395, "learning_rate": 0.002828, "loss": 1.0265, "step": 640640 }, { "epoch": 48.082851782363974, "grad_norm": 0.6814728379249573, "learning_rate": 0.002828, "loss": 1.031, "step": 640704 }, { "epoch": 48.08765478424015, "grad_norm": 0.657783567905426, "learning_rate": 0.002828, "loss": 1.0234, "step": 640768 }, { "epoch": 48.09245778611632, "grad_norm": 0.5107567310333252, "learning_rate": 0.002828, "loss": 1.0254, "step": 640832 }, { "epoch": 48.0972607879925, "grad_norm": 0.6924241781234741, "learning_rate": 0.002828, "loss": 1.0326, "step": 640896 }, { "epoch": 48.102063789868666, "grad_norm": 0.6154158115386963, "learning_rate": 0.002828, "loss": 1.0229, "step": 640960 }, { "epoch": 48.10686679174484, "grad_norm": 0.685716986656189, "learning_rate": 0.002828, "loss": 1.0256, "step": 641024 }, { "epoch": 48.11166979362101, "grad_norm": 0.6853328347206116, "learning_rate": 0.002828, "loss": 1.0376, "step": 641088 }, { "epoch": 48.11647279549719, "grad_norm": 0.5760686993598938, "learning_rate": 0.002828, "loss": 1.028, "step": 641152 }, { "epoch": 48.12127579737336, "grad_norm": 0.5979732275009155, "learning_rate": 0.002828, "loss": 1.0299, "step": 641216 }, { "epoch": 48.126078799249534, "grad_norm": 0.6231614947319031, "learning_rate": 0.002828, "loss": 1.0352, "step": 641280 }, { "epoch": 48.1308818011257, "grad_norm": 0.6504188179969788, "learning_rate": 0.002828, "loss": 1.0261, "step": 641344 }, { "epoch": 48.13568480300188, "grad_norm": 0.6307108402252197, "learning_rate": 0.002828, "loss": 1.0322, "step": 641408 }, { "epoch": 48.14048780487805, "grad_norm": 0.591966986656189, "learning_rate": 0.002828, "loss": 1.0313, "step": 641472 }, { "epoch": 48.14529080675422, "grad_norm": 0.5324594378471375, "learning_rate": 0.002828, "loss": 1.0335, "step": 641536 }, { "epoch": 48.150093808630395, "grad_norm": 0.7049140930175781, "learning_rate": 0.002828, "loss": 1.0277, "step": 641600 }, { "epoch": 48.154896810506564, "grad_norm": 0.7575385570526123, "learning_rate": 0.002828, "loss": 1.0301, "step": 641664 }, { "epoch": 48.15969981238274, "grad_norm": 0.7463407516479492, "learning_rate": 0.002828, "loss": 1.0307, "step": 641728 }, { "epoch": 48.16450281425891, "grad_norm": 0.6462583541870117, "learning_rate": 0.002828, "loss": 1.0286, "step": 641792 }, { "epoch": 48.169305816135086, "grad_norm": 0.6489346623420715, "learning_rate": 0.002828, "loss": 1.0273, "step": 641856 }, { "epoch": 48.174108818011256, "grad_norm": 0.6180921196937561, "learning_rate": 0.002828, "loss": 1.0381, "step": 641920 }, { "epoch": 48.17891181988743, "grad_norm": 0.7339633703231812, "learning_rate": 0.002828, "loss": 1.0339, "step": 641984 }, { "epoch": 48.1837148217636, "grad_norm": 0.6105630993843079, "learning_rate": 0.002828, "loss": 1.0335, "step": 642048 }, { "epoch": 48.18851782363978, "grad_norm": 0.5919303894042969, "learning_rate": 0.002828, "loss": 1.0318, "step": 642112 }, { "epoch": 48.19332082551595, "grad_norm": 0.6174924969673157, "learning_rate": 0.002828, "loss": 1.0308, "step": 642176 }, { "epoch": 48.19812382739212, "grad_norm": 0.7035306692123413, "learning_rate": 0.002828, "loss": 1.0316, "step": 642240 }, { "epoch": 48.20292682926829, "grad_norm": 0.5673324465751648, "learning_rate": 0.002828, "loss": 1.0362, "step": 642304 }, { "epoch": 48.20772983114446, "grad_norm": 0.6283726692199707, "learning_rate": 0.002828, "loss": 1.0305, "step": 642368 }, { "epoch": 48.21253283302064, "grad_norm": 0.6875553727149963, "learning_rate": 0.002828, "loss": 1.0311, "step": 642432 }, { "epoch": 48.21733583489681, "grad_norm": 0.7149839997291565, "learning_rate": 0.002828, "loss": 1.0359, "step": 642496 }, { "epoch": 48.222138836772984, "grad_norm": 0.5285342931747437, "learning_rate": 0.002828, "loss": 1.0351, "step": 642560 }, { "epoch": 48.226941838649154, "grad_norm": 0.5428668856620789, "learning_rate": 0.002828, "loss": 1.0336, "step": 642624 }, { "epoch": 48.23174484052533, "grad_norm": 0.5632164478302002, "learning_rate": 0.002828, "loss": 1.037, "step": 642688 }, { "epoch": 48.2365478424015, "grad_norm": 0.710423469543457, "learning_rate": 0.002828, "loss": 1.0398, "step": 642752 }, { "epoch": 48.241350844277676, "grad_norm": 0.7015545964241028, "learning_rate": 0.002828, "loss": 1.034, "step": 642816 }, { "epoch": 48.246153846153845, "grad_norm": 0.6140884757041931, "learning_rate": 0.002828, "loss": 1.0347, "step": 642880 }, { "epoch": 48.25095684803002, "grad_norm": 0.6183640360832214, "learning_rate": 0.002828, "loss": 1.0361, "step": 642944 }, { "epoch": 48.25575984990619, "grad_norm": 0.6626155376434326, "learning_rate": 0.002828, "loss": 1.038, "step": 643008 }, { "epoch": 48.26056285178237, "grad_norm": 0.7320151329040527, "learning_rate": 0.002828, "loss": 1.0372, "step": 643072 }, { "epoch": 48.26536585365854, "grad_norm": 0.6230993866920471, "learning_rate": 0.002828, "loss": 1.0276, "step": 643136 }, { "epoch": 48.270168855534706, "grad_norm": 0.658531904220581, "learning_rate": 0.002828, "loss": 1.0434, "step": 643200 }, { "epoch": 48.27497185741088, "grad_norm": 0.5257073044776917, "learning_rate": 0.002828, "loss": 1.0349, "step": 643264 }, { "epoch": 48.27977485928705, "grad_norm": 0.7808026671409607, "learning_rate": 0.002828, "loss": 1.0312, "step": 643328 }, { "epoch": 48.28457786116323, "grad_norm": 0.6385923027992249, "learning_rate": 0.002828, "loss": 1.0333, "step": 643392 }, { "epoch": 48.2893808630394, "grad_norm": 0.693492591381073, "learning_rate": 0.002828, "loss": 1.0359, "step": 643456 }, { "epoch": 48.294183864915574, "grad_norm": 0.5596438050270081, "learning_rate": 0.002828, "loss": 1.0354, "step": 643520 }, { "epoch": 48.29898686679174, "grad_norm": 0.6546857357025146, "learning_rate": 0.002828, "loss": 1.0341, "step": 643584 }, { "epoch": 48.30378986866792, "grad_norm": 0.5163006782531738, "learning_rate": 0.002828, "loss": 1.0316, "step": 643648 }, { "epoch": 48.30859287054409, "grad_norm": 0.553073525428772, "learning_rate": 0.002828, "loss": 1.0349, "step": 643712 }, { "epoch": 48.313395872420266, "grad_norm": 0.7049180865287781, "learning_rate": 0.002828, "loss": 1.0343, "step": 643776 }, { "epoch": 48.318198874296435, "grad_norm": 0.6629697680473328, "learning_rate": 0.002828, "loss": 1.0348, "step": 643840 }, { "epoch": 48.32300187617261, "grad_norm": 0.6142928600311279, "learning_rate": 0.002828, "loss": 1.0374, "step": 643904 }, { "epoch": 48.32780487804878, "grad_norm": 0.8215116262435913, "learning_rate": 0.002828, "loss": 1.0362, "step": 643968 }, { "epoch": 48.33260787992495, "grad_norm": 0.7114266753196716, "learning_rate": 0.002828, "loss": 1.0352, "step": 644032 }, { "epoch": 48.33741088180113, "grad_norm": 0.742658257484436, "learning_rate": 0.002828, "loss": 1.0356, "step": 644096 }, { "epoch": 48.342213883677296, "grad_norm": 0.5757499933242798, "learning_rate": 0.002828, "loss": 1.0346, "step": 644160 }, { "epoch": 48.34701688555347, "grad_norm": 0.6741271615028381, "learning_rate": 0.002828, "loss": 1.0449, "step": 644224 }, { "epoch": 48.35181988742964, "grad_norm": 0.7909224033355713, "learning_rate": 0.002828, "loss": 1.0351, "step": 644288 }, { "epoch": 48.35662288930582, "grad_norm": 0.6528041958808899, "learning_rate": 0.002828, "loss": 1.0401, "step": 644352 }, { "epoch": 48.36142589118199, "grad_norm": 0.6354573965072632, "learning_rate": 0.002828, "loss": 1.0327, "step": 644416 }, { "epoch": 48.366228893058164, "grad_norm": 0.7101730704307556, "learning_rate": 0.002828, "loss": 1.0344, "step": 644480 }, { "epoch": 48.37103189493433, "grad_norm": 0.5995579361915588, "learning_rate": 0.002828, "loss": 1.0401, "step": 644544 }, { "epoch": 48.37583489681051, "grad_norm": 0.6640355587005615, "learning_rate": 0.002828, "loss": 1.0396, "step": 644608 }, { "epoch": 48.38063789868668, "grad_norm": 0.6031660437583923, "learning_rate": 0.002828, "loss": 1.0399, "step": 644672 }, { "epoch": 48.38544090056285, "grad_norm": 0.6285581588745117, "learning_rate": 0.002828, "loss": 1.0368, "step": 644736 }, { "epoch": 48.390243902439025, "grad_norm": 0.6844165325164795, "learning_rate": 0.002828, "loss": 1.0411, "step": 644800 }, { "epoch": 48.395046904315194, "grad_norm": 0.6001917719841003, "learning_rate": 0.002828, "loss": 1.0373, "step": 644864 }, { "epoch": 48.39984990619137, "grad_norm": 0.6581211686134338, "learning_rate": 0.002828, "loss": 1.0319, "step": 644928 }, { "epoch": 48.40465290806754, "grad_norm": 0.6621804237365723, "learning_rate": 0.002828, "loss": 1.0385, "step": 644992 }, { "epoch": 48.409455909943716, "grad_norm": 0.6346020102500916, "learning_rate": 0.002828, "loss": 1.0392, "step": 645056 }, { "epoch": 48.414258911819886, "grad_norm": 0.6103173494338989, "learning_rate": 0.002828, "loss": 1.0404, "step": 645120 }, { "epoch": 48.41906191369606, "grad_norm": 0.7052111029624939, "learning_rate": 0.002828, "loss": 1.0412, "step": 645184 }, { "epoch": 48.42386491557223, "grad_norm": 0.6104574799537659, "learning_rate": 0.002828, "loss": 1.0333, "step": 645248 }, { "epoch": 48.42866791744841, "grad_norm": 0.5670404434204102, "learning_rate": 0.002828, "loss": 1.0371, "step": 645312 }, { "epoch": 48.43347091932458, "grad_norm": 0.6457066535949707, "learning_rate": 0.002828, "loss": 1.0381, "step": 645376 }, { "epoch": 48.438273921200754, "grad_norm": 0.7141438722610474, "learning_rate": 0.002828, "loss": 1.04, "step": 645440 }, { "epoch": 48.44307692307692, "grad_norm": 0.6793937683105469, "learning_rate": 0.002828, "loss": 1.0392, "step": 645504 }, { "epoch": 48.44787992495309, "grad_norm": 0.6794419288635254, "learning_rate": 0.002828, "loss": 1.0315, "step": 645568 }, { "epoch": 48.45268292682927, "grad_norm": 0.5492742657661438, "learning_rate": 0.002828, "loss": 1.0426, "step": 645632 }, { "epoch": 48.45748592870544, "grad_norm": 0.5703292489051819, "learning_rate": 0.002828, "loss": 1.0382, "step": 645696 }, { "epoch": 48.462288930581614, "grad_norm": 0.6498845815658569, "learning_rate": 0.002828, "loss": 1.0413, "step": 645760 }, { "epoch": 48.467091932457784, "grad_norm": 0.6196924448013306, "learning_rate": 0.002828, "loss": 1.0419, "step": 645824 }, { "epoch": 48.47189493433396, "grad_norm": 0.8179911375045776, "learning_rate": 0.002828, "loss": 1.0418, "step": 645888 }, { "epoch": 48.47669793621013, "grad_norm": 0.6045671701431274, "learning_rate": 0.002828, "loss": 1.046, "step": 645952 }, { "epoch": 48.481500938086306, "grad_norm": 0.6320502161979675, "learning_rate": 0.002828, "loss": 1.0441, "step": 646016 }, { "epoch": 48.486303939962475, "grad_norm": 0.6962401866912842, "learning_rate": 0.002828, "loss": 1.041, "step": 646080 }, { "epoch": 48.49110694183865, "grad_norm": 0.6397041082382202, "learning_rate": 0.002828, "loss": 1.0428, "step": 646144 }, { "epoch": 48.49590994371482, "grad_norm": 0.599310040473938, "learning_rate": 0.002828, "loss": 1.0461, "step": 646208 }, { "epoch": 48.500712945591, "grad_norm": 0.5711956024169922, "learning_rate": 0.002828, "loss": 1.0386, "step": 646272 }, { "epoch": 48.50551594746717, "grad_norm": 0.6898519396781921, "learning_rate": 0.002828, "loss": 1.0441, "step": 646336 }, { "epoch": 48.510318949343336, "grad_norm": 0.818206787109375, "learning_rate": 0.002828, "loss": 1.0452, "step": 646400 }, { "epoch": 48.51512195121951, "grad_norm": 0.571017861366272, "learning_rate": 0.002828, "loss": 1.0436, "step": 646464 }, { "epoch": 48.51992495309568, "grad_norm": 0.5874201059341431, "learning_rate": 0.002828, "loss": 1.0453, "step": 646528 }, { "epoch": 48.52472795497186, "grad_norm": 0.5294089317321777, "learning_rate": 0.002828, "loss": 1.0431, "step": 646592 }, { "epoch": 48.52953095684803, "grad_norm": 0.6514833569526672, "learning_rate": 0.002828, "loss": 1.0457, "step": 646656 }, { "epoch": 48.534333958724204, "grad_norm": 0.5631481409072876, "learning_rate": 0.002828, "loss": 1.0405, "step": 646720 }, { "epoch": 48.53913696060037, "grad_norm": 0.7069897055625916, "learning_rate": 0.002828, "loss": 1.0445, "step": 646784 }, { "epoch": 48.54393996247655, "grad_norm": 0.589514434337616, "learning_rate": 0.002828, "loss": 1.0424, "step": 646848 }, { "epoch": 48.54874296435272, "grad_norm": 0.6267862915992737, "learning_rate": 0.002828, "loss": 1.0413, "step": 646912 }, { "epoch": 48.553545966228896, "grad_norm": 0.637648344039917, "learning_rate": 0.002828, "loss": 1.0412, "step": 646976 }, { "epoch": 48.558348968105065, "grad_norm": 0.5455642342567444, "learning_rate": 0.002828, "loss": 1.0476, "step": 647040 }, { "epoch": 48.56315196998124, "grad_norm": 0.7661427855491638, "learning_rate": 0.002828, "loss": 1.0432, "step": 647104 }, { "epoch": 48.56795497185741, "grad_norm": 0.7244403958320618, "learning_rate": 0.002828, "loss": 1.046, "step": 647168 }, { "epoch": 48.57275797373358, "grad_norm": 0.5974761247634888, "learning_rate": 0.002828, "loss": 1.0377, "step": 647232 }, { "epoch": 48.57756097560976, "grad_norm": 0.7058935761451721, "learning_rate": 0.002828, "loss": 1.0409, "step": 647296 }, { "epoch": 48.582363977485926, "grad_norm": 0.5798015594482422, "learning_rate": 0.002828, "loss": 1.0429, "step": 647360 }, { "epoch": 48.5871669793621, "grad_norm": 0.5887682437896729, "learning_rate": 0.002828, "loss": 1.0444, "step": 647424 }, { "epoch": 48.59196998123827, "grad_norm": 0.680789589881897, "learning_rate": 0.002828, "loss": 1.0446, "step": 647488 }, { "epoch": 48.59677298311445, "grad_norm": 0.5927484035491943, "learning_rate": 0.002828, "loss": 1.0449, "step": 647552 }, { "epoch": 48.60157598499062, "grad_norm": 0.5905951857566833, "learning_rate": 0.002828, "loss": 1.0444, "step": 647616 }, { "epoch": 48.606378986866794, "grad_norm": 0.6388547420501709, "learning_rate": 0.002828, "loss": 1.0458, "step": 647680 }, { "epoch": 48.61118198874296, "grad_norm": 0.6019654870033264, "learning_rate": 0.002828, "loss": 1.0469, "step": 647744 }, { "epoch": 48.61598499061914, "grad_norm": 0.6199590563774109, "learning_rate": 0.002828, "loss": 1.0444, "step": 647808 }, { "epoch": 48.62078799249531, "grad_norm": 0.6658002138137817, "learning_rate": 0.002828, "loss": 1.0449, "step": 647872 }, { "epoch": 48.625590994371485, "grad_norm": 0.6782828569412231, "learning_rate": 0.002828, "loss": 1.0459, "step": 647936 }, { "epoch": 48.630393996247655, "grad_norm": 0.8401302695274353, "learning_rate": 0.002828, "loss": 1.05, "step": 648000 }, { "epoch": 48.635196998123824, "grad_norm": 0.4938647150993347, "learning_rate": 0.002828, "loss": 1.0463, "step": 648064 }, { "epoch": 48.64, "grad_norm": 0.8528944253921509, "learning_rate": 0.002828, "loss": 1.0463, "step": 648128 }, { "epoch": 48.64480300187617, "grad_norm": 0.6374745965003967, "learning_rate": 0.002828, "loss": 1.0427, "step": 648192 }, { "epoch": 48.649606003752346, "grad_norm": 0.5849046111106873, "learning_rate": 0.002828, "loss": 1.0441, "step": 648256 }, { "epoch": 48.654409005628516, "grad_norm": 0.576451301574707, "learning_rate": 0.002828, "loss": 1.0453, "step": 648320 }, { "epoch": 48.65921200750469, "grad_norm": 0.5208295583724976, "learning_rate": 0.002828, "loss": 1.0482, "step": 648384 }, { "epoch": 48.66401500938086, "grad_norm": 0.649178147315979, "learning_rate": 0.002828, "loss": 1.0494, "step": 648448 }, { "epoch": 48.66881801125704, "grad_norm": 0.6612587571144104, "learning_rate": 0.002828, "loss": 1.0453, "step": 648512 }, { "epoch": 48.67362101313321, "grad_norm": 0.8569740653038025, "learning_rate": 0.002828, "loss": 1.0443, "step": 648576 }, { "epoch": 48.678424015009384, "grad_norm": 0.6810051798820496, "learning_rate": 0.002828, "loss": 1.053, "step": 648640 }, { "epoch": 48.68322701688555, "grad_norm": 0.6148619055747986, "learning_rate": 0.002828, "loss": 1.0457, "step": 648704 }, { "epoch": 48.68803001876173, "grad_norm": 0.5363982915878296, "learning_rate": 0.002828, "loss": 1.0462, "step": 648768 }, { "epoch": 48.6928330206379, "grad_norm": 0.662300169467926, "learning_rate": 0.002828, "loss": 1.0437, "step": 648832 }, { "epoch": 48.69763602251407, "grad_norm": 0.5750449895858765, "learning_rate": 0.002828, "loss": 1.0484, "step": 648896 }, { "epoch": 48.702439024390245, "grad_norm": 0.5501517057418823, "learning_rate": 0.002828, "loss": 1.0393, "step": 648960 }, { "epoch": 48.707242026266414, "grad_norm": 0.721763014793396, "learning_rate": 0.002828, "loss": 1.0528, "step": 649024 }, { "epoch": 48.71204502814259, "grad_norm": 0.6454340219497681, "learning_rate": 0.002828, "loss": 1.0426, "step": 649088 }, { "epoch": 48.71684803001876, "grad_norm": 0.6845133304595947, "learning_rate": 0.002828, "loss": 1.0474, "step": 649152 }, { "epoch": 48.721651031894936, "grad_norm": 0.762546718120575, "learning_rate": 0.002828, "loss": 1.0476, "step": 649216 }, { "epoch": 48.726454033771105, "grad_norm": 0.6332879662513733, "learning_rate": 0.002828, "loss": 1.0498, "step": 649280 }, { "epoch": 48.73125703564728, "grad_norm": 0.6962564587593079, "learning_rate": 0.002828, "loss": 1.0439, "step": 649344 }, { "epoch": 48.73606003752345, "grad_norm": 0.6551742553710938, "learning_rate": 0.002828, "loss": 1.0438, "step": 649408 }, { "epoch": 48.74086303939963, "grad_norm": 0.6957951784133911, "learning_rate": 0.002828, "loss": 1.0486, "step": 649472 }, { "epoch": 48.7456660412758, "grad_norm": 0.5979375839233398, "learning_rate": 0.002828, "loss": 1.0501, "step": 649536 }, { "epoch": 48.75046904315197, "grad_norm": 0.6078873872756958, "learning_rate": 0.002828, "loss": 1.0492, "step": 649600 }, { "epoch": 48.75527204502814, "grad_norm": 0.5456064343452454, "learning_rate": 0.002828, "loss": 1.0495, "step": 649664 }, { "epoch": 48.76007504690431, "grad_norm": 0.6340984106063843, "learning_rate": 0.002828, "loss": 1.0475, "step": 649728 }, { "epoch": 48.76487804878049, "grad_norm": 0.6775336265563965, "learning_rate": 0.002828, "loss": 1.0435, "step": 649792 }, { "epoch": 48.76968105065666, "grad_norm": 0.6024465560913086, "learning_rate": 0.002828, "loss": 1.0489, "step": 649856 }, { "epoch": 48.774484052532834, "grad_norm": 0.6876002550125122, "learning_rate": 0.002828, "loss": 1.0454, "step": 649920 }, { "epoch": 48.779287054409004, "grad_norm": 0.7455543875694275, "learning_rate": 0.002828, "loss": 1.0491, "step": 649984 }, { "epoch": 48.78409005628518, "grad_norm": 0.7086321711540222, "learning_rate": 0.002828, "loss": 1.0489, "step": 650048 }, { "epoch": 48.78889305816135, "grad_norm": 0.614976167678833, "learning_rate": 0.002828, "loss": 1.0462, "step": 650112 }, { "epoch": 48.793696060037526, "grad_norm": 0.6302147507667542, "learning_rate": 0.002828, "loss": 1.0449, "step": 650176 }, { "epoch": 48.798499061913695, "grad_norm": 0.6637048125267029, "learning_rate": 0.002828, "loss": 1.0494, "step": 650240 }, { "epoch": 48.80330206378987, "grad_norm": 0.6396692991256714, "learning_rate": 0.002828, "loss": 1.0497, "step": 650304 }, { "epoch": 48.80810506566604, "grad_norm": 0.5633668303489685, "learning_rate": 0.002828, "loss": 1.0413, "step": 650368 }, { "epoch": 48.81290806754222, "grad_norm": 0.5798957347869873, "learning_rate": 0.002828, "loss": 1.0498, "step": 650432 }, { "epoch": 48.81771106941839, "grad_norm": 0.6401728391647339, "learning_rate": 0.002828, "loss": 1.0499, "step": 650496 }, { "epoch": 48.822514071294556, "grad_norm": 0.6117183566093445, "learning_rate": 0.002828, "loss": 1.0484, "step": 650560 }, { "epoch": 48.82731707317073, "grad_norm": 0.5531995296478271, "learning_rate": 0.002828, "loss": 1.0527, "step": 650624 }, { "epoch": 48.8321200750469, "grad_norm": 0.4812498390674591, "learning_rate": 0.002828, "loss": 1.049, "step": 650688 }, { "epoch": 48.83692307692308, "grad_norm": 0.5269946455955505, "learning_rate": 0.002828, "loss": 1.0485, "step": 650752 }, { "epoch": 48.84172607879925, "grad_norm": 0.5963234305381775, "learning_rate": 0.002828, "loss": 1.0452, "step": 650816 }, { "epoch": 48.846529080675424, "grad_norm": 0.6707912087440491, "learning_rate": 0.002828, "loss": 1.0521, "step": 650880 }, { "epoch": 48.85133208255159, "grad_norm": 0.754862368106842, "learning_rate": 0.002828, "loss": 1.0533, "step": 650944 }, { "epoch": 48.85613508442777, "grad_norm": 0.7602565884590149, "learning_rate": 0.002828, "loss": 1.0457, "step": 651008 }, { "epoch": 48.86093808630394, "grad_norm": 0.5208454728126526, "learning_rate": 0.002828, "loss": 1.0479, "step": 651072 }, { "epoch": 48.865741088180116, "grad_norm": 0.706495463848114, "learning_rate": 0.002828, "loss": 1.0501, "step": 651136 }, { "epoch": 48.870544090056285, "grad_norm": 0.7677738070487976, "learning_rate": 0.002828, "loss": 1.0482, "step": 651200 }, { "epoch": 48.875347091932454, "grad_norm": 0.7193800806999207, "learning_rate": 0.002828, "loss": 1.0475, "step": 651264 }, { "epoch": 48.88015009380863, "grad_norm": 0.5489779114723206, "learning_rate": 0.002828, "loss": 1.0489, "step": 651328 }, { "epoch": 48.8849530956848, "grad_norm": 0.5512523055076599, "learning_rate": 0.002828, "loss": 1.0491, "step": 651392 }, { "epoch": 48.889756097560976, "grad_norm": 0.7432524561882019, "learning_rate": 0.002828, "loss": 1.0496, "step": 651456 }, { "epoch": 48.894559099437146, "grad_norm": 0.6426169872283936, "learning_rate": 0.002828, "loss": 1.0565, "step": 651520 }, { "epoch": 48.89936210131332, "grad_norm": 0.5901021361351013, "learning_rate": 0.002828, "loss": 1.0486, "step": 651584 }, { "epoch": 48.90416510318949, "grad_norm": 0.7789266109466553, "learning_rate": 0.002828, "loss": 1.0471, "step": 651648 }, { "epoch": 48.90896810506567, "grad_norm": 0.7148862481117249, "learning_rate": 0.002828, "loss": 1.0497, "step": 651712 }, { "epoch": 48.91377110694184, "grad_norm": 0.6462157964706421, "learning_rate": 0.002828, "loss": 1.0537, "step": 651776 }, { "epoch": 48.918574108818014, "grad_norm": 0.6770671606063843, "learning_rate": 0.002828, "loss": 1.054, "step": 651840 }, { "epoch": 48.92337711069418, "grad_norm": 0.6750553250312805, "learning_rate": 0.002828, "loss": 1.0534, "step": 651904 }, { "epoch": 48.92818011257036, "grad_norm": 0.57192462682724, "learning_rate": 0.002828, "loss": 1.049, "step": 651968 }, { "epoch": 48.93298311444653, "grad_norm": 0.7276706099510193, "learning_rate": 0.002828, "loss": 1.0529, "step": 652032 }, { "epoch": 48.9377861163227, "grad_norm": 0.648541271686554, "learning_rate": 0.002828, "loss": 1.0511, "step": 652096 }, { "epoch": 48.942589118198875, "grad_norm": 0.6642624139785767, "learning_rate": 0.002828, "loss": 1.0544, "step": 652160 }, { "epoch": 48.947392120075044, "grad_norm": 0.755593478679657, "learning_rate": 0.002828, "loss": 1.0518, "step": 652224 }, { "epoch": 48.95219512195122, "grad_norm": 0.5870972871780396, "learning_rate": 0.002828, "loss": 1.0566, "step": 652288 }, { "epoch": 48.95699812382739, "grad_norm": 0.8194005489349365, "learning_rate": 0.002828, "loss": 1.0498, "step": 652352 }, { "epoch": 48.961801125703566, "grad_norm": 0.6303133964538574, "learning_rate": 0.002828, "loss": 1.054, "step": 652416 }, { "epoch": 48.966604127579735, "grad_norm": 0.5689576268196106, "learning_rate": 0.002828, "loss": 1.0464, "step": 652480 }, { "epoch": 48.97140712945591, "grad_norm": 0.7027059197425842, "learning_rate": 0.002828, "loss": 1.048, "step": 652544 }, { "epoch": 48.97621013133208, "grad_norm": 0.6509045362472534, "learning_rate": 0.002828, "loss": 1.0536, "step": 652608 }, { "epoch": 48.98101313320826, "grad_norm": 0.6776425242424011, "learning_rate": 0.002828, "loss": 1.0528, "step": 652672 }, { "epoch": 48.98581613508443, "grad_norm": 0.8358839750289917, "learning_rate": 0.002828, "loss": 1.0489, "step": 652736 }, { "epoch": 48.9906191369606, "grad_norm": 0.5223692655563354, "learning_rate": 0.002828, "loss": 1.0551, "step": 652800 }, { "epoch": 48.99542213883677, "grad_norm": 0.5597832798957825, "learning_rate": 0.002828, "loss": 1.0536, "step": 652864 }, { "epoch": 49.00022514071294, "grad_norm": 0.5572521686553955, "learning_rate": 0.002828, "loss": 1.0484, "step": 652928 }, { "epoch": 49.00502814258912, "grad_norm": 0.6252709031105042, "learning_rate": 0.002828, "loss": 1.0213, "step": 652992 }, { "epoch": 49.00983114446529, "grad_norm": 0.7065907716751099, "learning_rate": 0.002828, "loss": 1.0139, "step": 653056 }, { "epoch": 49.014634146341464, "grad_norm": 0.6138619184494019, "learning_rate": 0.002828, "loss": 1.0155, "step": 653120 }, { "epoch": 49.019437148217634, "grad_norm": 0.640450656414032, "learning_rate": 0.002828, "loss": 1.0183, "step": 653184 }, { "epoch": 49.02424015009381, "grad_norm": 0.7467250227928162, "learning_rate": 0.002828, "loss": 1.0183, "step": 653248 }, { "epoch": 49.02904315196998, "grad_norm": 0.5681378841400146, "learning_rate": 0.002828, "loss": 1.0222, "step": 653312 }, { "epoch": 49.033846153846156, "grad_norm": 0.5548416972160339, "learning_rate": 0.002828, "loss": 1.0175, "step": 653376 }, { "epoch": 49.038649155722325, "grad_norm": 0.5206427574157715, "learning_rate": 0.002828, "loss": 1.0199, "step": 653440 }, { "epoch": 49.0434521575985, "grad_norm": 0.5543241500854492, "learning_rate": 0.002828, "loss": 1.0229, "step": 653504 }, { "epoch": 49.04825515947467, "grad_norm": 0.701314389705658, "learning_rate": 0.002828, "loss": 1.0198, "step": 653568 }, { "epoch": 49.05305816135085, "grad_norm": 0.6811202764511108, "learning_rate": 0.002828, "loss": 1.0195, "step": 653632 }, { "epoch": 49.05786116322702, "grad_norm": 0.7191393375396729, "learning_rate": 0.002828, "loss": 1.0192, "step": 653696 }, { "epoch": 49.062664165103186, "grad_norm": 0.625001072883606, "learning_rate": 0.002828, "loss": 1.0283, "step": 653760 }, { "epoch": 49.06746716697936, "grad_norm": 0.6109411120414734, "learning_rate": 0.002828, "loss": 1.0214, "step": 653824 }, { "epoch": 49.07227016885553, "grad_norm": 0.562427282333374, "learning_rate": 0.002828, "loss": 1.0236, "step": 653888 }, { "epoch": 49.07707317073171, "grad_norm": 0.6617029905319214, "learning_rate": 0.002828, "loss": 1.0186, "step": 653952 }, { "epoch": 49.08187617260788, "grad_norm": 0.5700415968894958, "learning_rate": 0.002828, "loss": 1.0222, "step": 654016 }, { "epoch": 49.086679174484054, "grad_norm": 0.5865738391876221, "learning_rate": 0.002828, "loss": 1.0226, "step": 654080 }, { "epoch": 49.09148217636022, "grad_norm": 0.6235159635543823, "learning_rate": 0.002828, "loss": 1.0189, "step": 654144 }, { "epoch": 49.0962851782364, "grad_norm": 0.5553084015846252, "learning_rate": 0.002828, "loss": 1.0207, "step": 654208 }, { "epoch": 49.10108818011257, "grad_norm": 0.582048237323761, "learning_rate": 0.002828, "loss": 1.0174, "step": 654272 }, { "epoch": 49.105891181988746, "grad_norm": 0.6419191956520081, "learning_rate": 0.002828, "loss": 1.0255, "step": 654336 }, { "epoch": 49.110694183864915, "grad_norm": 0.512915313243866, "learning_rate": 0.002828, "loss": 1.0275, "step": 654400 }, { "epoch": 49.11549718574109, "grad_norm": 0.6591917872428894, "learning_rate": 0.002828, "loss": 1.0198, "step": 654464 }, { "epoch": 49.12030018761726, "grad_norm": 0.6231454014778137, "learning_rate": 0.002828, "loss": 1.0227, "step": 654528 }, { "epoch": 49.12510318949343, "grad_norm": 0.6315057873725891, "learning_rate": 0.002828, "loss": 1.0215, "step": 654592 }, { "epoch": 49.12990619136961, "grad_norm": 0.6685563325881958, "learning_rate": 0.002828, "loss": 1.0217, "step": 654656 }, { "epoch": 49.134709193245776, "grad_norm": 0.5746342539787292, "learning_rate": 0.002828, "loss": 1.0264, "step": 654720 }, { "epoch": 49.13951219512195, "grad_norm": 0.6998280882835388, "learning_rate": 0.002828, "loss": 1.0225, "step": 654784 }, { "epoch": 49.14431519699812, "grad_norm": 0.73170006275177, "learning_rate": 0.002828, "loss": 1.0283, "step": 654848 }, { "epoch": 49.1491181988743, "grad_norm": 0.7164236903190613, "learning_rate": 0.002828, "loss": 1.0225, "step": 654912 }, { "epoch": 49.15392120075047, "grad_norm": 0.7144173979759216, "learning_rate": 0.002828, "loss": 1.0273, "step": 654976 }, { "epoch": 49.158724202626644, "grad_norm": 0.4856126606464386, "learning_rate": 0.002828, "loss": 1.0245, "step": 655040 }, { "epoch": 49.16352720450281, "grad_norm": 0.6092809438705444, "learning_rate": 0.002828, "loss": 1.0273, "step": 655104 }, { "epoch": 49.16833020637899, "grad_norm": 0.6352282762527466, "learning_rate": 0.002828, "loss": 1.0259, "step": 655168 }, { "epoch": 49.17313320825516, "grad_norm": 0.5976561307907104, "learning_rate": 0.002828, "loss": 1.025, "step": 655232 }, { "epoch": 49.177936210131335, "grad_norm": 0.6315568685531616, "learning_rate": 0.002828, "loss": 1.0252, "step": 655296 }, { "epoch": 49.182739212007505, "grad_norm": 0.6927725076675415, "learning_rate": 0.002828, "loss": 1.0295, "step": 655360 }, { "epoch": 49.187542213883674, "grad_norm": 0.6839077472686768, "learning_rate": 0.002828, "loss": 1.0266, "step": 655424 }, { "epoch": 49.19234521575985, "grad_norm": 0.571959376335144, "learning_rate": 0.002828, "loss": 1.0282, "step": 655488 }, { "epoch": 49.19714821763602, "grad_norm": 0.553979754447937, "learning_rate": 0.002828, "loss": 1.0267, "step": 655552 }, { "epoch": 49.201951219512196, "grad_norm": 0.6836845278739929, "learning_rate": 0.002828, "loss": 1.0314, "step": 655616 }, { "epoch": 49.206754221388366, "grad_norm": 0.647217869758606, "learning_rate": 0.002828, "loss": 1.03, "step": 655680 }, { "epoch": 49.21155722326454, "grad_norm": 0.6496232748031616, "learning_rate": 0.002828, "loss": 1.0259, "step": 655744 }, { "epoch": 49.21636022514071, "grad_norm": 0.6329558491706848, "learning_rate": 0.002828, "loss": 1.0351, "step": 655808 }, { "epoch": 49.22116322701689, "grad_norm": 0.5167147517204285, "learning_rate": 0.002828, "loss": 1.0259, "step": 655872 }, { "epoch": 49.22596622889306, "grad_norm": 0.7184466123580933, "learning_rate": 0.002828, "loss": 1.029, "step": 655936 }, { "epoch": 49.23076923076923, "grad_norm": 0.7324831485748291, "learning_rate": 0.002828, "loss": 1.0298, "step": 656000 }, { "epoch": 49.2355722326454, "grad_norm": 0.6538679003715515, "learning_rate": 0.002828, "loss": 1.0287, "step": 656064 }, { "epoch": 49.24037523452158, "grad_norm": 0.6968247890472412, "learning_rate": 0.002828, "loss": 1.0251, "step": 656128 }, { "epoch": 49.24517823639775, "grad_norm": 0.5685263872146606, "learning_rate": 0.002828, "loss": 1.0242, "step": 656192 }, { "epoch": 49.24998123827392, "grad_norm": 0.6809595227241516, "learning_rate": 0.002828, "loss": 1.0306, "step": 656256 }, { "epoch": 49.254784240150094, "grad_norm": 0.6379225850105286, "learning_rate": 0.002828, "loss": 1.0288, "step": 656320 }, { "epoch": 49.259587242026264, "grad_norm": 0.7669009566307068, "learning_rate": 0.002828, "loss": 1.033, "step": 656384 }, { "epoch": 49.26439024390244, "grad_norm": 0.6783488392829895, "learning_rate": 0.002828, "loss": 1.0265, "step": 656448 }, { "epoch": 49.26919324577861, "grad_norm": 0.6145064830780029, "learning_rate": 0.002828, "loss": 1.031, "step": 656512 }, { "epoch": 49.273996247654786, "grad_norm": 0.6485490798950195, "learning_rate": 0.002828, "loss": 1.0337, "step": 656576 }, { "epoch": 49.278799249530955, "grad_norm": 0.636939525604248, "learning_rate": 0.002828, "loss": 1.0252, "step": 656640 }, { "epoch": 49.28360225140713, "grad_norm": 0.5966800451278687, "learning_rate": 0.002828, "loss": 1.0321, "step": 656704 }, { "epoch": 49.2884052532833, "grad_norm": 0.5692200660705566, "learning_rate": 0.002828, "loss": 1.0303, "step": 656768 }, { "epoch": 49.29320825515948, "grad_norm": 0.724468469619751, "learning_rate": 0.002828, "loss": 1.029, "step": 656832 }, { "epoch": 49.29801125703565, "grad_norm": 0.727816641330719, "learning_rate": 0.002828, "loss": 1.027, "step": 656896 }, { "epoch": 49.30281425891182, "grad_norm": 0.6119939684867859, "learning_rate": 0.002828, "loss": 1.0332, "step": 656960 }, { "epoch": 49.30761726078799, "grad_norm": 0.6606029272079468, "learning_rate": 0.002828, "loss": 1.0348, "step": 657024 }, { "epoch": 49.31242026266416, "grad_norm": 0.6363573670387268, "learning_rate": 0.002828, "loss": 1.0233, "step": 657088 }, { "epoch": 49.31722326454034, "grad_norm": 0.6781613230705261, "learning_rate": 0.002828, "loss": 1.0332, "step": 657152 }, { "epoch": 49.32202626641651, "grad_norm": 0.6983715295791626, "learning_rate": 0.002828, "loss": 1.0245, "step": 657216 }, { "epoch": 49.326829268292684, "grad_norm": 0.7369449138641357, "learning_rate": 0.002828, "loss": 1.0301, "step": 657280 }, { "epoch": 49.33163227016885, "grad_norm": 0.6598891615867615, "learning_rate": 0.002828, "loss": 1.0271, "step": 657344 }, { "epoch": 49.33643527204503, "grad_norm": 0.8296746015548706, "learning_rate": 0.002828, "loss": 1.0339, "step": 657408 }, { "epoch": 49.3412382739212, "grad_norm": 0.7491599917411804, "learning_rate": 0.002828, "loss": 1.0335, "step": 657472 }, { "epoch": 49.346041275797376, "grad_norm": 0.7481504082679749, "learning_rate": 0.002828, "loss": 1.0337, "step": 657536 }, { "epoch": 49.350844277673545, "grad_norm": 0.7235509157180786, "learning_rate": 0.002828, "loss": 1.0284, "step": 657600 }, { "epoch": 49.35564727954972, "grad_norm": 0.6248297095298767, "learning_rate": 0.002828, "loss": 1.0358, "step": 657664 }, { "epoch": 49.36045028142589, "grad_norm": 0.717667818069458, "learning_rate": 0.002828, "loss": 1.0325, "step": 657728 }, { "epoch": 49.36525328330207, "grad_norm": 0.6365737915039062, "learning_rate": 0.002828, "loss": 1.0325, "step": 657792 }, { "epoch": 49.37005628517824, "grad_norm": 0.6971416473388672, "learning_rate": 0.002828, "loss": 1.0308, "step": 657856 }, { "epoch": 49.374859287054406, "grad_norm": 0.6160202622413635, "learning_rate": 0.002828, "loss": 1.0295, "step": 657920 }, { "epoch": 49.37966228893058, "grad_norm": 0.5844454765319824, "learning_rate": 0.002828, "loss": 1.0349, "step": 657984 }, { "epoch": 49.38446529080675, "grad_norm": 0.7336181402206421, "learning_rate": 0.002828, "loss": 1.035, "step": 658048 }, { "epoch": 49.38926829268293, "grad_norm": 0.6621116399765015, "learning_rate": 0.002828, "loss": 1.0389, "step": 658112 }, { "epoch": 49.3940712945591, "grad_norm": 0.7765074372291565, "learning_rate": 0.002828, "loss": 1.0363, "step": 658176 }, { "epoch": 49.398874296435274, "grad_norm": 0.6697800159454346, "learning_rate": 0.002828, "loss": 1.0296, "step": 658240 }, { "epoch": 49.40367729831144, "grad_norm": 0.6838088035583496, "learning_rate": 0.002828, "loss": 1.0321, "step": 658304 }, { "epoch": 49.40848030018762, "grad_norm": 0.6083168387413025, "learning_rate": 0.002828, "loss": 1.0368, "step": 658368 }, { "epoch": 49.41328330206379, "grad_norm": 0.7235950231552124, "learning_rate": 0.002828, "loss": 1.0326, "step": 658432 }, { "epoch": 49.418086303939965, "grad_norm": 0.6048862934112549, "learning_rate": 0.002828, "loss": 1.036, "step": 658496 }, { "epoch": 49.422889305816135, "grad_norm": 0.6545407176017761, "learning_rate": 0.002828, "loss": 1.0306, "step": 658560 }, { "epoch": 49.42769230769231, "grad_norm": 0.6188938617706299, "learning_rate": 0.002828, "loss": 1.0363, "step": 658624 }, { "epoch": 49.43249530956848, "grad_norm": 0.5984054207801819, "learning_rate": 0.002828, "loss": 1.0326, "step": 658688 }, { "epoch": 49.43729831144465, "grad_norm": 0.585507333278656, "learning_rate": 0.002828, "loss": 1.0357, "step": 658752 }, { "epoch": 49.442101313320826, "grad_norm": 0.6248952150344849, "learning_rate": 0.002828, "loss": 1.0356, "step": 658816 }, { "epoch": 49.446904315196996, "grad_norm": 0.5708743333816528, "learning_rate": 0.002828, "loss": 1.035, "step": 658880 }, { "epoch": 49.45170731707317, "grad_norm": 0.7817354202270508, "learning_rate": 0.002828, "loss": 1.0371, "step": 658944 }, { "epoch": 49.45651031894934, "grad_norm": 0.6046018600463867, "learning_rate": 0.002828, "loss": 1.0395, "step": 659008 }, { "epoch": 49.46131332082552, "grad_norm": 0.7188746333122253, "learning_rate": 0.002828, "loss": 1.0356, "step": 659072 }, { "epoch": 49.46611632270169, "grad_norm": 0.5960226058959961, "learning_rate": 0.002828, "loss": 1.0383, "step": 659136 }, { "epoch": 49.470919324577864, "grad_norm": 0.6067889928817749, "learning_rate": 0.002828, "loss": 1.0337, "step": 659200 }, { "epoch": 49.47572232645403, "grad_norm": 0.5953211188316345, "learning_rate": 0.002828, "loss": 1.0352, "step": 659264 }, { "epoch": 49.48052532833021, "grad_norm": 0.6781429052352905, "learning_rate": 0.002828, "loss": 1.0365, "step": 659328 }, { "epoch": 49.48532833020638, "grad_norm": 0.765910804271698, "learning_rate": 0.002828, "loss": 1.0387, "step": 659392 }, { "epoch": 49.490131332082555, "grad_norm": 0.6305378675460815, "learning_rate": 0.002828, "loss": 1.0318, "step": 659456 }, { "epoch": 49.494934333958724, "grad_norm": 0.6369653344154358, "learning_rate": 0.002828, "loss": 1.0384, "step": 659520 }, { "epoch": 49.499737335834894, "grad_norm": 0.7140512466430664, "learning_rate": 0.002828, "loss": 1.0365, "step": 659584 }, { "epoch": 49.50454033771107, "grad_norm": 0.640666127204895, "learning_rate": 0.002828, "loss": 1.0358, "step": 659648 }, { "epoch": 49.50934333958724, "grad_norm": 0.601139485836029, "learning_rate": 0.002828, "loss": 1.0361, "step": 659712 }, { "epoch": 49.514146341463416, "grad_norm": 0.5523284673690796, "learning_rate": 0.002828, "loss": 1.0383, "step": 659776 }, { "epoch": 49.518949343339585, "grad_norm": 0.6139888763427734, "learning_rate": 0.002828, "loss": 1.0397, "step": 659840 }, { "epoch": 49.52375234521576, "grad_norm": 0.5877168774604797, "learning_rate": 0.002828, "loss": 1.035, "step": 659904 }, { "epoch": 49.52855534709193, "grad_norm": 0.7172452807426453, "learning_rate": 0.002828, "loss": 1.0365, "step": 659968 }, { "epoch": 49.53335834896811, "grad_norm": 0.6976636052131653, "learning_rate": 0.002828, "loss": 1.0322, "step": 660032 }, { "epoch": 49.53816135084428, "grad_norm": 0.6075940728187561, "learning_rate": 0.002828, "loss": 1.0372, "step": 660096 }, { "epoch": 49.54296435272045, "grad_norm": 0.7370678782463074, "learning_rate": 0.002828, "loss": 1.034, "step": 660160 }, { "epoch": 49.54776735459662, "grad_norm": 0.6457955241203308, "learning_rate": 0.002828, "loss": 1.0433, "step": 660224 }, { "epoch": 49.5525703564728, "grad_norm": 0.6360769867897034, "learning_rate": 0.002828, "loss": 1.0416, "step": 660288 }, { "epoch": 49.55737335834897, "grad_norm": 0.5721134543418884, "learning_rate": 0.002828, "loss": 1.0366, "step": 660352 }, { "epoch": 49.56217636022514, "grad_norm": 0.5355108976364136, "learning_rate": 0.002828, "loss": 1.0366, "step": 660416 }, { "epoch": 49.566979362101314, "grad_norm": 0.6029777526855469, "learning_rate": 0.002828, "loss": 1.037, "step": 660480 }, { "epoch": 49.57178236397748, "grad_norm": 0.6070830821990967, "learning_rate": 0.002828, "loss": 1.0351, "step": 660544 }, { "epoch": 49.57658536585366, "grad_norm": 0.6995278596878052, "learning_rate": 0.002828, "loss": 1.0385, "step": 660608 }, { "epoch": 49.58138836772983, "grad_norm": 0.6078864932060242, "learning_rate": 0.002828, "loss": 1.0363, "step": 660672 }, { "epoch": 49.586191369606006, "grad_norm": 0.6785514950752258, "learning_rate": 0.002828, "loss": 1.0382, "step": 660736 }, { "epoch": 49.590994371482175, "grad_norm": 0.6332456469535828, "learning_rate": 0.002828, "loss": 1.0358, "step": 660800 }, { "epoch": 49.59579737335835, "grad_norm": 0.638705849647522, "learning_rate": 0.002828, "loss": 1.0395, "step": 660864 }, { "epoch": 49.60060037523452, "grad_norm": 0.6583680510520935, "learning_rate": 0.002828, "loss": 1.0365, "step": 660928 }, { "epoch": 49.6054033771107, "grad_norm": 0.6483926773071289, "learning_rate": 0.002828, "loss": 1.0354, "step": 660992 }, { "epoch": 49.61020637898687, "grad_norm": 0.6064408421516418, "learning_rate": 0.002828, "loss": 1.0344, "step": 661056 }, { "epoch": 49.615009380863036, "grad_norm": 0.7157003283500671, "learning_rate": 0.002828, "loss": 1.0425, "step": 661120 }, { "epoch": 49.61981238273921, "grad_norm": 0.5999187231063843, "learning_rate": 0.002828, "loss": 1.0358, "step": 661184 }, { "epoch": 49.62461538461538, "grad_norm": 0.5730012655258179, "learning_rate": 0.002828, "loss": 1.037, "step": 661248 }, { "epoch": 49.62941838649156, "grad_norm": 0.6842864155769348, "learning_rate": 0.002828, "loss": 1.0373, "step": 661312 }, { "epoch": 49.63422138836773, "grad_norm": 0.6744268536567688, "learning_rate": 0.002828, "loss": 1.0338, "step": 661376 }, { "epoch": 49.639024390243904, "grad_norm": 0.6678750514984131, "learning_rate": 0.002828, "loss": 1.0405, "step": 661440 }, { "epoch": 49.64382739212007, "grad_norm": 0.5240787267684937, "learning_rate": 0.002828, "loss": 1.0375, "step": 661504 }, { "epoch": 49.64863039399625, "grad_norm": 0.6266018152236938, "learning_rate": 0.002828, "loss": 1.0392, "step": 661568 }, { "epoch": 49.65343339587242, "grad_norm": 0.6554743647575378, "learning_rate": 0.002828, "loss": 1.0417, "step": 661632 }, { "epoch": 49.658236397748595, "grad_norm": 0.5542384386062622, "learning_rate": 0.002828, "loss": 1.0337, "step": 661696 }, { "epoch": 49.663039399624765, "grad_norm": 0.6251294612884521, "learning_rate": 0.002828, "loss": 1.0389, "step": 661760 }, { "epoch": 49.66784240150094, "grad_norm": 0.5415288805961609, "learning_rate": 0.002828, "loss": 1.0464, "step": 661824 }, { "epoch": 49.67264540337711, "grad_norm": 0.7915424108505249, "learning_rate": 0.002828, "loss": 1.0446, "step": 661888 }, { "epoch": 49.67744840525328, "grad_norm": 0.5885140299797058, "learning_rate": 0.002828, "loss": 1.0421, "step": 661952 }, { "epoch": 49.682251407129456, "grad_norm": 0.6221455931663513, "learning_rate": 0.002828, "loss": 1.0386, "step": 662016 }, { "epoch": 49.687054409005626, "grad_norm": 0.8090603947639465, "learning_rate": 0.002828, "loss": 1.0413, "step": 662080 }, { "epoch": 49.6918574108818, "grad_norm": 0.6631290912628174, "learning_rate": 0.002828, "loss": 1.0358, "step": 662144 }, { "epoch": 49.69666041275797, "grad_norm": 0.5944517254829407, "learning_rate": 0.002828, "loss": 1.0397, "step": 662208 }, { "epoch": 49.70146341463415, "grad_norm": 0.560357391834259, "learning_rate": 0.002828, "loss": 1.0444, "step": 662272 }, { "epoch": 49.70626641651032, "grad_norm": 0.5998170971870422, "learning_rate": 0.002828, "loss": 1.0386, "step": 662336 }, { "epoch": 49.711069418386494, "grad_norm": 0.5775870084762573, "learning_rate": 0.002828, "loss": 1.0447, "step": 662400 }, { "epoch": 49.71587242026266, "grad_norm": 0.6063507199287415, "learning_rate": 0.002828, "loss": 1.0406, "step": 662464 }, { "epoch": 49.72067542213884, "grad_norm": 0.624538779258728, "learning_rate": 0.002828, "loss": 1.0397, "step": 662528 }, { "epoch": 49.72547842401501, "grad_norm": 0.6854320168495178, "learning_rate": 0.002828, "loss": 1.0418, "step": 662592 }, { "epoch": 49.730281425891185, "grad_norm": 0.7011392116546631, "learning_rate": 0.002828, "loss": 1.0439, "step": 662656 }, { "epoch": 49.735084427767354, "grad_norm": 0.6861682534217834, "learning_rate": 0.002828, "loss": 1.0411, "step": 662720 }, { "epoch": 49.739887429643524, "grad_norm": 0.5691295266151428, "learning_rate": 0.002828, "loss": 1.0449, "step": 662784 }, { "epoch": 49.7446904315197, "grad_norm": 0.6943109631538391, "learning_rate": 0.002828, "loss": 1.0402, "step": 662848 }, { "epoch": 49.74949343339587, "grad_norm": 0.6759951710700989, "learning_rate": 0.002828, "loss": 1.045, "step": 662912 }, { "epoch": 49.754296435272046, "grad_norm": 0.5847229361534119, "learning_rate": 0.002828, "loss": 1.0439, "step": 662976 }, { "epoch": 49.759099437148215, "grad_norm": 0.6311315894126892, "learning_rate": 0.002828, "loss": 1.0403, "step": 663040 }, { "epoch": 49.76390243902439, "grad_norm": 0.5579125285148621, "learning_rate": 0.002828, "loss": 1.0427, "step": 663104 }, { "epoch": 49.76870544090056, "grad_norm": 0.6787163019180298, "learning_rate": 0.002828, "loss": 1.0425, "step": 663168 }, { "epoch": 49.77350844277674, "grad_norm": 0.7048472166061401, "learning_rate": 0.002828, "loss": 1.0473, "step": 663232 }, { "epoch": 49.77831144465291, "grad_norm": 0.5743940472602844, "learning_rate": 0.002828, "loss": 1.0452, "step": 663296 }, { "epoch": 49.78311444652908, "grad_norm": 0.6513806581497192, "learning_rate": 0.002828, "loss": 1.0351, "step": 663360 }, { "epoch": 49.78791744840525, "grad_norm": 0.6481301188468933, "learning_rate": 0.002828, "loss": 1.0443, "step": 663424 }, { "epoch": 49.79272045028143, "grad_norm": 0.6559600234031677, "learning_rate": 0.002828, "loss": 1.0474, "step": 663488 }, { "epoch": 49.7975234521576, "grad_norm": 0.571797251701355, "learning_rate": 0.002828, "loss": 1.0481, "step": 663552 }, { "epoch": 49.80232645403377, "grad_norm": 0.6878950595855713, "learning_rate": 0.002828, "loss": 1.0432, "step": 663616 }, { "epoch": 49.807129455909944, "grad_norm": 0.5944818258285522, "learning_rate": 0.002828, "loss": 1.0453, "step": 663680 }, { "epoch": 49.811932457786114, "grad_norm": 0.7458009123802185, "learning_rate": 0.002828, "loss": 1.0421, "step": 663744 }, { "epoch": 49.81673545966229, "grad_norm": 0.6427059769630432, "learning_rate": 0.002828, "loss": 1.0408, "step": 663808 }, { "epoch": 49.82153846153846, "grad_norm": 0.5951507091522217, "learning_rate": 0.002828, "loss": 1.0432, "step": 663872 }, { "epoch": 49.826341463414636, "grad_norm": 0.6637440919876099, "learning_rate": 0.002828, "loss": 1.0419, "step": 663936 }, { "epoch": 49.831144465290805, "grad_norm": 0.6890386343002319, "learning_rate": 0.002828, "loss": 1.0393, "step": 664000 }, { "epoch": 49.83594746716698, "grad_norm": 0.5513873100280762, "learning_rate": 0.002828, "loss": 1.0435, "step": 664064 }, { "epoch": 49.84075046904315, "grad_norm": 0.5582188963890076, "learning_rate": 0.002828, "loss": 1.0426, "step": 664128 }, { "epoch": 49.84555347091933, "grad_norm": 0.7070587873458862, "learning_rate": 0.002828, "loss": 1.0469, "step": 664192 }, { "epoch": 49.8503564727955, "grad_norm": 0.6251951456069946, "learning_rate": 0.002828, "loss": 1.0463, "step": 664256 }, { "epoch": 49.85515947467167, "grad_norm": 0.7980432510375977, "learning_rate": 0.002828, "loss": 1.046, "step": 664320 }, { "epoch": 49.85996247654784, "grad_norm": 0.5912220478057861, "learning_rate": 0.002828, "loss": 1.0438, "step": 664384 }, { "epoch": 49.86476547842401, "grad_norm": 0.651661217212677, "learning_rate": 0.002828, "loss": 1.0367, "step": 664448 }, { "epoch": 49.86956848030019, "grad_norm": 0.5960105657577515, "learning_rate": 0.002828, "loss": 1.0428, "step": 664512 }, { "epoch": 49.87437148217636, "grad_norm": 0.6765246987342834, "learning_rate": 0.002828, "loss": 1.043, "step": 664576 }, { "epoch": 49.879174484052534, "grad_norm": 0.6367417573928833, "learning_rate": 0.002828, "loss": 1.0459, "step": 664640 }, { "epoch": 49.8839774859287, "grad_norm": 0.6240684986114502, "learning_rate": 0.002828, "loss": 1.0473, "step": 664704 }, { "epoch": 49.88878048780488, "grad_norm": 0.5741950869560242, "learning_rate": 0.002828, "loss": 1.044, "step": 664768 }, { "epoch": 49.89358348968105, "grad_norm": 0.6683483123779297, "learning_rate": 0.002828, "loss": 1.0451, "step": 664832 }, { "epoch": 49.898386491557225, "grad_norm": 0.633254885673523, "learning_rate": 0.002828, "loss": 1.0447, "step": 664896 }, { "epoch": 49.903189493433395, "grad_norm": 0.7444667220115662, "learning_rate": 0.002828, "loss": 1.0422, "step": 664960 }, { "epoch": 49.90799249530957, "grad_norm": 0.5886372923851013, "learning_rate": 0.002828, "loss": 1.0428, "step": 665024 }, { "epoch": 49.91279549718574, "grad_norm": 0.595720112323761, "learning_rate": 0.002828, "loss": 1.0499, "step": 665088 }, { "epoch": 49.91759849906192, "grad_norm": 0.6207606792449951, "learning_rate": 0.002828, "loss": 1.0417, "step": 665152 }, { "epoch": 49.922401500938086, "grad_norm": 0.5864999890327454, "learning_rate": 0.002828, "loss": 1.0456, "step": 665216 }, { "epoch": 49.927204502814256, "grad_norm": 0.7752425670623779, "learning_rate": 0.002828, "loss": 1.0438, "step": 665280 }, { "epoch": 49.93200750469043, "grad_norm": 0.552574634552002, "learning_rate": 0.002828, "loss": 1.0461, "step": 665344 }, { "epoch": 49.9368105065666, "grad_norm": 0.576705813407898, "learning_rate": 0.002828, "loss": 1.0453, "step": 665408 }, { "epoch": 49.94161350844278, "grad_norm": 0.6199607849121094, "learning_rate": 0.002828, "loss": 1.049, "step": 665472 }, { "epoch": 49.94641651031895, "grad_norm": 0.6204496622085571, "learning_rate": 0.002828, "loss": 1.044, "step": 665536 }, { "epoch": 49.951219512195124, "grad_norm": 0.5873743891716003, "learning_rate": 0.002828, "loss": 1.043, "step": 665600 }, { "epoch": 49.95602251407129, "grad_norm": 0.8199127316474915, "learning_rate": 0.002828, "loss": 1.0496, "step": 665664 }, { "epoch": 49.96082551594747, "grad_norm": 0.5267244577407837, "learning_rate": 0.002828, "loss": 1.0469, "step": 665728 }, { "epoch": 49.96562851782364, "grad_norm": 0.6444991827011108, "learning_rate": 0.002828, "loss": 1.0432, "step": 665792 }, { "epoch": 49.970431519699815, "grad_norm": 0.7863524556159973, "learning_rate": 0.002828, "loss": 1.0399, "step": 665856 }, { "epoch": 49.975234521575985, "grad_norm": 0.619107186794281, "learning_rate": 0.002828, "loss": 1.0424, "step": 665920 }, { "epoch": 49.98003752345216, "grad_norm": 0.6243489384651184, "learning_rate": 0.002828, "loss": 1.0404, "step": 665984 }, { "epoch": 49.98484052532833, "grad_norm": 0.6714879870414734, "learning_rate": 0.002828, "loss": 1.0467, "step": 666048 }, { "epoch": 49.9896435272045, "grad_norm": 0.6690127849578857, "learning_rate": 0.002828, "loss": 1.0477, "step": 666112 }, { "epoch": 49.994446529080676, "grad_norm": 0.6523921489715576, "learning_rate": 0.002828, "loss": 1.0537, "step": 666176 }, { "epoch": 49.999249530956845, "grad_norm": 0.7129687070846558, "learning_rate": 0.002828, "loss": 1.0488, "step": 666240 } ], "logging_steps": 64, "max_steps": 666250, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 320, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.313475466887168e+19, "train_batch_size": 200, "trial_name": null, "trial_params": null }