| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.9997595575859581, | |
| "eval_steps": 500, | |
| "global_step": 12476, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0016029494269455798, | |
| "grad_norm": 5.699583530426025, | |
| "learning_rate": 1.6025641025641025e-07, | |
| "loss": 0.5515, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0032058988538911596, | |
| "grad_norm": 5.793712615966797, | |
| "learning_rate": 3.205128205128205e-07, | |
| "loss": 0.545, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.004808848280836739, | |
| "grad_norm": 4.68794059753418, | |
| "learning_rate": 4.807692307692308e-07, | |
| "loss": 0.525, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.006411797707782319, | |
| "grad_norm": 3.254547595977783, | |
| "learning_rate": 6.41025641025641e-07, | |
| "loss": 0.4268, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.0080147471347279, | |
| "grad_norm": 2.2941272258758545, | |
| "learning_rate": 8.012820512820515e-07, | |
| "loss": 0.3266, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.009617696561673479, | |
| "grad_norm": 1.5437036752700806, | |
| "learning_rate": 9.615384615384617e-07, | |
| "loss": 0.2447, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.01122064598861906, | |
| "grad_norm": 0.6866864562034607, | |
| "learning_rate": 1.121794871794872e-06, | |
| "loss": 0.1701, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.012823595415564638, | |
| "grad_norm": 0.5046991109848022, | |
| "learning_rate": 1.282051282051282e-06, | |
| "loss": 0.1366, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.014426544842510219, | |
| "grad_norm": 0.4317735433578491, | |
| "learning_rate": 1.4423076923076922e-06, | |
| "loss": 0.1178, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.0160294942694558, | |
| "grad_norm": 0.39152026176452637, | |
| "learning_rate": 1.602564102564103e-06, | |
| "loss": 0.1008, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.01763244369640138, | |
| "grad_norm": 0.3579396605491638, | |
| "learning_rate": 1.7628205128205131e-06, | |
| "loss": 0.0927, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.019235393123346958, | |
| "grad_norm": 0.3451220393180847, | |
| "learning_rate": 1.9230769230769234e-06, | |
| "loss": 0.0885, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.02083834255029254, | |
| "grad_norm": 0.32997310161590576, | |
| "learning_rate": 2.0833333333333334e-06, | |
| "loss": 0.0787, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.02244129197723812, | |
| "grad_norm": 0.31072285771369934, | |
| "learning_rate": 2.243589743589744e-06, | |
| "loss": 0.0736, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.024044241404183698, | |
| "grad_norm": 0.3469359874725342, | |
| "learning_rate": 2.403846153846154e-06, | |
| "loss": 0.0731, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.025647190831129277, | |
| "grad_norm": 0.29716140031814575, | |
| "learning_rate": 2.564102564102564e-06, | |
| "loss": 0.067, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.02725014025807486, | |
| "grad_norm": 0.3381027281284332, | |
| "learning_rate": 2.7243589743589744e-06, | |
| "loss": 0.0628, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.028853089685020438, | |
| "grad_norm": 0.31842488050460815, | |
| "learning_rate": 2.8846153846153845e-06, | |
| "loss": 0.0608, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.030456039111966017, | |
| "grad_norm": 0.31226950883865356, | |
| "learning_rate": 3.044871794871795e-06, | |
| "loss": 0.0642, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.0320589885389116, | |
| "grad_norm": 0.29508596658706665, | |
| "learning_rate": 3.205128205128206e-06, | |
| "loss": 0.0556, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.033661937965857175, | |
| "grad_norm": 0.33342409133911133, | |
| "learning_rate": 3.365384615384616e-06, | |
| "loss": 0.0542, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.03526488739280276, | |
| "grad_norm": 0.32582512497901917, | |
| "learning_rate": 3.5256410256410263e-06, | |
| "loss": 0.0513, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.03686783681974834, | |
| "grad_norm": 0.30706045031547546, | |
| "learning_rate": 3.6858974358974363e-06, | |
| "loss": 0.0533, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.038470786246693915, | |
| "grad_norm": 0.3309493064880371, | |
| "learning_rate": 3.846153846153847e-06, | |
| "loss": 0.0494, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.0400737356736395, | |
| "grad_norm": 0.2860076129436493, | |
| "learning_rate": 4.006410256410257e-06, | |
| "loss": 0.0489, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.04167668510058508, | |
| "grad_norm": 0.26025334000587463, | |
| "learning_rate": 4.166666666666667e-06, | |
| "loss": 0.0456, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.043279634527530655, | |
| "grad_norm": 1.0466290712356567, | |
| "learning_rate": 4.326923076923077e-06, | |
| "loss": 0.0475, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.04488258395447624, | |
| "grad_norm": 0.31393590569496155, | |
| "learning_rate": 4.487179487179488e-06, | |
| "loss": 0.0484, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.04648553338142181, | |
| "grad_norm": 0.2957313060760498, | |
| "learning_rate": 4.647435897435898e-06, | |
| "loss": 0.0441, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.048088482808367396, | |
| "grad_norm": 0.2924838066101074, | |
| "learning_rate": 4.807692307692308e-06, | |
| "loss": 0.0433, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.04969143223531298, | |
| "grad_norm": 0.2568668723106384, | |
| "learning_rate": 4.967948717948718e-06, | |
| "loss": 0.0457, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.051294381662258554, | |
| "grad_norm": 0.3006618320941925, | |
| "learning_rate": 5.128205128205128e-06, | |
| "loss": 0.0436, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.052897331089204136, | |
| "grad_norm": 0.23940995335578918, | |
| "learning_rate": 5.288461538461539e-06, | |
| "loss": 0.0408, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.05450028051614972, | |
| "grad_norm": 0.30063626170158386, | |
| "learning_rate": 5.448717948717949e-06, | |
| "loss": 0.0409, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.056103229943095294, | |
| "grad_norm": 0.2716442048549652, | |
| "learning_rate": 5.608974358974359e-06, | |
| "loss": 0.0392, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.057706179370040876, | |
| "grad_norm": 0.27399641275405884, | |
| "learning_rate": 5.769230769230769e-06, | |
| "loss": 0.0375, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.05930912879698645, | |
| "grad_norm": 0.252614825963974, | |
| "learning_rate": 5.92948717948718e-06, | |
| "loss": 0.0378, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.060912078223932034, | |
| "grad_norm": 0.2915278971195221, | |
| "learning_rate": 6.08974358974359e-06, | |
| "loss": 0.0382, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.06251502765087762, | |
| "grad_norm": 0.2674607038497925, | |
| "learning_rate": 6.25e-06, | |
| "loss": 0.0376, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.0641179770778232, | |
| "grad_norm": 0.23759065568447113, | |
| "learning_rate": 6.410256410256412e-06, | |
| "loss": 0.0376, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.06572092650476878, | |
| "grad_norm": 0.28716912865638733, | |
| "learning_rate": 6.570512820512821e-06, | |
| "loss": 0.0368, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.06732387593171435, | |
| "grad_norm": 0.23370076715946198, | |
| "learning_rate": 6.730769230769232e-06, | |
| "loss": 0.0353, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.06892682535865993, | |
| "grad_norm": 0.24973611533641815, | |
| "learning_rate": 6.891025641025641e-06, | |
| "loss": 0.0345, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.07052977478560551, | |
| "grad_norm": 0.2501871585845947, | |
| "learning_rate": 7.051282051282053e-06, | |
| "loss": 0.0353, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.0721327242125511, | |
| "grad_norm": 0.25735384225845337, | |
| "learning_rate": 7.211538461538462e-06, | |
| "loss": 0.0343, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.07373567363949668, | |
| "grad_norm": 0.21811848878860474, | |
| "learning_rate": 7.371794871794873e-06, | |
| "loss": 0.0347, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.07533862306644225, | |
| "grad_norm": 0.25927454233169556, | |
| "learning_rate": 7.532051282051282e-06, | |
| "loss": 0.0334, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.07694157249338783, | |
| "grad_norm": 0.20881901681423187, | |
| "learning_rate": 7.692307692307694e-06, | |
| "loss": 0.0315, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.07854452192033341, | |
| "grad_norm": 0.22877703607082367, | |
| "learning_rate": 7.852564102564102e-06, | |
| "loss": 0.0316, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.080147471347279, | |
| "grad_norm": 0.23380960524082184, | |
| "learning_rate": 8.012820512820515e-06, | |
| "loss": 0.0319, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.08175042077422458, | |
| "grad_norm": 0.2707521915435791, | |
| "learning_rate": 8.173076923076923e-06, | |
| "loss": 0.0329, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.08335337020117016, | |
| "grad_norm": 0.19819176197052002, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 0.0306, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.08495631962811573, | |
| "grad_norm": 0.2094683200120926, | |
| "learning_rate": 8.493589743589744e-06, | |
| "loss": 0.0314, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.08655926905506131, | |
| "grad_norm": 0.21663790941238403, | |
| "learning_rate": 8.653846153846155e-06, | |
| "loss": 0.033, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.0881622184820069, | |
| "grad_norm": 0.2188635766506195, | |
| "learning_rate": 8.814102564102565e-06, | |
| "loss": 0.0302, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.08976516790895248, | |
| "grad_norm": 0.2224225401878357, | |
| "learning_rate": 8.974358974358976e-06, | |
| "loss": 0.0305, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.09136811733589806, | |
| "grad_norm": 0.2301451414823532, | |
| "learning_rate": 9.134615384615384e-06, | |
| "loss": 0.0282, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.09297106676284363, | |
| "grad_norm": 0.21547645330429077, | |
| "learning_rate": 9.294871794871796e-06, | |
| "loss": 0.0314, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.09457401618978921, | |
| "grad_norm": 0.23693452775478363, | |
| "learning_rate": 9.455128205128205e-06, | |
| "loss": 0.0301, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.09617696561673479, | |
| "grad_norm": 0.21754775941371918, | |
| "learning_rate": 9.615384615384616e-06, | |
| "loss": 0.0315, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.09777991504368037, | |
| "grad_norm": 0.17971888184547424, | |
| "learning_rate": 9.775641025641026e-06, | |
| "loss": 0.0276, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.09938286447062596, | |
| "grad_norm": 0.2265903502702713, | |
| "learning_rate": 9.935897435897437e-06, | |
| "loss": 0.0287, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.10098581389757152, | |
| "grad_norm": 0.2187529057264328, | |
| "learning_rate": 1.0096153846153847e-05, | |
| "loss": 0.0271, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.10258876332451711, | |
| "grad_norm": 0.20468100905418396, | |
| "learning_rate": 1.0256410256410256e-05, | |
| "loss": 0.0279, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.10419171275146269, | |
| "grad_norm": 0.2146177440881729, | |
| "learning_rate": 1.0416666666666668e-05, | |
| "loss": 0.0265, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.10579466217840827, | |
| "grad_norm": 0.21035543084144592, | |
| "learning_rate": 1.0576923076923078e-05, | |
| "loss": 0.0284, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.10739761160535385, | |
| "grad_norm": 0.20882540941238403, | |
| "learning_rate": 1.0737179487179487e-05, | |
| "loss": 0.0287, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.10900056103229944, | |
| "grad_norm": 0.18467697501182556, | |
| "learning_rate": 1.0897435897435898e-05, | |
| "loss": 0.0267, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.110603510459245, | |
| "grad_norm": 0.7568922638893127, | |
| "learning_rate": 1.105769230769231e-05, | |
| "loss": 0.0376, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.11220645988619059, | |
| "grad_norm": 0.23849329352378845, | |
| "learning_rate": 1.1217948717948719e-05, | |
| "loss": 0.0317, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.11380940931313617, | |
| "grad_norm": 0.20443181693553925, | |
| "learning_rate": 1.1378205128205129e-05, | |
| "loss": 0.0307, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.11541235874008175, | |
| "grad_norm": 0.18542100489139557, | |
| "learning_rate": 1.1538461538461538e-05, | |
| "loss": 0.026, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.11701530816702733, | |
| "grad_norm": 0.241206094622612, | |
| "learning_rate": 1.169871794871795e-05, | |
| "loss": 0.0308, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.1186182575939729, | |
| "grad_norm": 0.17955875396728516, | |
| "learning_rate": 1.185897435897436e-05, | |
| "loss": 0.0251, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.12022120702091849, | |
| "grad_norm": 0.1853230744600296, | |
| "learning_rate": 1.201923076923077e-05, | |
| "loss": 0.0258, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.12182415644786407, | |
| "grad_norm": 0.16425946354866028, | |
| "learning_rate": 1.217948717948718e-05, | |
| "loss": 0.0252, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.12342710587480965, | |
| "grad_norm": 0.18707670271396637, | |
| "learning_rate": 1.2339743589743592e-05, | |
| "loss": 0.0245, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.12503005530175523, | |
| "grad_norm": 0.2019442468881607, | |
| "learning_rate": 1.25e-05, | |
| "loss": 0.0268, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.12663300472870082, | |
| "grad_norm": 0.16195464134216309, | |
| "learning_rate": 1.2660256410256411e-05, | |
| "loss": 0.0254, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.1282359541556464, | |
| "grad_norm": 0.16929590702056885, | |
| "learning_rate": 1.2820512820512823e-05, | |
| "loss": 0.025, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.12983890358259198, | |
| "grad_norm": 0.1814528852701187, | |
| "learning_rate": 1.2980769230769232e-05, | |
| "loss": 0.0254, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.13144185300953756, | |
| "grad_norm": 0.20797356963157654, | |
| "learning_rate": 1.3141025641025642e-05, | |
| "loss": 0.0253, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.13304480243648312, | |
| "grad_norm": 0.18418587744235992, | |
| "learning_rate": 1.3301282051282051e-05, | |
| "loss": 0.0267, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.1346477518634287, | |
| "grad_norm": 0.16782532632350922, | |
| "learning_rate": 1.3461538461538463e-05, | |
| "loss": 0.0244, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.13625070129037428, | |
| "grad_norm": 0.16734585165977478, | |
| "learning_rate": 1.3621794871794874e-05, | |
| "loss": 0.0238, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.13785365071731986, | |
| "grad_norm": 0.15977711975574493, | |
| "learning_rate": 1.3782051282051283e-05, | |
| "loss": 0.0241, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.13945660014426545, | |
| "grad_norm": 0.20185428857803345, | |
| "learning_rate": 1.3942307692307693e-05, | |
| "loss": 0.0232, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.14105954957121103, | |
| "grad_norm": 0.1522301733493805, | |
| "learning_rate": 1.4102564102564105e-05, | |
| "loss": 0.0247, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.1426624989981566, | |
| "grad_norm": 0.15999549627304077, | |
| "learning_rate": 1.4262820512820514e-05, | |
| "loss": 0.0253, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.1442654484251022, | |
| "grad_norm": 0.16568179428577423, | |
| "learning_rate": 1.4423076923076924e-05, | |
| "loss": 0.025, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.14586839785204778, | |
| "grad_norm": 0.18805263936519623, | |
| "learning_rate": 1.4583333333333333e-05, | |
| "loss": 0.0231, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.14747134727899336, | |
| "grad_norm": 0.17499662935733795, | |
| "learning_rate": 1.4743589743589745e-05, | |
| "loss": 0.0236, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.14907429670593894, | |
| "grad_norm": 0.16887474060058594, | |
| "learning_rate": 1.4903846153846156e-05, | |
| "loss": 0.0239, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.1506772461328845, | |
| "grad_norm": 0.1816807985305786, | |
| "learning_rate": 1.5064102564102565e-05, | |
| "loss": 0.0236, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.15228019555983008, | |
| "grad_norm": 0.16164837777614594, | |
| "learning_rate": 1.5224358974358975e-05, | |
| "loss": 0.0245, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.15388314498677566, | |
| "grad_norm": 0.16409705579280853, | |
| "learning_rate": 1.5384615384615387e-05, | |
| "loss": 0.0228, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.15548609441372124, | |
| "grad_norm": 0.18589679896831512, | |
| "learning_rate": 1.5544871794871796e-05, | |
| "loss": 0.025, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.15708904384066683, | |
| "grad_norm": 0.14719834923744202, | |
| "learning_rate": 1.5705128205128205e-05, | |
| "loss": 0.0243, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.1586919932676124, | |
| "grad_norm": 0.1489681452512741, | |
| "learning_rate": 1.5865384615384617e-05, | |
| "loss": 0.0241, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.160294942694558, | |
| "grad_norm": 0.14481011033058167, | |
| "learning_rate": 1.602564102564103e-05, | |
| "loss": 0.0235, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.16189789212150357, | |
| "grad_norm": 0.19330425560474396, | |
| "learning_rate": 1.6185897435897438e-05, | |
| "loss": 0.0243, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.16350084154844916, | |
| "grad_norm": 0.14850366115570068, | |
| "learning_rate": 1.6346153846153847e-05, | |
| "loss": 0.0218, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.16510379097539474, | |
| "grad_norm": 0.2127213478088379, | |
| "learning_rate": 1.6506410256410255e-05, | |
| "loss": 0.0219, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.16670674040234032, | |
| "grad_norm": 0.15298931300640106, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 0.0206, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.16830968982928587, | |
| "grad_norm": 0.22085171937942505, | |
| "learning_rate": 1.682692307692308e-05, | |
| "loss": 0.0213, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.16991263925623146, | |
| "grad_norm": 0.17538835108280182, | |
| "learning_rate": 1.698717948717949e-05, | |
| "loss": 0.0215, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.17151558868317704, | |
| "grad_norm": 0.15932999551296234, | |
| "learning_rate": 1.7147435897435897e-05, | |
| "loss": 0.021, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.17311853811012262, | |
| "grad_norm": 0.163809135556221, | |
| "learning_rate": 1.730769230769231e-05, | |
| "loss": 0.0208, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.1747214875370682, | |
| "grad_norm": 0.13327869772911072, | |
| "learning_rate": 1.7467948717948718e-05, | |
| "loss": 0.0213, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.1763244369640138, | |
| "grad_norm": 0.15784206986427307, | |
| "learning_rate": 1.762820512820513e-05, | |
| "loss": 0.0224, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.17792738639095937, | |
| "grad_norm": 0.13589483499526978, | |
| "learning_rate": 1.778846153846154e-05, | |
| "loss": 0.021, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.17953033581790495, | |
| "grad_norm": 0.14331687986850739, | |
| "learning_rate": 1.794871794871795e-05, | |
| "loss": 0.0202, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.18113328524485053, | |
| "grad_norm": 0.1520327925682068, | |
| "learning_rate": 1.810897435897436e-05, | |
| "loss": 0.0201, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.18273623467179612, | |
| "grad_norm": 0.154808908700943, | |
| "learning_rate": 1.826923076923077e-05, | |
| "loss": 0.0209, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.1843391840987417, | |
| "grad_norm": 0.13862183690071106, | |
| "learning_rate": 1.842948717948718e-05, | |
| "loss": 0.0213, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.18594213352568725, | |
| "grad_norm": 0.1722225844860077, | |
| "learning_rate": 1.8589743589743593e-05, | |
| "loss": 0.0186, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.18754508295263284, | |
| "grad_norm": 0.1655365228652954, | |
| "learning_rate": 1.8750000000000002e-05, | |
| "loss": 0.0209, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.18914803237957842, | |
| "grad_norm": 0.20939995348453522, | |
| "learning_rate": 1.891025641025641e-05, | |
| "loss": 0.0211, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.190750981806524, | |
| "grad_norm": 0.17024146020412445, | |
| "learning_rate": 1.9070512820512823e-05, | |
| "loss": 0.0213, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.19235393123346958, | |
| "grad_norm": 0.14322948455810547, | |
| "learning_rate": 1.923076923076923e-05, | |
| "loss": 0.0202, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.19395688066041517, | |
| "grad_norm": 0.15687131881713867, | |
| "learning_rate": 1.9391025641025644e-05, | |
| "loss": 0.0191, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.19555983008736075, | |
| "grad_norm": 0.169046089053154, | |
| "learning_rate": 1.9551282051282052e-05, | |
| "loss": 0.0221, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.19716277951430633, | |
| "grad_norm": 0.15385688841342926, | |
| "learning_rate": 1.9711538461538465e-05, | |
| "loss": 0.0201, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.1987657289412519, | |
| "grad_norm": 0.15540941059589386, | |
| "learning_rate": 1.9871794871794873e-05, | |
| "loss": 0.02, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.2003686783681975, | |
| "grad_norm": 0.15357881784439087, | |
| "learning_rate": 1.9999998434240984e-05, | |
| "loss": 0.0201, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.20197162779514305, | |
| "grad_norm": 0.7511164546012878, | |
| "learning_rate": 1.9999943632726828e-05, | |
| "loss": 0.021, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.20357457722208863, | |
| "grad_norm": 0.1908695548772812, | |
| "learning_rate": 1.999981054375207e-05, | |
| "loss": 0.0261, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.20517752664903421, | |
| "grad_norm": 0.17630814015865326, | |
| "learning_rate": 1.999959916835864e-05, | |
| "loss": 0.0211, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.2067804760759798, | |
| "grad_norm": 0.16517992317676544, | |
| "learning_rate": 1.9999309508201362e-05, | |
| "loss": 0.023, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.20838342550292538, | |
| "grad_norm": 0.14057657122612, | |
| "learning_rate": 1.999894156554791e-05, | |
| "loss": 0.0205, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.20998637492987096, | |
| "grad_norm": 0.17232050001621246, | |
| "learning_rate": 1.9998495343278833e-05, | |
| "loss": 0.0201, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.21158932435681654, | |
| "grad_norm": 0.1483970731496811, | |
| "learning_rate": 1.9997970844887513e-05, | |
| "loss": 0.0193, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.21319227378376213, | |
| "grad_norm": 0.11647937446832657, | |
| "learning_rate": 1.9997368074480137e-05, | |
| "loss": 0.0192, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.2147952232107077, | |
| "grad_norm": 0.12476928532123566, | |
| "learning_rate": 1.9996687036775672e-05, | |
| "loss": 0.0199, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.2163981726376533, | |
| "grad_norm": 0.13054953515529633, | |
| "learning_rate": 1.9995927737105818e-05, | |
| "loss": 0.0189, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.21800112206459887, | |
| "grad_norm": 0.11871975660324097, | |
| "learning_rate": 1.9995090181414973e-05, | |
| "loss": 0.0202, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.21960407149154443, | |
| "grad_norm": 0.13187278807163239, | |
| "learning_rate": 1.9994174376260175e-05, | |
| "loss": 0.0184, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.22120702091849, | |
| "grad_norm": 0.12306849658489227, | |
| "learning_rate": 1.9993180328811084e-05, | |
| "loss": 0.019, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.2228099703454356, | |
| "grad_norm": 0.12421499937772751, | |
| "learning_rate": 1.9992108046849883e-05, | |
| "loss": 0.0196, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.22441291977238118, | |
| "grad_norm": 0.11667071282863617, | |
| "learning_rate": 1.9990957538771242e-05, | |
| "loss": 0.0197, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.22601586919932676, | |
| "grad_norm": 0.1250181496143341, | |
| "learning_rate": 1.998972881358225e-05, | |
| "loss": 0.0196, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.22761881862627234, | |
| "grad_norm": 0.11087560653686523, | |
| "learning_rate": 1.9988421880902336e-05, | |
| "loss": 0.0188, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.22922176805321792, | |
| "grad_norm": 0.13298539817333221, | |
| "learning_rate": 1.99870367509632e-05, | |
| "loss": 0.0193, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.2308247174801635, | |
| "grad_norm": 0.15178672969341278, | |
| "learning_rate": 1.998557343460874e-05, | |
| "loss": 0.0195, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.2324276669071091, | |
| "grad_norm": 0.13834117352962494, | |
| "learning_rate": 1.9984031943294947e-05, | |
| "loss": 0.0183, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.23403061633405467, | |
| "grad_norm": 0.13149769604206085, | |
| "learning_rate": 1.9982412289089837e-05, | |
| "loss": 0.0184, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.23563356576100025, | |
| "grad_norm": 0.1371048241853714, | |
| "learning_rate": 1.998071448467334e-05, | |
| "loss": 0.0197, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.2372365151879458, | |
| "grad_norm": 0.10099935531616211, | |
| "learning_rate": 1.9978938543337212e-05, | |
| "loss": 0.0195, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.2388394646148914, | |
| "grad_norm": 0.1115645244717598, | |
| "learning_rate": 1.9977084478984926e-05, | |
| "loss": 0.017, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.24044241404183697, | |
| "grad_norm": 0.147051602602005, | |
| "learning_rate": 1.997515230613156e-05, | |
| "loss": 0.018, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.24204536346878255, | |
| "grad_norm": 0.1126634031534195, | |
| "learning_rate": 1.997314203990369e-05, | |
| "loss": 0.0182, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.24364831289572814, | |
| "grad_norm": 0.11680582165718079, | |
| "learning_rate": 1.9971053696039273e-05, | |
| "loss": 0.0171, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.24525126232267372, | |
| "grad_norm": 0.12330590188503265, | |
| "learning_rate": 1.996888729088751e-05, | |
| "loss": 0.0178, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.2468542117496193, | |
| "grad_norm": 0.11050969362258911, | |
| "learning_rate": 1.9966642841408738e-05, | |
| "loss": 0.0169, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.24845716117656488, | |
| "grad_norm": 0.12690754234790802, | |
| "learning_rate": 1.9964320365174273e-05, | |
| "loss": 0.0178, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.25006011060351047, | |
| "grad_norm": 0.11407110840082169, | |
| "learning_rate": 1.99619198803663e-05, | |
| "loss": 0.0184, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.251663060030456, | |
| "grad_norm": 0.13558299839496613, | |
| "learning_rate": 1.995944140577771e-05, | |
| "loss": 0.0195, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.25326600945740163, | |
| "grad_norm": 0.11716917902231216, | |
| "learning_rate": 1.995688496081196e-05, | |
| "loss": 0.0164, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.2548689588843472, | |
| "grad_norm": 0.112436443567276, | |
| "learning_rate": 1.995425056548292e-05, | |
| "loss": 0.0162, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.2564719083112928, | |
| "grad_norm": 0.10250984877347946, | |
| "learning_rate": 1.9951538240414724e-05, | |
| "loss": 0.0171, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.25807485773823835, | |
| "grad_norm": 0.12778599560260773, | |
| "learning_rate": 1.9948748006841586e-05, | |
| "loss": 0.0172, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.25967780716518396, | |
| "grad_norm": 0.0987766906619072, | |
| "learning_rate": 1.9945879886607666e-05, | |
| "loss": 0.0165, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.2612807565921295, | |
| "grad_norm": 0.14052483439445496, | |
| "learning_rate": 1.9942933902166873e-05, | |
| "loss": 0.0175, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.2628837060190751, | |
| "grad_norm": 0.10828305035829544, | |
| "learning_rate": 1.9939910076582708e-05, | |
| "loss": 0.017, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.2644866554460207, | |
| "grad_norm": 0.12008311599493027, | |
| "learning_rate": 1.9936808433528058e-05, | |
| "loss": 0.0173, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.26608960487296623, | |
| "grad_norm": 0.11578180640935898, | |
| "learning_rate": 1.9933628997285037e-05, | |
| "loss": 0.0175, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.26769255429991184, | |
| "grad_norm": 0.1305118203163147, | |
| "learning_rate": 1.993037179274479e-05, | |
| "loss": 0.0168, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.2692955037268574, | |
| "grad_norm": 0.12475095689296722, | |
| "learning_rate": 1.992703684540728e-05, | |
| "loss": 0.0164, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.270898453153803, | |
| "grad_norm": 0.13382022082805634, | |
| "learning_rate": 1.9923624181381117e-05, | |
| "loss": 0.0172, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.27250140258074856, | |
| "grad_norm": 0.1182899996638298, | |
| "learning_rate": 1.992013382738333e-05, | |
| "loss": 0.0165, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.2741043520076942, | |
| "grad_norm": 0.11200874298810959, | |
| "learning_rate": 1.9916565810739167e-05, | |
| "loss": 0.0178, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.27570730143463973, | |
| "grad_norm": 0.11989603191614151, | |
| "learning_rate": 1.9912920159381882e-05, | |
| "loss": 0.0174, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.27731025086158534, | |
| "grad_norm": 0.11210440844297409, | |
| "learning_rate": 1.990919690185251e-05, | |
| "loss": 0.0175, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.2789132002885309, | |
| "grad_norm": 0.12546950578689575, | |
| "learning_rate": 1.990539606729966e-05, | |
| "loss": 0.0173, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.2805161497154765, | |
| "grad_norm": 0.12153290957212448, | |
| "learning_rate": 1.9901517685479267e-05, | |
| "loss": 0.0177, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.28211909914242206, | |
| "grad_norm": 0.09732785820960999, | |
| "learning_rate": 1.989756178675437e-05, | |
| "loss": 0.0165, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.2837220485693676, | |
| "grad_norm": 0.09644783288240433, | |
| "learning_rate": 1.9893528402094863e-05, | |
| "loss": 0.0163, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.2853249979963132, | |
| "grad_norm": 0.12492503225803375, | |
| "learning_rate": 1.9889417563077274e-05, | |
| "loss": 0.0166, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.2869279474232588, | |
| "grad_norm": 0.1265823394060135, | |
| "learning_rate": 1.9885229301884497e-05, | |
| "loss": 0.0166, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.2885308968502044, | |
| "grad_norm": 0.10268606245517731, | |
| "learning_rate": 1.9880963651305548e-05, | |
| "loss": 0.0178, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.29013384627714994, | |
| "grad_norm": 0.09787417948246002, | |
| "learning_rate": 1.987662064473532e-05, | |
| "loss": 0.017, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.29173679570409555, | |
| "grad_norm": 0.11527131497859955, | |
| "learning_rate": 1.9872200316174285e-05, | |
| "loss": 0.0164, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.2933397451310411, | |
| "grad_norm": 0.1187480166554451, | |
| "learning_rate": 1.9867702700228282e-05, | |
| "loss": 0.0166, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.2949426945579867, | |
| "grad_norm": 0.10902020335197449, | |
| "learning_rate": 1.9863127832108196e-05, | |
| "loss": 0.0157, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.2965456439849323, | |
| "grad_norm": 0.11473922431468964, | |
| "learning_rate": 1.9858475747629712e-05, | |
| "loss": 0.0172, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.2981485934118779, | |
| "grad_norm": 0.13333828747272491, | |
| "learning_rate": 1.985374648321302e-05, | |
| "loss": 0.0169, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.29975154283882344, | |
| "grad_norm": 0.11504275351762772, | |
| "learning_rate": 1.9848940075882543e-05, | |
| "loss": 0.0161, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.301354492265769, | |
| "grad_norm": 0.10406219214200974, | |
| "learning_rate": 1.9844056563266632e-05, | |
| "loss": 0.017, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.3029574416927146, | |
| "grad_norm": 0.12233356386423111, | |
| "learning_rate": 1.9839095983597282e-05, | |
| "loss": 0.0146, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.30456039111966016, | |
| "grad_norm": 0.10993051528930664, | |
| "learning_rate": 1.983405837570983e-05, | |
| "loss": 0.0161, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.30616334054660577, | |
| "grad_norm": 0.1132737472653389, | |
| "learning_rate": 1.9828943779042663e-05, | |
| "loss": 0.0153, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.3077662899735513, | |
| "grad_norm": 0.10923943668603897, | |
| "learning_rate": 1.9823752233636868e-05, | |
| "loss": 0.0158, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.30936923940049693, | |
| "grad_norm": 0.121485136449337, | |
| "learning_rate": 1.9818483780135976e-05, | |
| "loss": 0.0151, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.3109721888274425, | |
| "grad_norm": 0.1390533298254013, | |
| "learning_rate": 1.98131384597856e-05, | |
| "loss": 0.0165, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.3125751382543881, | |
| "grad_norm": 0.11324126273393631, | |
| "learning_rate": 1.9807716314433132e-05, | |
| "loss": 0.0169, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.31417808768133365, | |
| "grad_norm": 0.10308068245649338, | |
| "learning_rate": 1.980221738652741e-05, | |
| "loss": 0.0155, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.31578103710827926, | |
| "grad_norm": 0.08773821592330933, | |
| "learning_rate": 1.9796641719118387e-05, | |
| "loss": 0.014, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.3173839865352248, | |
| "grad_norm": 0.10015236586332321, | |
| "learning_rate": 1.9790989355856794e-05, | |
| "loss": 0.0158, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.31898693596217037, | |
| "grad_norm": 0.10804478079080582, | |
| "learning_rate": 1.9785260340993796e-05, | |
| "loss": 0.0147, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.320589885389116, | |
| "grad_norm": 0.10824614763259888, | |
| "learning_rate": 1.977945471938065e-05, | |
| "loss": 0.0159, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.32219283481606154, | |
| "grad_norm": 0.10421755164861679, | |
| "learning_rate": 1.9773572536468348e-05, | |
| "loss": 0.0141, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.32379578424300715, | |
| "grad_norm": 0.10749202966690063, | |
| "learning_rate": 1.9767613838307267e-05, | |
| "loss": 0.0168, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.3253987336699527, | |
| "grad_norm": 0.11449505388736725, | |
| "learning_rate": 1.9761578671546803e-05, | |
| "loss": 0.0153, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.3270016830968983, | |
| "grad_norm": 0.09397918730974197, | |
| "learning_rate": 1.9755467083435013e-05, | |
| "loss": 0.0147, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.32860463252384386, | |
| "grad_norm": 0.11224810034036636, | |
| "learning_rate": 1.9749279121818235e-05, | |
| "loss": 0.0151, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.3302075819507895, | |
| "grad_norm": 0.11822306364774704, | |
| "learning_rate": 1.9743014835140725e-05, | |
| "loss": 0.0164, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.33181053137773503, | |
| "grad_norm": 0.10186577588319778, | |
| "learning_rate": 1.973667427244427e-05, | |
| "loss": 0.0179, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.33341348080468064, | |
| "grad_norm": 0.1012149527668953, | |
| "learning_rate": 1.97302574833678e-05, | |
| "loss": 0.0155, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.3350164302316262, | |
| "grad_norm": 0.11670338362455368, | |
| "learning_rate": 1.9723764518147012e-05, | |
| "loss": 0.0158, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.33661937965857175, | |
| "grad_norm": 0.1039760485291481, | |
| "learning_rate": 1.971719542761397e-05, | |
| "loss": 0.0152, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.33822232908551736, | |
| "grad_norm": 0.10772741585969925, | |
| "learning_rate": 1.971055026319671e-05, | |
| "loss": 0.0158, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.3398252785124629, | |
| "grad_norm": 0.10215826332569122, | |
| "learning_rate": 1.970382907691882e-05, | |
| "loss": 0.015, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.3414282279394085, | |
| "grad_norm": 0.10524086654186249, | |
| "learning_rate": 1.9697031921399065e-05, | |
| "loss": 0.0146, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.3430311773663541, | |
| "grad_norm": 0.10919707268476486, | |
| "learning_rate": 1.9690158849850943e-05, | |
| "loss": 0.0141, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.3446341267932997, | |
| "grad_norm": 0.10801272839307785, | |
| "learning_rate": 1.9683209916082293e-05, | |
| "loss": 0.0158, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.34623707622024524, | |
| "grad_norm": 0.10727003216743469, | |
| "learning_rate": 1.967618517449486e-05, | |
| "loss": 0.014, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.34784002564719085, | |
| "grad_norm": 0.11416902393102646, | |
| "learning_rate": 1.9669084680083876e-05, | |
| "loss": 0.0134, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.3494429750741364, | |
| "grad_norm": 0.12395931780338287, | |
| "learning_rate": 1.9661908488437613e-05, | |
| "loss": 0.0149, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.351045924501082, | |
| "grad_norm": 0.09420310705900192, | |
| "learning_rate": 1.9654656655736973e-05, | |
| "loss": 0.014, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.3526488739280276, | |
| "grad_norm": 0.10008088499307632, | |
| "learning_rate": 1.9647329238755034e-05, | |
| "loss": 0.0145, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.3542518233549731, | |
| "grad_norm": 0.09090422093868256, | |
| "learning_rate": 1.9639926294856607e-05, | |
| "loss": 0.0158, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.35585477278191874, | |
| "grad_norm": 0.09520357847213745, | |
| "learning_rate": 1.963244788199779e-05, | |
| "loss": 0.0144, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.3574577222088643, | |
| "grad_norm": 0.09746406227350235, | |
| "learning_rate": 1.9624894058725495e-05, | |
| "loss": 0.0156, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.3590606716358099, | |
| "grad_norm": 0.11929647624492645, | |
| "learning_rate": 1.9617264884177037e-05, | |
| "loss": 0.0161, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.36066362106275546, | |
| "grad_norm": 0.127578005194664, | |
| "learning_rate": 1.9609560418079606e-05, | |
| "loss": 0.0145, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.36226657048970107, | |
| "grad_norm": 0.08713559806346893, | |
| "learning_rate": 1.9601780720749867e-05, | |
| "loss": 0.0156, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.3638695199166466, | |
| "grad_norm": 0.1008586436510086, | |
| "learning_rate": 1.9593925853093425e-05, | |
| "loss": 0.0141, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.36547246934359223, | |
| "grad_norm": 0.08782845735549927, | |
| "learning_rate": 1.9585995876604397e-05, | |
| "loss": 0.0146, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.3670754187705378, | |
| "grad_norm": 0.08425669372081757, | |
| "learning_rate": 1.9577990853364902e-05, | |
| "loss": 0.0145, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.3686783681974834, | |
| "grad_norm": 0.09964483976364136, | |
| "learning_rate": 1.9569910846044586e-05, | |
| "loss": 0.0139, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.37028131762442895, | |
| "grad_norm": 0.08887302130460739, | |
| "learning_rate": 1.956175591790014e-05, | |
| "loss": 0.0142, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.3718842670513745, | |
| "grad_norm": 0.12365594506263733, | |
| "learning_rate": 1.955352613277478e-05, | |
| "loss": 0.0145, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.3734872164783201, | |
| "grad_norm": 0.10407640784978867, | |
| "learning_rate": 1.954522155509776e-05, | |
| "loss": 0.0146, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.37509016590526567, | |
| "grad_norm": 0.09630418568849564, | |
| "learning_rate": 1.953684224988389e-05, | |
| "loss": 0.0139, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.3766931153322113, | |
| "grad_norm": 0.11108113825321198, | |
| "learning_rate": 1.952838828273298e-05, | |
| "loss": 0.0147, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.37829606475915684, | |
| "grad_norm": 0.10452122241258621, | |
| "learning_rate": 1.9519859719829375e-05, | |
| "loss": 0.0147, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.37989901418610245, | |
| "grad_norm": 0.08710601180791855, | |
| "learning_rate": 1.9511256627941394e-05, | |
| "loss": 0.0143, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.381501963613048, | |
| "grad_norm": 0.1234845295548439, | |
| "learning_rate": 1.950257907442085e-05, | |
| "loss": 0.0145, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.3831049130399936, | |
| "grad_norm": 0.0989808440208435, | |
| "learning_rate": 1.9493827127202482e-05, | |
| "loss": 0.0143, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.38470786246693917, | |
| "grad_norm": 0.09222126007080078, | |
| "learning_rate": 1.948500085480345e-05, | |
| "loss": 0.0143, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.3863108118938847, | |
| "grad_norm": 0.10298562049865723, | |
| "learning_rate": 1.9476100326322785e-05, | |
| "loss": 0.0139, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.38791376132083033, | |
| "grad_norm": 0.10555426776409149, | |
| "learning_rate": 1.9467125611440864e-05, | |
| "loss": 0.015, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.3895167107477759, | |
| "grad_norm": 0.10084094852209091, | |
| "learning_rate": 1.9458076780418844e-05, | |
| "loss": 0.0145, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.3911196601747215, | |
| "grad_norm": 0.1143961027264595, | |
| "learning_rate": 1.9448953904098124e-05, | |
| "loss": 0.0143, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.39272260960166705, | |
| "grad_norm": 0.11982633918523788, | |
| "learning_rate": 1.9439757053899785e-05, | |
| "loss": 0.0155, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.39432555902861266, | |
| "grad_norm": 0.09620746970176697, | |
| "learning_rate": 1.9430486301824044e-05, | |
| "loss": 0.0139, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.3959285084555582, | |
| "grad_norm": 0.09390459209680557, | |
| "learning_rate": 1.942114172044967e-05, | |
| "loss": 0.0144, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.3975314578825038, | |
| "grad_norm": 0.08205546438694, | |
| "learning_rate": 1.9411723382933433e-05, | |
| "loss": 0.0143, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.3991344073094494, | |
| "grad_norm": 0.10015802830457687, | |
| "learning_rate": 1.9402231363009515e-05, | |
| "loss": 0.014, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.400737356736395, | |
| "grad_norm": 0.10022858530282974, | |
| "learning_rate": 1.9392665734988956e-05, | |
| "loss": 0.0146, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.40234030616334054, | |
| "grad_norm": 0.09830185770988464, | |
| "learning_rate": 1.9383026573759046e-05, | |
| "loss": 0.0141, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.4039432555902861, | |
| "grad_norm": 0.10277236253023148, | |
| "learning_rate": 1.9373313954782757e-05, | |
| "loss": 0.0147, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.4055462050172317, | |
| "grad_norm": 0.09011970460414886, | |
| "learning_rate": 1.9363527954098148e-05, | |
| "loss": 0.0138, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.40714915444417726, | |
| "grad_norm": 0.08783067762851715, | |
| "learning_rate": 1.935366864831776e-05, | |
| "loss": 0.0153, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.4087521038711229, | |
| "grad_norm": 0.11457304656505585, | |
| "learning_rate": 1.9343736114628035e-05, | |
| "loss": 0.0145, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.41035505329806843, | |
| "grad_norm": 0.09166496247053146, | |
| "learning_rate": 1.933373043078869e-05, | |
| "loss": 0.015, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.41195800272501404, | |
| "grad_norm": 0.10220997035503387, | |
| "learning_rate": 1.9323651675132126e-05, | |
| "loss": 0.0136, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.4135609521519596, | |
| "grad_norm": 0.08962884545326233, | |
| "learning_rate": 1.931349992656281e-05, | |
| "loss": 0.0146, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.4151639015789052, | |
| "grad_norm": 0.09034290909767151, | |
| "learning_rate": 1.930327526455665e-05, | |
| "loss": 0.0136, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.41676685100585076, | |
| "grad_norm": 0.07524847984313965, | |
| "learning_rate": 1.9292977769160374e-05, | |
| "loss": 0.0137, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.41836980043279637, | |
| "grad_norm": 0.09737731516361237, | |
| "learning_rate": 1.9282607520990918e-05, | |
| "loss": 0.0138, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.4199727498597419, | |
| "grad_norm": 0.10110239684581757, | |
| "learning_rate": 1.927216460123478e-05, | |
| "loss": 0.0141, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.4215756992866875, | |
| "grad_norm": 0.12663525342941284, | |
| "learning_rate": 1.926164909164739e-05, | |
| "loss": 0.0132, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.4231786487136331, | |
| "grad_norm": 0.10225925594568253, | |
| "learning_rate": 1.9251061074552458e-05, | |
| "loss": 0.0142, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.42478159814057864, | |
| "grad_norm": 0.11328284442424774, | |
| "learning_rate": 1.924040063284135e-05, | |
| "loss": 0.0147, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.42638454756752425, | |
| "grad_norm": 0.10168527811765671, | |
| "learning_rate": 1.9229667849972436e-05, | |
| "loss": 0.0135, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.4279874969944698, | |
| "grad_norm": 0.10044345259666443, | |
| "learning_rate": 1.9218862809970413e-05, | |
| "loss": 0.0152, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.4295904464214154, | |
| "grad_norm": 0.09153233468532562, | |
| "learning_rate": 1.9207985597425675e-05, | |
| "loss": 0.0131, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.43119339584836097, | |
| "grad_norm": 0.09851006418466568, | |
| "learning_rate": 1.9197036297493636e-05, | |
| "loss": 0.0139, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.4327963452753066, | |
| "grad_norm": 0.10922195017337799, | |
| "learning_rate": 1.918601499589407e-05, | |
| "loss": 0.0133, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.43439929470225214, | |
| "grad_norm": 0.0905163437128067, | |
| "learning_rate": 1.917492177891043e-05, | |
| "loss": 0.0132, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.43600224412919775, | |
| "grad_norm": 0.09973873198032379, | |
| "learning_rate": 1.916375673338919e-05, | |
| "loss": 0.014, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.4376051935561433, | |
| "grad_norm": 0.09208554029464722, | |
| "learning_rate": 1.9152519946739146e-05, | |
| "loss": 0.0132, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.43920814298308886, | |
| "grad_norm": 0.09508796036243439, | |
| "learning_rate": 1.9141211506930742e-05, | |
| "loss": 0.0146, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.44081109241003447, | |
| "grad_norm": 0.07578465342521667, | |
| "learning_rate": 1.9129831502495383e-05, | |
| "loss": 0.0131, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.44241404183698, | |
| "grad_norm": 0.09476418793201447, | |
| "learning_rate": 1.911838002252474e-05, | |
| "loss": 0.0115, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.44401699126392563, | |
| "grad_norm": 0.08848860114812851, | |
| "learning_rate": 1.9106857156670037e-05, | |
| "loss": 0.0138, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.4456199406908712, | |
| "grad_norm": 0.09684263169765472, | |
| "learning_rate": 1.9095262995141377e-05, | |
| "loss": 0.0133, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.4472228901178168, | |
| "grad_norm": 0.08769190311431885, | |
| "learning_rate": 1.908359762870702e-05, | |
| "loss": 0.0129, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.44882583954476235, | |
| "grad_norm": 0.07661417126655579, | |
| "learning_rate": 1.9071861148692673e-05, | |
| "loss": 0.0149, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.45042878897170796, | |
| "grad_norm": 0.1025426983833313, | |
| "learning_rate": 1.9060053646980772e-05, | |
| "loss": 0.0147, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.4520317383986535, | |
| "grad_norm": 0.09976097196340561, | |
| "learning_rate": 1.9048175216009776e-05, | |
| "loss": 0.0126, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.4536346878255991, | |
| "grad_norm": 0.09328175336122513, | |
| "learning_rate": 1.9036225948773423e-05, | |
| "loss": 0.0134, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.4552376372525447, | |
| "grad_norm": 0.0779719203710556, | |
| "learning_rate": 1.9024205938820023e-05, | |
| "loss": 0.0137, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.45684058667949023, | |
| "grad_norm": 0.08576709777116776, | |
| "learning_rate": 1.901211528025171e-05, | |
| "loss": 0.0138, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.45844353610643584, | |
| "grad_norm": 0.08608071506023407, | |
| "learning_rate": 1.8999954067723715e-05, | |
| "loss": 0.0129, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.4600464855333814, | |
| "grad_norm": 0.09790827333927155, | |
| "learning_rate": 1.8987722396443618e-05, | |
| "loss": 0.0123, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.461649434960327, | |
| "grad_norm": 0.08433697372674942, | |
| "learning_rate": 1.8975420362170606e-05, | |
| "loss": 0.0124, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.46325238438727256, | |
| "grad_norm": 0.10495218634605408, | |
| "learning_rate": 1.8963048061214725e-05, | |
| "loss": 0.0138, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.4648553338142182, | |
| "grad_norm": 0.09212182462215424, | |
| "learning_rate": 1.8950605590436125e-05, | |
| "loss": 0.0119, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.46645828324116373, | |
| "grad_norm": 0.10127340257167816, | |
| "learning_rate": 1.8938093047244298e-05, | |
| "loss": 0.0147, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.46806123266810934, | |
| "grad_norm": 0.09314699470996857, | |
| "learning_rate": 1.892551052959732e-05, | |
| "loss": 0.0121, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.4696641820950549, | |
| "grad_norm": 0.09155376255512238, | |
| "learning_rate": 1.891285813600108e-05, | |
| "loss": 0.0132, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.4712671315220005, | |
| "grad_norm": 0.09479997307062149, | |
| "learning_rate": 1.8900135965508514e-05, | |
| "loss": 0.0127, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.47287008094894606, | |
| "grad_norm": 0.0826965719461441, | |
| "learning_rate": 1.8887344117718825e-05, | |
| "loss": 0.0125, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.4744730303758916, | |
| "grad_norm": 0.0781230479478836, | |
| "learning_rate": 1.8874482692776705e-05, | |
| "loss": 0.0131, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.4760759798028372, | |
| "grad_norm": 0.08591257035732269, | |
| "learning_rate": 1.8861551791371554e-05, | |
| "loss": 0.0122, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.4776789292297828, | |
| "grad_norm": 0.09087405353784561, | |
| "learning_rate": 1.8848551514736684e-05, | |
| "loss": 0.0125, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.4792818786567284, | |
| "grad_norm": 0.11277468502521515, | |
| "learning_rate": 1.883548196464853e-05, | |
| "loss": 0.0134, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.48088482808367394, | |
| "grad_norm": 0.07540776580572128, | |
| "learning_rate": 1.8822343243425867e-05, | |
| "loss": 0.0122, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.48248777751061955, | |
| "grad_norm": 0.08707955479621887, | |
| "learning_rate": 1.8809135453928976e-05, | |
| "loss": 0.0132, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.4840907269375651, | |
| "grad_norm": 0.08592630922794342, | |
| "learning_rate": 1.8795858699558876e-05, | |
| "loss": 0.0129, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.4856936763645107, | |
| "grad_norm": 0.09010718762874603, | |
| "learning_rate": 1.8782513084256492e-05, | |
| "loss": 0.014, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.4872966257914563, | |
| "grad_norm": 0.08629398792982101, | |
| "learning_rate": 1.8769098712501842e-05, | |
| "loss": 0.0131, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.4888995752184019, | |
| "grad_norm": 0.08849960565567017, | |
| "learning_rate": 1.875561568931323e-05, | |
| "loss": 0.0125, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.49050252464534744, | |
| "grad_norm": 0.11940028518438339, | |
| "learning_rate": 1.8742064120246416e-05, | |
| "loss": 0.0143, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.492105474072293, | |
| "grad_norm": 0.0845445841550827, | |
| "learning_rate": 1.872844411139379e-05, | |
| "loss": 0.013, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.4937084234992386, | |
| "grad_norm": 0.0929800420999527, | |
| "learning_rate": 1.8714755769383546e-05, | |
| "loss": 0.013, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.49531137292618416, | |
| "grad_norm": 0.1016814187169075, | |
| "learning_rate": 1.870099920137884e-05, | |
| "loss": 0.0139, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.49691432235312977, | |
| "grad_norm": 0.09055141359567642, | |
| "learning_rate": 1.8687174515076956e-05, | |
| "loss": 0.0121, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.4985172717800753, | |
| "grad_norm": 0.09261985868215561, | |
| "learning_rate": 1.867328181870846e-05, | |
| "loss": 0.0133, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.5001202212070209, | |
| "grad_norm": 0.08901810646057129, | |
| "learning_rate": 1.8659321221036365e-05, | |
| "loss": 0.013, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.5017231706339665, | |
| "grad_norm": 0.08935806900262833, | |
| "learning_rate": 1.8645292831355252e-05, | |
| "loss": 0.0147, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.503326120060912, | |
| "grad_norm": 0.1315586119890213, | |
| "learning_rate": 1.8631196759490447e-05, | |
| "loss": 0.0125, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.5049290694878577, | |
| "grad_norm": 0.08496637642383575, | |
| "learning_rate": 1.8617033115797137e-05, | |
| "loss": 0.0112, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.5065320189148033, | |
| "grad_norm": 0.0730431079864502, | |
| "learning_rate": 1.8602802011159516e-05, | |
| "loss": 0.0116, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.5081349683417489, | |
| "grad_norm": 0.10121534019708633, | |
| "learning_rate": 1.8588503556989918e-05, | |
| "loss": 0.0132, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.5097379177686944, | |
| "grad_norm": 0.10644973069429398, | |
| "learning_rate": 1.8574137865227933e-05, | |
| "loss": 0.0119, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.51134086719564, | |
| "grad_norm": 0.10578668117523193, | |
| "learning_rate": 1.8559705048339562e-05, | |
| "loss": 0.0126, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.5129438166225856, | |
| "grad_norm": 0.08165573328733444, | |
| "learning_rate": 1.8545205219316292e-05, | |
| "loss": 0.0131, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.5145467660495311, | |
| "grad_norm": 0.08247827738523483, | |
| "learning_rate": 1.853063849167424e-05, | |
| "loss": 0.0133, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.5161497154764767, | |
| "grad_norm": 0.07236569374799728, | |
| "learning_rate": 1.8516004979453265e-05, | |
| "loss": 0.0116, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.5177526649034223, | |
| "grad_norm": 0.08705353736877441, | |
| "learning_rate": 1.850130479721606e-05, | |
| "loss": 0.0126, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.5193556143303679, | |
| "grad_norm": 0.07517849653959274, | |
| "learning_rate": 1.8486538060047267e-05, | |
| "loss": 0.0109, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.5209585637573134, | |
| "grad_norm": 0.09321631491184235, | |
| "learning_rate": 1.8471704883552582e-05, | |
| "loss": 0.0118, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.522561513184259, | |
| "grad_norm": 0.09053128957748413, | |
| "learning_rate": 1.845680538385782e-05, | |
| "loss": 0.0134, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.5241644626112046, | |
| "grad_norm": 0.08485197275876999, | |
| "learning_rate": 1.8441839677608045e-05, | |
| "loss": 0.0129, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.5257674120381503, | |
| "grad_norm": 0.0714045837521553, | |
| "learning_rate": 1.8426807881966633e-05, | |
| "loss": 0.0111, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.5273703614650957, | |
| "grad_norm": 0.0777626484632492, | |
| "learning_rate": 1.841171011461435e-05, | |
| "loss": 0.0122, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.5289733108920414, | |
| "grad_norm": 0.07993612438440323, | |
| "learning_rate": 1.8396546493748456e-05, | |
| "loss": 0.0123, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.530576260318987, | |
| "grad_norm": 0.09510450810194016, | |
| "learning_rate": 1.8381317138081755e-05, | |
| "loss": 0.0126, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.5321792097459325, | |
| "grad_norm": 0.07836094498634338, | |
| "learning_rate": 1.8366022166841676e-05, | |
| "loss": 0.0111, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.5337821591728781, | |
| "grad_norm": 0.09115055948495865, | |
| "learning_rate": 1.8350661699769344e-05, | |
| "loss": 0.0127, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.5353851085998237, | |
| "grad_norm": 0.08571304380893707, | |
| "learning_rate": 1.833523585711863e-05, | |
| "loss": 0.0126, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.5369880580267693, | |
| "grad_norm": 0.06526945531368256, | |
| "learning_rate": 1.831974475965521e-05, | |
| "loss": 0.0119, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.5385910074537148, | |
| "grad_norm": 0.08085188269615173, | |
| "learning_rate": 1.830418852865565e-05, | |
| "loss": 0.0117, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.5401939568806604, | |
| "grad_norm": 0.0840122401714325, | |
| "learning_rate": 1.828856728590642e-05, | |
| "loss": 0.0138, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.541796906307606, | |
| "grad_norm": 0.09077087044715881, | |
| "learning_rate": 1.827288115370294e-05, | |
| "loss": 0.0157, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.5433998557345516, | |
| "grad_norm": 0.14829397201538086, | |
| "learning_rate": 1.825713025484866e-05, | |
| "loss": 0.013, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.5450028051614971, | |
| "grad_norm": 0.08603893965482712, | |
| "learning_rate": 1.824131471265405e-05, | |
| "loss": 0.014, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.5466057545884427, | |
| "grad_norm": 0.0720130130648613, | |
| "learning_rate": 1.822543465093568e-05, | |
| "loss": 0.012, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.5482087040153883, | |
| "grad_norm": 0.08433578908443451, | |
| "learning_rate": 1.8209490194015216e-05, | |
| "loss": 0.0128, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.5498116534423338, | |
| "grad_norm": 0.36162707209587097, | |
| "learning_rate": 1.819348146671847e-05, | |
| "loss": 0.0138, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.5514146028692795, | |
| "grad_norm": 0.08648835122585297, | |
| "learning_rate": 1.8177408594374412e-05, | |
| "loss": 0.0126, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.5530175522962251, | |
| "grad_norm": 0.0839293822646141, | |
| "learning_rate": 1.816127170281418e-05, | |
| "loss": 0.012, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.5546205017231707, | |
| "grad_norm": 0.10502775758504868, | |
| "learning_rate": 1.8145070918370114e-05, | |
| "loss": 0.0133, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.5562234511501162, | |
| "grad_norm": 0.08115836977958679, | |
| "learning_rate": 1.8128806367874762e-05, | |
| "loss": 0.0125, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.5578264005770618, | |
| "grad_norm": 0.07933896780014038, | |
| "learning_rate": 1.8112478178659872e-05, | |
| "loss": 0.0124, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.5594293500040074, | |
| "grad_norm": 0.07423722743988037, | |
| "learning_rate": 1.8096086478555414e-05, | |
| "loss": 0.0114, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.561032299430953, | |
| "grad_norm": 0.08609019964933395, | |
| "learning_rate": 1.8079631395888567e-05, | |
| "loss": 0.0121, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.5626352488578985, | |
| "grad_norm": 0.09131062030792236, | |
| "learning_rate": 1.8063113059482718e-05, | |
| "loss": 0.0122, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.5642381982848441, | |
| "grad_norm": 0.07950209826231003, | |
| "learning_rate": 1.8046531598656465e-05, | |
| "loss": 0.0119, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.5658411477117897, | |
| "grad_norm": 0.10808353126049042, | |
| "learning_rate": 1.802988714322258e-05, | |
| "loss": 0.0127, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.5674440971387352, | |
| "grad_norm": 0.08181063830852509, | |
| "learning_rate": 1.801317982348701e-05, | |
| "loss": 0.0119, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.5690470465656808, | |
| "grad_norm": 0.09325892478227615, | |
| "learning_rate": 1.7996409770247866e-05, | |
| "loss": 0.0113, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.5706499959926264, | |
| "grad_norm": 0.09701074659824371, | |
| "learning_rate": 1.7979577114794367e-05, | |
| "loss": 0.0116, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.5722529454195721, | |
| "grad_norm": 0.08736202120780945, | |
| "learning_rate": 1.7962681988905844e-05, | |
| "loss": 0.0118, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.5738558948465176, | |
| "grad_norm": 0.09056610614061356, | |
| "learning_rate": 1.7945724524850697e-05, | |
| "loss": 0.0118, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.5754588442734632, | |
| "grad_norm": 0.06541703641414642, | |
| "learning_rate": 1.7928704855385344e-05, | |
| "loss": 0.0118, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.5770617937004088, | |
| "grad_norm": 0.07547177374362946, | |
| "learning_rate": 1.791162311375321e-05, | |
| "loss": 0.0126, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.5786647431273544, | |
| "grad_norm": 0.07265637814998627, | |
| "learning_rate": 1.7894479433683676e-05, | |
| "loss": 0.0122, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.5802676925542999, | |
| "grad_norm": 0.07592958211898804, | |
| "learning_rate": 1.7877273949391006e-05, | |
| "loss": 0.0135, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.5818706419812455, | |
| "grad_norm": 0.08901036530733109, | |
| "learning_rate": 1.7860006795573326e-05, | |
| "loss": 0.012, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.5834735914081911, | |
| "grad_norm": 0.07132358849048615, | |
| "learning_rate": 1.7842678107411565e-05, | |
| "loss": 0.0114, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.5850765408351366, | |
| "grad_norm": 0.07480525970458984, | |
| "learning_rate": 1.7825288020568387e-05, | |
| "loss": 0.0118, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.5866794902620822, | |
| "grad_norm": 0.07714416086673737, | |
| "learning_rate": 1.780783667118713e-05, | |
| "loss": 0.0117, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.5882824396890278, | |
| "grad_norm": 0.08674295246601105, | |
| "learning_rate": 1.7790324195890752e-05, | |
| "loss": 0.0108, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.5898853891159734, | |
| "grad_norm": 0.08064709603786469, | |
| "learning_rate": 1.777275073178074e-05, | |
| "loss": 0.0112, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.5914883385429189, | |
| "grad_norm": 0.07559552043676376, | |
| "learning_rate": 1.7755116416436063e-05, | |
| "loss": 0.0111, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.5930912879698645, | |
| "grad_norm": 0.08388907462358475, | |
| "learning_rate": 1.7737421387912075e-05, | |
| "loss": 0.0121, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.5946942373968102, | |
| "grad_norm": 0.08252954483032227, | |
| "learning_rate": 1.7719665784739444e-05, | |
| "loss": 0.0127, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.5962971868237558, | |
| "grad_norm": 0.0813174620270729, | |
| "learning_rate": 1.7701849745923056e-05, | |
| "loss": 0.012, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.5979001362507013, | |
| "grad_norm": 0.07678083330392838, | |
| "learning_rate": 1.7683973410940946e-05, | |
| "loss": 0.0123, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.5995030856776469, | |
| "grad_norm": 0.09870904684066772, | |
| "learning_rate": 1.766603691974319e-05, | |
| "loss": 0.0117, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.6011060351045925, | |
| "grad_norm": 0.07422671467065811, | |
| "learning_rate": 1.7648040412750807e-05, | |
| "loss": 0.012, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.602708984531538, | |
| "grad_norm": 0.09045329689979553, | |
| "learning_rate": 1.7629984030854685e-05, | |
| "loss": 0.0118, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.6043119339584836, | |
| "grad_norm": 0.10855185240507126, | |
| "learning_rate": 1.761186791541444e-05, | |
| "loss": 0.018, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.6059148833854292, | |
| "grad_norm": 0.08267343789339066, | |
| "learning_rate": 1.7593692208257347e-05, | |
| "loss": 0.014, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.6075178328123748, | |
| "grad_norm": 0.09279263019561768, | |
| "learning_rate": 1.75754570516772e-05, | |
| "loss": 0.0136, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.6091207822393203, | |
| "grad_norm": 0.09911685436964035, | |
| "learning_rate": 1.7557162588433207e-05, | |
| "loss": 0.012, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.6107237316662659, | |
| "grad_norm": 0.07959414273500443, | |
| "learning_rate": 1.7538808961748897e-05, | |
| "loss": 0.012, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.6123266810932115, | |
| "grad_norm": 0.09886189550161362, | |
| "learning_rate": 1.752039631531095e-05, | |
| "loss": 0.0118, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.6139296305201571, | |
| "grad_norm": 0.08571209758520126, | |
| "learning_rate": 1.750192479326812e-05, | |
| "loss": 0.0125, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.6155325799471026, | |
| "grad_norm": 0.07818809151649475, | |
| "learning_rate": 1.748339454023007e-05, | |
| "loss": 0.0121, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.6171355293740483, | |
| "grad_norm": 0.09316529333591461, | |
| "learning_rate": 1.746480570126627e-05, | |
| "loss": 0.0123, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.6187384788009939, | |
| "grad_norm": 0.07923634350299835, | |
| "learning_rate": 1.744615842190484e-05, | |
| "loss": 0.0118, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.6203414282279394, | |
| "grad_norm": 0.06345493346452713, | |
| "learning_rate": 1.742745284813141e-05, | |
| "loss": 0.011, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.621944377654885, | |
| "grad_norm": 0.092954121530056, | |
| "learning_rate": 1.7408689126387997e-05, | |
| "loss": 0.0111, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.6235473270818306, | |
| "grad_norm": 0.0818738043308258, | |
| "learning_rate": 1.7389867403571844e-05, | |
| "loss": 0.0107, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.6251502765087762, | |
| "grad_norm": 0.07621193677186966, | |
| "learning_rate": 1.737098782703427e-05, | |
| "loss": 0.011, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.6267532259357217, | |
| "grad_norm": 0.08616837859153748, | |
| "learning_rate": 1.7352050544579514e-05, | |
| "loss": 0.0116, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.6283561753626673, | |
| "grad_norm": 0.08493662625551224, | |
| "learning_rate": 1.733305570446359e-05, | |
| "loss": 0.012, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.6299591247896129, | |
| "grad_norm": 0.07540535926818848, | |
| "learning_rate": 1.7314003455393117e-05, | |
| "loss": 0.0109, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.6315620742165585, | |
| "grad_norm": 0.0753653421998024, | |
| "learning_rate": 1.729489394652415e-05, | |
| "loss": 0.0107, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.633165023643504, | |
| "grad_norm": 0.11599840223789215, | |
| "learning_rate": 1.7275727327461035e-05, | |
| "loss": 0.0119, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.6347679730704496, | |
| "grad_norm": 0.08844250440597534, | |
| "learning_rate": 1.72565037482552e-05, | |
| "loss": 0.0114, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.6363709224973952, | |
| "grad_norm": 0.13949395716190338, | |
| "learning_rate": 1.723722335940402e-05, | |
| "loss": 0.0125, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.6379738719243407, | |
| "grad_norm": 0.07286416739225388, | |
| "learning_rate": 1.721788631184961e-05, | |
| "loss": 0.0118, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.6395768213512864, | |
| "grad_norm": 0.07762700319290161, | |
| "learning_rate": 1.7198492756977664e-05, | |
| "loss": 0.012, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.641179770778232, | |
| "grad_norm": 0.08513263612985611, | |
| "learning_rate": 1.717904284661625e-05, | |
| "loss": 0.0111, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.6427827202051776, | |
| "grad_norm": 0.07539255172014236, | |
| "learning_rate": 1.7159536733034638e-05, | |
| "loss": 0.0122, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 0.6443856696321231, | |
| "grad_norm": 0.08053237944841385, | |
| "learning_rate": 1.7139974568942094e-05, | |
| "loss": 0.0102, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.6459886190590687, | |
| "grad_norm": 0.07483901083469391, | |
| "learning_rate": 1.7120356507486694e-05, | |
| "loss": 0.0117, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 0.6475915684860143, | |
| "grad_norm": 0.08027271181344986, | |
| "learning_rate": 1.7100682702254133e-05, | |
| "loss": 0.0117, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.6491945179129599, | |
| "grad_norm": 0.0767173245549202, | |
| "learning_rate": 1.7080953307266507e-05, | |
| "loss": 0.012, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.6507974673399054, | |
| "grad_norm": 0.07106052339076996, | |
| "learning_rate": 1.70611684769811e-05, | |
| "loss": 0.0109, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.652400416766851, | |
| "grad_norm": 0.0712939128279686, | |
| "learning_rate": 1.7041328366289202e-05, | |
| "loss": 0.01, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 0.6540033661937966, | |
| "grad_norm": 0.08332879841327667, | |
| "learning_rate": 1.7021433130514875e-05, | |
| "loss": 0.0114, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.6556063156207421, | |
| "grad_norm": 0.07251271605491638, | |
| "learning_rate": 1.7001482925413748e-05, | |
| "loss": 0.0113, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 0.6572092650476877, | |
| "grad_norm": 0.09120030701160431, | |
| "learning_rate": 1.6981477907171792e-05, | |
| "loss": 0.0103, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.6588122144746333, | |
| "grad_norm": 0.0815647691488266, | |
| "learning_rate": 1.696141823240409e-05, | |
| "loss": 0.0109, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.660415163901579, | |
| "grad_norm": 0.08549433201551437, | |
| "learning_rate": 1.6941304058153637e-05, | |
| "loss": 0.0113, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.6620181133285244, | |
| "grad_norm": 0.06725161522626877, | |
| "learning_rate": 1.6921135541890075e-05, | |
| "loss": 0.0114, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 0.6636210627554701, | |
| "grad_norm": 0.06733765453100204, | |
| "learning_rate": 1.6900912841508496e-05, | |
| "loss": 0.0116, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.6652240121824157, | |
| "grad_norm": 0.10023114830255508, | |
| "learning_rate": 1.6880636115328165e-05, | |
| "loss": 0.0103, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.6668269616093613, | |
| "grad_norm": 0.07057762891054153, | |
| "learning_rate": 1.6860305522091328e-05, | |
| "loss": 0.0113, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.6684299110363068, | |
| "grad_norm": 0.08346732705831528, | |
| "learning_rate": 1.683992122096193e-05, | |
| "loss": 0.0111, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.6700328604632524, | |
| "grad_norm": 0.07453355193138123, | |
| "learning_rate": 1.681948337152439e-05, | |
| "loss": 0.0103, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.671635809890198, | |
| "grad_norm": 0.07577425241470337, | |
| "learning_rate": 1.6798992133782333e-05, | |
| "loss": 0.0111, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 0.6732387593171435, | |
| "grad_norm": 0.07895900309085846, | |
| "learning_rate": 1.677844766815737e-05, | |
| "loss": 0.01, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.6748417087440891, | |
| "grad_norm": 0.10164612531661987, | |
| "learning_rate": 1.6757850135487813e-05, | |
| "loss": 0.0099, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 0.6764446581710347, | |
| "grad_norm": 0.08837499469518661, | |
| "learning_rate": 1.673719969702742e-05, | |
| "loss": 0.0114, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 0.6780476075979803, | |
| "grad_norm": 0.07328519970178604, | |
| "learning_rate": 1.671649651444414e-05, | |
| "loss": 0.011, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 0.6796505570249258, | |
| "grad_norm": 0.07811128348112106, | |
| "learning_rate": 1.6695740749818847e-05, | |
| "loss": 0.0105, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 0.6812535064518714, | |
| "grad_norm": 0.08153773844242096, | |
| "learning_rate": 1.6674932565644068e-05, | |
| "loss": 0.0116, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.682856455878817, | |
| "grad_norm": 0.07350067049264908, | |
| "learning_rate": 1.6654072124822713e-05, | |
| "loss": 0.0112, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.6844594053057627, | |
| "grad_norm": 0.08609039336442947, | |
| "learning_rate": 1.663315959066679e-05, | |
| "loss": 0.0117, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 0.6860623547327082, | |
| "grad_norm": 0.0826505571603775, | |
| "learning_rate": 1.6612195126896143e-05, | |
| "loss": 0.0104, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 0.6876653041596538, | |
| "grad_norm": 0.08540302515029907, | |
| "learning_rate": 1.6591178897637167e-05, | |
| "loss": 0.0101, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 0.6892682535865994, | |
| "grad_norm": 0.07264818996191025, | |
| "learning_rate": 1.6570111067421504e-05, | |
| "loss": 0.0112, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.6908712030135449, | |
| "grad_norm": 0.07823798805475235, | |
| "learning_rate": 1.6548991801184784e-05, | |
| "loss": 0.0108, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 0.6924741524404905, | |
| "grad_norm": 0.14571355283260345, | |
| "learning_rate": 1.6527821264265303e-05, | |
| "loss": 0.0109, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.6940771018674361, | |
| "grad_norm": 0.06896299868822098, | |
| "learning_rate": 1.6506599622402757e-05, | |
| "loss": 0.0108, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 0.6956800512943817, | |
| "grad_norm": 0.08016230165958405, | |
| "learning_rate": 1.648532704173693e-05, | |
| "loss": 0.0117, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 0.6972830007213272, | |
| "grad_norm": 0.07234030216932297, | |
| "learning_rate": 1.6464003688806385e-05, | |
| "loss": 0.0112, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.6988859501482728, | |
| "grad_norm": 0.08299129456281662, | |
| "learning_rate": 1.6442629730547187e-05, | |
| "loss": 0.011, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 0.7004888995752184, | |
| "grad_norm": 0.07566344738006592, | |
| "learning_rate": 1.6421205334291563e-05, | |
| "loss": 0.0111, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 0.702091849002164, | |
| "grad_norm": 0.0738309845328331, | |
| "learning_rate": 1.639973066776662e-05, | |
| "loss": 0.0123, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 0.7036947984291095, | |
| "grad_norm": 0.06730606406927109, | |
| "learning_rate": 1.637820589909302e-05, | |
| "loss": 0.0107, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 0.7052977478560551, | |
| "grad_norm": 0.07160209119319916, | |
| "learning_rate": 1.6356631196783657e-05, | |
| "loss": 0.0106, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.7069006972830008, | |
| "grad_norm": 0.06399673223495483, | |
| "learning_rate": 1.6335006729742345e-05, | |
| "loss": 0.0114, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 0.7085036467099463, | |
| "grad_norm": 0.06448632478713989, | |
| "learning_rate": 1.6313332667262506e-05, | |
| "loss": 0.0102, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 0.7101065961368919, | |
| "grad_norm": 0.06451641768217087, | |
| "learning_rate": 1.6291609179025827e-05, | |
| "loss": 0.0101, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 0.7117095455638375, | |
| "grad_norm": 0.10104962438344955, | |
| "learning_rate": 1.6269836435100933e-05, | |
| "loss": 0.0115, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 0.7133124949907831, | |
| "grad_norm": 0.09099545329809189, | |
| "learning_rate": 1.6248014605942074e-05, | |
| "loss": 0.0115, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.7149154444177286, | |
| "grad_norm": 0.06802688539028168, | |
| "learning_rate": 1.6226143862387776e-05, | |
| "loss": 0.0106, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 0.7165183938446742, | |
| "grad_norm": 0.08041039854288101, | |
| "learning_rate": 1.620422437565949e-05, | |
| "loss": 0.0107, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 0.7181213432716198, | |
| "grad_norm": 0.07889063656330109, | |
| "learning_rate": 1.6182256317360286e-05, | |
| "loss": 0.0106, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 0.7197242926985654, | |
| "grad_norm": 0.08090974390506744, | |
| "learning_rate": 1.6160239859473484e-05, | |
| "loss": 0.0115, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 0.7213272421255109, | |
| "grad_norm": 0.07090416550636292, | |
| "learning_rate": 1.613817517436131e-05, | |
| "loss": 0.011, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.7229301915524565, | |
| "grad_norm": 0.07482471317052841, | |
| "learning_rate": 1.611606243476356e-05, | |
| "loss": 0.0113, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 0.7245331409794021, | |
| "grad_norm": 0.07659480720758438, | |
| "learning_rate": 1.6093901813796223e-05, | |
| "loss": 0.0098, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 0.7261360904063476, | |
| "grad_norm": 0.07723913341760635, | |
| "learning_rate": 1.6071693484950165e-05, | |
| "loss": 0.0105, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 0.7277390398332932, | |
| "grad_norm": 0.08152413368225098, | |
| "learning_rate": 1.6049437622089715e-05, | |
| "loss": 0.0122, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 0.7293419892602389, | |
| "grad_norm": 0.07603183388710022, | |
| "learning_rate": 1.602713439945137e-05, | |
| "loss": 0.0125, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.7309449386871845, | |
| "grad_norm": 0.07525479793548584, | |
| "learning_rate": 1.6004783991642373e-05, | |
| "loss": 0.011, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 0.73254788811413, | |
| "grad_norm": 0.0917053148150444, | |
| "learning_rate": 1.5982386573639375e-05, | |
| "loss": 0.0108, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 0.7341508375410756, | |
| "grad_norm": 0.0688992515206337, | |
| "learning_rate": 1.595994232078707e-05, | |
| "loss": 0.011, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 0.7357537869680212, | |
| "grad_norm": 0.08237089961767197, | |
| "learning_rate": 1.5937451408796796e-05, | |
| "loss": 0.0105, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 0.7373567363949668, | |
| "grad_norm": 0.07928130775690079, | |
| "learning_rate": 1.5914914013745194e-05, | |
| "loss": 0.0114, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.7389596858219123, | |
| "grad_norm": 0.07128145545721054, | |
| "learning_rate": 1.589233031207279e-05, | |
| "loss": 0.0105, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 0.7405626352488579, | |
| "grad_norm": 0.07204887270927429, | |
| "learning_rate": 1.586970048058266e-05, | |
| "loss": 0.0098, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 0.7421655846758035, | |
| "grad_norm": 0.08074098080396652, | |
| "learning_rate": 1.584702469643899e-05, | |
| "loss": 0.0123, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 0.743768534102749, | |
| "grad_norm": 0.07323694974184036, | |
| "learning_rate": 1.582430313716575e-05, | |
| "loss": 0.0095, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 0.7453714835296946, | |
| "grad_norm": 0.0781148299574852, | |
| "learning_rate": 1.5801535980645253e-05, | |
| "loss": 0.0098, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.7469744329566402, | |
| "grad_norm": 0.07220373302698135, | |
| "learning_rate": 1.5778723405116792e-05, | |
| "loss": 0.011, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 0.7485773823835858, | |
| "grad_norm": 0.05822448804974556, | |
| "learning_rate": 1.5755865589175237e-05, | |
| "loss": 0.0108, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 0.7501803318105313, | |
| "grad_norm": 0.0715513601899147, | |
| "learning_rate": 1.573296271176963e-05, | |
| "loss": 0.0103, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 0.751783281237477, | |
| "grad_norm": 0.3894812762737274, | |
| "learning_rate": 1.5710014952201797e-05, | |
| "loss": 0.0103, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 0.7533862306644226, | |
| "grad_norm": 0.07824983447790146, | |
| "learning_rate": 1.5687022490124926e-05, | |
| "loss": 0.01, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.7549891800913682, | |
| "grad_norm": 0.0841994360089302, | |
| "learning_rate": 1.5663985505542185e-05, | |
| "loss": 0.012, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 0.7565921295183137, | |
| "grad_norm": 0.13080202043056488, | |
| "learning_rate": 1.5640904178805293e-05, | |
| "loss": 0.0119, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 0.7581950789452593, | |
| "grad_norm": 0.10361091792583466, | |
| "learning_rate": 1.561777869061311e-05, | |
| "loss": 0.0131, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 0.7597980283722049, | |
| "grad_norm": 0.07729562371969223, | |
| "learning_rate": 1.559460922201023e-05, | |
| "loss": 0.0128, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 0.7614009777991504, | |
| "grad_norm": 0.07854557782411575, | |
| "learning_rate": 1.5571395954385565e-05, | |
| "loss": 0.0117, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.763003927226096, | |
| "grad_norm": 0.07294869422912598, | |
| "learning_rate": 1.5548139069470923e-05, | |
| "loss": 0.0107, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 0.7646068766530416, | |
| "grad_norm": 0.09968176484107971, | |
| "learning_rate": 1.5524838749339567e-05, | |
| "loss": 0.0113, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 0.7662098260799872, | |
| "grad_norm": 0.09212654083967209, | |
| "learning_rate": 1.5501495176404817e-05, | |
| "loss": 0.0115, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 0.7678127755069327, | |
| "grad_norm": 0.07619819790124893, | |
| "learning_rate": 1.5478108533418604e-05, | |
| "loss": 0.0122, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 0.7694157249338783, | |
| "grad_norm": 0.07858500629663467, | |
| "learning_rate": 1.5454679003470056e-05, | |
| "loss": 0.0106, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.7710186743608239, | |
| "grad_norm": 0.0700116902589798, | |
| "learning_rate": 1.5431206769984047e-05, | |
| "loss": 0.0121, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 0.7726216237877694, | |
| "grad_norm": 0.0738898515701294, | |
| "learning_rate": 1.5407692016719763e-05, | |
| "loss": 0.01, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 0.774224573214715, | |
| "grad_norm": 0.06155667454004288, | |
| "learning_rate": 1.538413492776928e-05, | |
| "loss": 0.0103, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 0.7758275226416607, | |
| "grad_norm": 0.08188408613204956, | |
| "learning_rate": 1.5360535687556097e-05, | |
| "loss": 0.0102, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 0.7774304720686063, | |
| "grad_norm": 0.06689083576202393, | |
| "learning_rate": 1.533689448083372e-05, | |
| "loss": 0.0104, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.7790334214955518, | |
| "grad_norm": 0.09438969194889069, | |
| "learning_rate": 1.5313211492684193e-05, | |
| "loss": 0.0095, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 0.7806363709224974, | |
| "grad_norm": 0.05713031813502312, | |
| "learning_rate": 1.5289486908516665e-05, | |
| "loss": 0.0097, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 0.782239320349443, | |
| "grad_norm": 0.0832870751619339, | |
| "learning_rate": 1.5265720914065925e-05, | |
| "loss": 0.0114, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 0.7838422697763886, | |
| "grad_norm": 0.06851907074451447, | |
| "learning_rate": 1.5241913695390957e-05, | |
| "loss": 0.0103, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 0.7854452192033341, | |
| "grad_norm": 0.07114174216985703, | |
| "learning_rate": 1.5218065438873484e-05, | |
| "loss": 0.0105, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.7870481686302797, | |
| "grad_norm": 0.06810145080089569, | |
| "learning_rate": 1.5194176331216496e-05, | |
| "loss": 0.0098, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 0.7886511180572253, | |
| "grad_norm": 0.08911102265119553, | |
| "learning_rate": 1.5170246559442815e-05, | |
| "loss": 0.0104, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 0.7902540674841708, | |
| "grad_norm": 0.07687364518642426, | |
| "learning_rate": 1.5146276310893594e-05, | |
| "loss": 0.0106, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 0.7918570169111164, | |
| "grad_norm": 0.07641691714525223, | |
| "learning_rate": 1.5122265773226886e-05, | |
| "loss": 0.0094, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 0.793459966338062, | |
| "grad_norm": 0.07674935460090637, | |
| "learning_rate": 1.5098215134416148e-05, | |
| "loss": 0.0098, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.7950629157650076, | |
| "grad_norm": 0.0895194262266159, | |
| "learning_rate": 1.5074124582748785e-05, | |
| "loss": 0.0107, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 0.7966658651919531, | |
| "grad_norm": 0.07801928371191025, | |
| "learning_rate": 1.5049994306824678e-05, | |
| "loss": 0.0095, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 0.7982688146188988, | |
| "grad_norm": 0.08184653520584106, | |
| "learning_rate": 1.5025824495554688e-05, | |
| "loss": 0.0096, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 0.7998717640458444, | |
| "grad_norm": 0.07062246650457382, | |
| "learning_rate": 1.5001615338159198e-05, | |
| "loss": 0.0104, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 0.80147471347279, | |
| "grad_norm": 0.06757838279008865, | |
| "learning_rate": 1.497736702416662e-05, | |
| "loss": 0.0099, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.8030776628997355, | |
| "grad_norm": 0.08436329662799835, | |
| "learning_rate": 1.4953079743411922e-05, | |
| "loss": 0.0102, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 0.8046806123266811, | |
| "grad_norm": 0.0803229808807373, | |
| "learning_rate": 1.4928753686035128e-05, | |
| "loss": 0.01, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 0.8062835617536267, | |
| "grad_norm": 0.08634204417467117, | |
| "learning_rate": 1.4904389042479831e-05, | |
| "loss": 0.0094, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 0.8078865111805722, | |
| "grad_norm": 0.07308026403188705, | |
| "learning_rate": 1.4879986003491722e-05, | |
| "loss": 0.0103, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 0.8094894606075178, | |
| "grad_norm": 0.07339402288198471, | |
| "learning_rate": 1.4855544760117064e-05, | |
| "loss": 0.0102, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.8110924100344634, | |
| "grad_norm": 0.06620003283023834, | |
| "learning_rate": 1.4831065503701234e-05, | |
| "loss": 0.0093, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 0.812695359461409, | |
| "grad_norm": 0.0636444240808487, | |
| "learning_rate": 1.4806548425887186e-05, | |
| "loss": 0.0105, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 0.8142983088883545, | |
| "grad_norm": 0.06956276297569275, | |
| "learning_rate": 1.4781993718613983e-05, | |
| "loss": 0.0099, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 0.8159012583153001, | |
| "grad_norm": 0.06907407194375992, | |
| "learning_rate": 1.475740157411527e-05, | |
| "loss": 0.0121, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 0.8175042077422457, | |
| "grad_norm": 0.06344885379076004, | |
| "learning_rate": 1.4732772184917795e-05, | |
| "loss": 0.0097, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.8191071571691914, | |
| "grad_norm": 0.08058324456214905, | |
| "learning_rate": 1.4708105743839876e-05, | |
| "loss": 0.0101, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 0.8207101065961369, | |
| "grad_norm": 0.07553695142269135, | |
| "learning_rate": 1.46834024439899e-05, | |
| "loss": 0.0096, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 0.8223130560230825, | |
| "grad_norm": 0.06311500817537308, | |
| "learning_rate": 1.4658662478764823e-05, | |
| "loss": 0.0108, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 0.8239160054500281, | |
| "grad_norm": 0.07880297303199768, | |
| "learning_rate": 1.463388604184864e-05, | |
| "loss": 0.0096, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 0.8255189548769736, | |
| "grad_norm": 0.07937072962522507, | |
| "learning_rate": 1.4609073327210879e-05, | |
| "loss": 0.0105, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.8271219043039192, | |
| "grad_norm": 0.07063055038452148, | |
| "learning_rate": 1.4584224529105077e-05, | |
| "loss": 0.0108, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 0.8287248537308648, | |
| "grad_norm": 0.07742191851139069, | |
| "learning_rate": 1.4559339842067259e-05, | |
| "loss": 0.0108, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 0.8303278031578104, | |
| "grad_norm": 0.07162066549062729, | |
| "learning_rate": 1.453441946091442e-05, | |
| "loss": 0.0095, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 0.8319307525847559, | |
| "grad_norm": 0.0674009919166565, | |
| "learning_rate": 1.4509463580742993e-05, | |
| "loss": 0.0102, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 0.8335337020117015, | |
| "grad_norm": 0.07282152771949768, | |
| "learning_rate": 1.4484472396927334e-05, | |
| "loss": 0.0093, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.8351366514386471, | |
| "grad_norm": 0.07375837862491608, | |
| "learning_rate": 1.4459446105118171e-05, | |
| "loss": 0.0099, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 0.8367396008655927, | |
| "grad_norm": 0.0662304237484932, | |
| "learning_rate": 1.4434384901241096e-05, | |
| "loss": 0.0099, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 0.8383425502925382, | |
| "grad_norm": 0.0746363177895546, | |
| "learning_rate": 1.4409288981495011e-05, | |
| "loss": 0.0094, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 0.8399454997194838, | |
| "grad_norm": 0.09581029415130615, | |
| "learning_rate": 1.438415854235061e-05, | |
| "loss": 0.0102, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 0.8415484491464295, | |
| "grad_norm": 0.08328843116760254, | |
| "learning_rate": 1.4358993780548832e-05, | |
| "loss": 0.0095, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.843151398573375, | |
| "grad_norm": 0.06462030857801437, | |
| "learning_rate": 1.433379489309931e-05, | |
| "loss": 0.0097, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 0.8447543480003206, | |
| "grad_norm": 0.09732311964035034, | |
| "learning_rate": 1.4308562077278854e-05, | |
| "loss": 0.0109, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 0.8463572974272662, | |
| "grad_norm": 0.06178658828139305, | |
| "learning_rate": 1.4283295530629877e-05, | |
| "loss": 0.0094, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 0.8479602468542118, | |
| "grad_norm": 0.07857873290777206, | |
| "learning_rate": 1.4257995450958877e-05, | |
| "loss": 0.0105, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 0.8495631962811573, | |
| "grad_norm": 0.0908777043223381, | |
| "learning_rate": 1.423266203633487e-05, | |
| "loss": 0.0102, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.8511661457081029, | |
| "grad_norm": 0.0654769092798233, | |
| "learning_rate": 1.4207295485087837e-05, | |
| "loss": 0.0096, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 0.8527690951350485, | |
| "grad_norm": 0.06554923206567764, | |
| "learning_rate": 1.4181895995807193e-05, | |
| "loss": 0.0089, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 0.8543720445619941, | |
| "grad_norm": 0.07311050593852997, | |
| "learning_rate": 1.415646376734021e-05, | |
| "loss": 0.0091, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 0.8559749939889396, | |
| "grad_norm": 0.07530997693538666, | |
| "learning_rate": 1.4130998998790464e-05, | |
| "loss": 0.0104, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 0.8575779434158852, | |
| "grad_norm": 0.06178179755806923, | |
| "learning_rate": 1.4105501889516288e-05, | |
| "loss": 0.0095, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.8591808928428308, | |
| "grad_norm": 0.07013700902462006, | |
| "learning_rate": 1.4079972639129204e-05, | |
| "loss": 0.0095, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 0.8607838422697763, | |
| "grad_norm": 0.06522087752819061, | |
| "learning_rate": 1.4054411447492352e-05, | |
| "loss": 0.0092, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 0.8623867916967219, | |
| "grad_norm": 0.06943648308515549, | |
| "learning_rate": 1.4028818514718936e-05, | |
| "loss": 0.0093, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 0.8639897411236676, | |
| "grad_norm": 0.06666680425405502, | |
| "learning_rate": 1.4003194041170665e-05, | |
| "loss": 0.01, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 0.8655926905506132, | |
| "grad_norm": 0.06522834300994873, | |
| "learning_rate": 1.397753822745616e-05, | |
| "loss": 0.0095, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.8671956399775587, | |
| "grad_norm": 0.05904649198055267, | |
| "learning_rate": 1.3951851274429409e-05, | |
| "loss": 0.0096, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 0.8687985894045043, | |
| "grad_norm": 0.06921979039907455, | |
| "learning_rate": 1.392613338318817e-05, | |
| "loss": 0.0102, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 0.8704015388314499, | |
| "grad_norm": 0.06601817160844803, | |
| "learning_rate": 1.3900384755072424e-05, | |
| "loss": 0.0102, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 0.8720044882583955, | |
| "grad_norm": 0.07253236323595047, | |
| "learning_rate": 1.3874605591662778e-05, | |
| "loss": 0.0095, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 0.873607437685341, | |
| "grad_norm": 0.07604202628135681, | |
| "learning_rate": 1.384879609477889e-05, | |
| "loss": 0.0095, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 0.8752103871122866, | |
| "grad_norm": 0.0852079764008522, | |
| "learning_rate": 1.38229564664779e-05, | |
| "loss": 0.0101, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 0.8768133365392322, | |
| "grad_norm": 0.06676860898733139, | |
| "learning_rate": 1.379708690905283e-05, | |
| "loss": 0.0092, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 0.8784162859661777, | |
| "grad_norm": 0.06362950801849365, | |
| "learning_rate": 1.3771187625031027e-05, | |
| "loss": 0.0098, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 0.8800192353931233, | |
| "grad_norm": 0.06741517037153244, | |
| "learning_rate": 1.3745258817172544e-05, | |
| "loss": 0.0101, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 0.8816221848200689, | |
| "grad_norm": 0.0978483185172081, | |
| "learning_rate": 1.371930068846858e-05, | |
| "loss": 0.01, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.8832251342470145, | |
| "grad_norm": 0.05989569425582886, | |
| "learning_rate": 1.3693313442139877e-05, | |
| "loss": 0.0091, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 0.88482808367396, | |
| "grad_norm": 0.07173648476600647, | |
| "learning_rate": 1.3667297281635135e-05, | |
| "loss": 0.0102, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 0.8864310331009057, | |
| "grad_norm": 0.06889471411705017, | |
| "learning_rate": 1.364125241062942e-05, | |
| "loss": 0.0095, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 0.8880339825278513, | |
| "grad_norm": 0.05711568892002106, | |
| "learning_rate": 1.3615179033022556e-05, | |
| "loss": 0.0099, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 0.8896369319547969, | |
| "grad_norm": 0.07180725783109665, | |
| "learning_rate": 1.3589077352937552e-05, | |
| "loss": 0.0103, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.8912398813817424, | |
| "grad_norm": 0.06540070474147797, | |
| "learning_rate": 1.3562947574718977e-05, | |
| "loss": 0.0083, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 0.892842830808688, | |
| "grad_norm": 0.07605359703302383, | |
| "learning_rate": 1.3536789902931391e-05, | |
| "loss": 0.0106, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 0.8944457802356336, | |
| "grad_norm": 0.06887410581111908, | |
| "learning_rate": 1.351060454235771e-05, | |
| "loss": 0.0093, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 0.8960487296625791, | |
| "grad_norm": 0.06430304050445557, | |
| "learning_rate": 1.3484391697997637e-05, | |
| "loss": 0.009, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 0.8976516790895247, | |
| "grad_norm": 0.08205439895391464, | |
| "learning_rate": 1.3458151575066025e-05, | |
| "loss": 0.0095, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.8992546285164703, | |
| "grad_norm": 0.06184590607881546, | |
| "learning_rate": 1.343188437899129e-05, | |
| "loss": 0.0093, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 0.9008575779434159, | |
| "grad_norm": 0.06461317837238312, | |
| "learning_rate": 1.34055903154138e-05, | |
| "loss": 0.0093, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 0.9024605273703614, | |
| "grad_norm": 0.0797036737203598, | |
| "learning_rate": 1.3379269590184264e-05, | |
| "loss": 0.0092, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 0.904063476797307, | |
| "grad_norm": 0.07450596988201141, | |
| "learning_rate": 1.3352922409362122e-05, | |
| "loss": 0.0098, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 0.9056664262242526, | |
| "grad_norm": 0.06656588613986969, | |
| "learning_rate": 1.332654897921391e-05, | |
| "loss": 0.0097, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 0.9072693756511983, | |
| "grad_norm": 0.08003687113523483, | |
| "learning_rate": 1.3300149506211693e-05, | |
| "loss": 0.0097, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 0.9088723250781438, | |
| "grad_norm": 0.0784405767917633, | |
| "learning_rate": 1.32737241970314e-05, | |
| "loss": 0.0104, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 0.9104752745050894, | |
| "grad_norm": 0.07136031985282898, | |
| "learning_rate": 1.3247273258551236e-05, | |
| "loss": 0.0086, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 0.912078223932035, | |
| "grad_norm": 0.06181903928518295, | |
| "learning_rate": 1.3220796897850045e-05, | |
| "loss": 0.0085, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 0.9136811733589805, | |
| "grad_norm": 0.06976137310266495, | |
| "learning_rate": 1.3194295322205698e-05, | |
| "loss": 0.0099, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.9152841227859261, | |
| "grad_norm": 0.06435976177453995, | |
| "learning_rate": 1.3167768739093479e-05, | |
| "loss": 0.0095, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 0.9168870722128717, | |
| "grad_norm": 0.06821715831756592, | |
| "learning_rate": 1.314121735618443e-05, | |
| "loss": 0.0093, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 0.9184900216398173, | |
| "grad_norm": 0.07428358495235443, | |
| "learning_rate": 1.3114641381343767e-05, | |
| "loss": 0.0099, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 0.9200929710667628, | |
| "grad_norm": 0.06527336686849594, | |
| "learning_rate": 1.3088041022629217e-05, | |
| "loss": 0.0089, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 0.9216959204937084, | |
| "grad_norm": 0.0690665915608406, | |
| "learning_rate": 1.3061416488289407e-05, | |
| "loss": 0.0089, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 0.923298869920654, | |
| "grad_norm": 0.06167382001876831, | |
| "learning_rate": 1.3034767986762229e-05, | |
| "loss": 0.0094, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 0.9249018193475996, | |
| "grad_norm": 0.08395292609930038, | |
| "learning_rate": 1.3008095726673214e-05, | |
| "loss": 0.0097, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 0.9265047687745451, | |
| "grad_norm": 0.07569417357444763, | |
| "learning_rate": 1.2981399916833888e-05, | |
| "loss": 0.0096, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 0.9281077182014907, | |
| "grad_norm": 0.07300177216529846, | |
| "learning_rate": 1.2954680766240146e-05, | |
| "loss": 0.0093, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 0.9297106676284363, | |
| "grad_norm": 0.05430857837200165, | |
| "learning_rate": 1.2927938484070608e-05, | |
| "loss": 0.0084, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.9313136170553818, | |
| "grad_norm": 0.06711985915899277, | |
| "learning_rate": 1.2901173279684998e-05, | |
| "loss": 0.0098, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 0.9329165664823275, | |
| "grad_norm": 0.05374586954712868, | |
| "learning_rate": 1.2874385362622476e-05, | |
| "loss": 0.0086, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 0.9345195159092731, | |
| "grad_norm": 0.062482576817274094, | |
| "learning_rate": 1.2847574942600037e-05, | |
| "loss": 0.0099, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 0.9361224653362187, | |
| "grad_norm": 0.06739748269319534, | |
| "learning_rate": 1.2820742229510818e-05, | |
| "loss": 0.0094, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 0.9377254147631642, | |
| "grad_norm": 0.0729333832859993, | |
| "learning_rate": 1.2793887433422515e-05, | |
| "loss": 0.0105, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.9393283641901098, | |
| "grad_norm": 0.061774469912052155, | |
| "learning_rate": 1.276701076457568e-05, | |
| "loss": 0.009, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 0.9409313136170554, | |
| "grad_norm": 0.06991413980722427, | |
| "learning_rate": 1.2740112433382124e-05, | |
| "loss": 0.009, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 0.942534263044001, | |
| "grad_norm": 0.06478750705718994, | |
| "learning_rate": 1.2713192650423234e-05, | |
| "loss": 0.0093, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 0.9441372124709465, | |
| "grad_norm": 0.05868879333138466, | |
| "learning_rate": 1.2686251626448341e-05, | |
| "loss": 0.0091, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 0.9457401618978921, | |
| "grad_norm": 0.06809740513563156, | |
| "learning_rate": 1.2659289572373072e-05, | |
| "loss": 0.0099, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.9473431113248377, | |
| "grad_norm": 0.06697957962751389, | |
| "learning_rate": 1.263230669927769e-05, | |
| "loss": 0.0094, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 0.9489460607517832, | |
| "grad_norm": 0.07428871095180511, | |
| "learning_rate": 1.2605303218405449e-05, | |
| "loss": 0.0101, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 0.9505490101787288, | |
| "grad_norm": 0.06630957126617432, | |
| "learning_rate": 1.2578279341160933e-05, | |
| "loss": 0.0091, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 0.9521519596056744, | |
| "grad_norm": 0.07631520181894302, | |
| "learning_rate": 1.2551235279108407e-05, | |
| "loss": 0.009, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 0.9537549090326201, | |
| "grad_norm": 0.059855490922927856, | |
| "learning_rate": 1.2524171243970163e-05, | |
| "loss": 0.0095, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 0.9553578584595656, | |
| "grad_norm": 0.0653349757194519, | |
| "learning_rate": 1.2497087447624844e-05, | |
| "loss": 0.0105, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 0.9569608078865112, | |
| "grad_norm": 0.06262225657701492, | |
| "learning_rate": 1.2469984102105821e-05, | |
| "loss": 0.0083, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 0.9585637573134568, | |
| "grad_norm": 0.06356295198202133, | |
| "learning_rate": 1.2442861419599492e-05, | |
| "loss": 0.0105, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 0.9601667067404024, | |
| "grad_norm": 0.06270481646060944, | |
| "learning_rate": 1.2415719612443651e-05, | |
| "loss": 0.0092, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 0.9617696561673479, | |
| "grad_norm": 0.06081055477261543, | |
| "learning_rate": 1.2388558893125806e-05, | |
| "loss": 0.0083, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.9633726055942935, | |
| "grad_norm": 0.06876713782548904, | |
| "learning_rate": 1.2361379474281536e-05, | |
| "loss": 0.0097, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 0.9649755550212391, | |
| "grad_norm": 0.06346289068460464, | |
| "learning_rate": 1.233418156869281e-05, | |
| "loss": 0.0099, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 0.9665785044481846, | |
| "grad_norm": 1.9258140325546265, | |
| "learning_rate": 1.2306965389286316e-05, | |
| "loss": 0.0118, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 0.9681814538751302, | |
| "grad_norm": 0.0953875482082367, | |
| "learning_rate": 1.2279731149131821e-05, | |
| "loss": 0.0196, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 0.9697844033020758, | |
| "grad_norm": 0.09784775227308273, | |
| "learning_rate": 1.225247906144047e-05, | |
| "loss": 0.0116, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 0.9713873527290214, | |
| "grad_norm": 0.12490657716989517, | |
| "learning_rate": 1.2225209339563144e-05, | |
| "loss": 0.0114, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 0.9729903021559669, | |
| "grad_norm": 0.0873861089348793, | |
| "learning_rate": 1.2197922196988776e-05, | |
| "loss": 0.0104, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 0.9745932515829125, | |
| "grad_norm": 0.06704119592905045, | |
| "learning_rate": 1.2170617847342673e-05, | |
| "loss": 0.0094, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 0.9761962010098582, | |
| "grad_norm": 0.0751587525010109, | |
| "learning_rate": 1.2143296504384868e-05, | |
| "loss": 0.0094, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 0.9777991504368038, | |
| "grad_norm": 0.07341291010379791, | |
| "learning_rate": 1.2115958382008414e-05, | |
| "loss": 0.0099, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.9794020998637493, | |
| "grad_norm": 0.06904918700456619, | |
| "learning_rate": 1.2088603694237744e-05, | |
| "loss": 0.0099, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 0.9810050492906949, | |
| "grad_norm": 0.06313113123178482, | |
| "learning_rate": 1.2061232655226964e-05, | |
| "loss": 0.0089, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 0.9826079987176405, | |
| "grad_norm": 0.08169377595186234, | |
| "learning_rate": 1.2033845479258197e-05, | |
| "loss": 0.0086, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 0.984210948144586, | |
| "grad_norm": 0.06288773566484451, | |
| "learning_rate": 1.2006442380739896e-05, | |
| "loss": 0.0089, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 0.9858138975715316, | |
| "grad_norm": 0.07561258971691132, | |
| "learning_rate": 1.197902357420517e-05, | |
| "loss": 0.0102, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 0.9874168469984772, | |
| "grad_norm": 0.06604144722223282, | |
| "learning_rate": 1.1951589274310105e-05, | |
| "loss": 0.0092, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 0.9890197964254228, | |
| "grad_norm": 0.0782618299126625, | |
| "learning_rate": 1.1924139695832077e-05, | |
| "loss": 0.0094, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 0.9906227458523683, | |
| "grad_norm": 0.08666810393333435, | |
| "learning_rate": 1.189667505366808e-05, | |
| "loss": 0.0095, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 0.9922256952793139, | |
| "grad_norm": 0.08879975974559784, | |
| "learning_rate": 1.1869195562833027e-05, | |
| "loss": 0.009, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 0.9938286447062595, | |
| "grad_norm": 0.08351059257984161, | |
| "learning_rate": 1.1841701438458092e-05, | |
| "loss": 0.0095, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.9954315941332051, | |
| "grad_norm": 0.07370386272668839, | |
| "learning_rate": 1.181419289578901e-05, | |
| "loss": 0.0099, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 0.9970345435601506, | |
| "grad_norm": 0.07470313459634781, | |
| "learning_rate": 1.1786670150184381e-05, | |
| "loss": 0.0093, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 0.9986374929870963, | |
| "grad_norm": 0.06986988335847855, | |
| "learning_rate": 1.1759133417114013e-05, | |
| "loss": 0.0094, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 1.0001602949426946, | |
| "grad_norm": 0.032465722411870956, | |
| "learning_rate": 1.1731582912157206e-05, | |
| "loss": 0.0086, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 1.00176324436964, | |
| "grad_norm": 0.03515005484223366, | |
| "learning_rate": 1.170401885100109e-05, | |
| "loss": 0.0083, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 1.0033661937965856, | |
| "grad_norm": 0.05306578055024147, | |
| "learning_rate": 1.1676441449438908e-05, | |
| "loss": 0.0082, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 1.0049691432235313, | |
| "grad_norm": 0.047804247587919235, | |
| "learning_rate": 1.164885092336836e-05, | |
| "loss": 0.0079, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 1.0065720926504769, | |
| "grad_norm": 0.052819378674030304, | |
| "learning_rate": 1.1621247488789878e-05, | |
| "loss": 0.0077, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 1.0081750420774225, | |
| "grad_norm": 0.04436744377017021, | |
| "learning_rate": 1.159363136180496e-05, | |
| "loss": 0.0078, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 1.009777991504368, | |
| "grad_norm": 0.0423140823841095, | |
| "learning_rate": 1.1566002758614476e-05, | |
| "loss": 0.0081, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 1.0113809409313137, | |
| "grad_norm": 0.0411105714738369, | |
| "learning_rate": 1.153836189551696e-05, | |
| "loss": 0.0076, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 1.012983890358259, | |
| "grad_norm": 0.048951249569654465, | |
| "learning_rate": 1.151070898890693e-05, | |
| "loss": 0.0087, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 1.0145868397852047, | |
| "grad_norm": 0.04871077463030815, | |
| "learning_rate": 1.148304425527319e-05, | |
| "loss": 0.0083, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 1.0161897892121503, | |
| "grad_norm": 0.04017919674515724, | |
| "learning_rate": 1.1455367911197137e-05, | |
| "loss": 0.0092, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 1.017792738639096, | |
| "grad_norm": 0.04326304793357849, | |
| "learning_rate": 1.1427680173351057e-05, | |
| "loss": 0.0082, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 1.0193956880660415, | |
| "grad_norm": 0.04374143108725548, | |
| "learning_rate": 1.1399981258496447e-05, | |
| "loss": 0.007, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 1.0209986374929871, | |
| "grad_norm": 0.04934844747185707, | |
| "learning_rate": 1.1372271383482293e-05, | |
| "loss": 0.0085, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 1.0226015869199327, | |
| "grad_norm": 0.05297398567199707, | |
| "learning_rate": 1.1344550765243398e-05, | |
| "loss": 0.0088, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 1.0242045363468784, | |
| "grad_norm": 0.048744361847639084, | |
| "learning_rate": 1.1316819620798665e-05, | |
| "loss": 0.0079, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 1.0258074857738237, | |
| "grad_norm": 0.04246707633137703, | |
| "learning_rate": 1.1289078167249403e-05, | |
| "loss": 0.0088, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 1.0274104352007694, | |
| "grad_norm": 0.060280464589595795, | |
| "learning_rate": 1.1261326621777635e-05, | |
| "loss": 0.0073, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 1.029013384627715, | |
| "grad_norm": 0.09698043763637543, | |
| "learning_rate": 1.1233565201644383e-05, | |
| "loss": 0.0083, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 1.0306163340546606, | |
| "grad_norm": 0.045573972165584564, | |
| "learning_rate": 1.1205794124187985e-05, | |
| "loss": 0.0081, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 1.0322192834816062, | |
| "grad_norm": 0.04904637858271599, | |
| "learning_rate": 1.117801360682238e-05, | |
| "loss": 0.0093, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 1.0338222329085518, | |
| "grad_norm": 0.06047618016600609, | |
| "learning_rate": 1.1150223867035405e-05, | |
| "loss": 0.0091, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 1.0354251823354974, | |
| "grad_norm": 0.04516315832734108, | |
| "learning_rate": 1.112242512238711e-05, | |
| "loss": 0.0082, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 1.0370281317624428, | |
| "grad_norm": 0.0490105003118515, | |
| "learning_rate": 1.1094617590508025e-05, | |
| "loss": 0.0082, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 1.0386310811893884, | |
| "grad_norm": 0.0697263553738594, | |
| "learning_rate": 1.106680148909749e-05, | |
| "loss": 0.0085, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 1.040234030616334, | |
| "grad_norm": 0.04336274787783623, | |
| "learning_rate": 1.1038977035921921e-05, | |
| "loss": 0.0077, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 1.0418369800432796, | |
| "grad_norm": 0.043528806418180466, | |
| "learning_rate": 1.1011144448813129e-05, | |
| "loss": 0.0082, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.0434399294702252, | |
| "grad_norm": 0.036150723695755005, | |
| "learning_rate": 1.0983303945666599e-05, | |
| "loss": 0.0079, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 1.0450428788971708, | |
| "grad_norm": 0.057397518306970596, | |
| "learning_rate": 1.0955455744439782e-05, | |
| "loss": 0.0076, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 1.0466458283241165, | |
| "grad_norm": 0.04161163419485092, | |
| "learning_rate": 1.0927600063150413e-05, | |
| "loss": 0.007, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 1.0482487777510618, | |
| "grad_norm": 0.03657936677336693, | |
| "learning_rate": 1.0899737119874769e-05, | |
| "loss": 0.0078, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 1.0498517271780075, | |
| "grad_norm": 0.04080647602677345, | |
| "learning_rate": 1.0871867132745989e-05, | |
| "loss": 0.0075, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 1.051454676604953, | |
| "grad_norm": 0.04533609747886658, | |
| "learning_rate": 1.0843990319952351e-05, | |
| "loss": 0.0082, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 1.0530576260318987, | |
| "grad_norm": 0.05578101426362991, | |
| "learning_rate": 1.0816106899735579e-05, | |
| "loss": 0.0082, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 1.0546605754588443, | |
| "grad_norm": 0.04739479348063469, | |
| "learning_rate": 1.078821709038912e-05, | |
| "loss": 0.0087, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 1.05626352488579, | |
| "grad_norm": 0.04615991190075874, | |
| "learning_rate": 1.0760321110256436e-05, | |
| "loss": 0.0091, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 1.0578664743127355, | |
| "grad_norm": 0.05354125425219536, | |
| "learning_rate": 1.0732419177729303e-05, | |
| "loss": 0.0071, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 1.0594694237396811, | |
| "grad_norm": 0.040618497878313065, | |
| "learning_rate": 1.0704511511246096e-05, | |
| "loss": 0.0079, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 1.0610723731666265, | |
| "grad_norm": 0.043357282876968384, | |
| "learning_rate": 1.0676598329290087e-05, | |
| "loss": 0.0079, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 1.0626753225935721, | |
| "grad_norm": 0.033633410930633545, | |
| "learning_rate": 1.064867985038771e-05, | |
| "loss": 0.0078, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 1.0642782720205177, | |
| "grad_norm": 0.05955130606889725, | |
| "learning_rate": 1.0620756293106891e-05, | |
| "loss": 0.008, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 1.0658812214474633, | |
| "grad_norm": 0.04183583706617355, | |
| "learning_rate": 1.0592827876055291e-05, | |
| "loss": 0.0077, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 1.067484170874409, | |
| "grad_norm": 0.05350350961089134, | |
| "learning_rate": 1.0564894817878632e-05, | |
| "loss": 0.0077, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 1.0690871203013546, | |
| "grad_norm": 0.05788803473114967, | |
| "learning_rate": 1.0536957337258968e-05, | |
| "loss": 0.0077, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 1.0706900697283002, | |
| "grad_norm": 0.046094585210084915, | |
| "learning_rate": 1.0509015652912965e-05, | |
| "loss": 0.008, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 1.0722930191552456, | |
| "grad_norm": 0.05263133347034454, | |
| "learning_rate": 1.0481069983590222e-05, | |
| "loss": 0.0077, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 1.0738959685821912, | |
| "grad_norm": 0.04569438099861145, | |
| "learning_rate": 1.0453120548071503e-05, | |
| "loss": 0.0077, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 1.0754989180091368, | |
| "grad_norm": 0.054995764046907425, | |
| "learning_rate": 1.0425167565167085e-05, | |
| "loss": 0.008, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 1.0771018674360824, | |
| "grad_norm": 0.05096138268709183, | |
| "learning_rate": 1.0397211253715005e-05, | |
| "loss": 0.0075, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 1.078704816863028, | |
| "grad_norm": 0.04507224261760712, | |
| "learning_rate": 1.0369251832579362e-05, | |
| "loss": 0.0081, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 1.0803077662899736, | |
| "grad_norm": 0.046497710049152374, | |
| "learning_rate": 1.0341289520648591e-05, | |
| "loss": 0.0083, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 1.0819107157169192, | |
| "grad_norm": 0.04293447360396385, | |
| "learning_rate": 1.031332453683377e-05, | |
| "loss": 0.0081, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 1.0835136651438648, | |
| "grad_norm": 0.042333897203207016, | |
| "learning_rate": 1.028535710006689e-05, | |
| "loss": 0.0081, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 1.0851166145708102, | |
| "grad_norm": 0.036750372499227524, | |
| "learning_rate": 1.0257387429299144e-05, | |
| "loss": 0.0088, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 1.0867195639977558, | |
| "grad_norm": 0.05808829143643379, | |
| "learning_rate": 1.0229415743499217e-05, | |
| "loss": 0.0081, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 1.0883225134247014, | |
| "grad_norm": 0.04959068074822426, | |
| "learning_rate": 1.0201442261651571e-05, | |
| "loss": 0.008, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 1.089925462851647, | |
| "grad_norm": 0.04482626914978027, | |
| "learning_rate": 1.017346720275472e-05, | |
| "loss": 0.0072, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 1.0915284122785927, | |
| "grad_norm": 0.03439100831747055, | |
| "learning_rate": 1.0145490785819537e-05, | |
| "loss": 0.0079, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 1.0931313617055383, | |
| "grad_norm": 0.053014714270830154, | |
| "learning_rate": 1.0117513229867515e-05, | |
| "loss": 0.0083, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 1.0947343111324839, | |
| "grad_norm": 0.043408554047346115, | |
| "learning_rate": 1.0089534753929073e-05, | |
| "loss": 0.0085, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 1.0963372605594293, | |
| "grad_norm": 0.044179074466228485, | |
| "learning_rate": 1.0061555577041828e-05, | |
| "loss": 0.008, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 1.0979402099863749, | |
| "grad_norm": 0.05316644906997681, | |
| "learning_rate": 1.0033575918248884e-05, | |
| "loss": 0.0084, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 1.0995431594133205, | |
| "grad_norm": 0.06040149927139282, | |
| "learning_rate": 1.0005595996597122e-05, | |
| "loss": 0.0081, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 1.101146108840266, | |
| "grad_norm": 0.040795937180519104, | |
| "learning_rate": 9.977616031135476e-06, | |
| "loss": 0.0083, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 1.1027490582672117, | |
| "grad_norm": 0.060764264315366745, | |
| "learning_rate": 9.949636240913228e-06, | |
| "loss": 0.0086, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 1.1043520076941573, | |
| "grad_norm": 0.04060014709830284, | |
| "learning_rate": 9.921656844978284e-06, | |
| "loss": 0.0073, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 1.105954957121103, | |
| "grad_norm": 0.04240868240594864, | |
| "learning_rate": 9.893678062375455e-06, | |
| "loss": 0.0085, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 1.1075579065480483, | |
| "grad_norm": 0.04362311586737633, | |
| "learning_rate": 9.865700112144776e-06, | |
| "loss": 0.0081, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 1.109160855974994, | |
| "grad_norm": 0.04985825717449188, | |
| "learning_rate": 9.83772321331974e-06, | |
| "loss": 0.0086, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 1.1107638054019395, | |
| "grad_norm": 0.04281558841466904, | |
| "learning_rate": 9.809747584925617e-06, | |
| "loss": 0.0079, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 1.1123667548288851, | |
| "grad_norm": 0.04530913755297661, | |
| "learning_rate": 9.781773445977737e-06, | |
| "loss": 0.0079, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 1.1139697042558308, | |
| "grad_norm": 0.05035943537950516, | |
| "learning_rate": 9.753801015479762e-06, | |
| "loss": 0.0076, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 1.1155726536827764, | |
| "grad_norm": 0.03706235811114311, | |
| "learning_rate": 9.725830512421981e-06, | |
| "loss": 0.0077, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 1.117175603109722, | |
| "grad_norm": 0.04904279112815857, | |
| "learning_rate": 9.697862155779593e-06, | |
| "loss": 0.0083, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 1.1187785525366674, | |
| "grad_norm": 0.03876826539635658, | |
| "learning_rate": 9.669896164510996e-06, | |
| "loss": 0.0083, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 1.120381501963613, | |
| "grad_norm": 0.04224033281207085, | |
| "learning_rate": 9.641932757556069e-06, | |
| "loss": 0.0086, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 1.1219844513905586, | |
| "grad_norm": 0.032640572637319565, | |
| "learning_rate": 9.613972153834451e-06, | |
| "loss": 0.0075, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.1235874008175042, | |
| "grad_norm": 0.054974623024463654, | |
| "learning_rate": 9.586014572243852e-06, | |
| "loss": 0.0081, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 1.1251903502444498, | |
| "grad_norm": 0.04727548360824585, | |
| "learning_rate": 9.558060231658308e-06, | |
| "loss": 0.0076, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 1.1267932996713954, | |
| "grad_norm": 0.049345288425683975, | |
| "learning_rate": 9.53010935092649e-06, | |
| "loss": 0.0072, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 1.128396249098341, | |
| "grad_norm": 0.045910853892564774, | |
| "learning_rate": 9.502162148869967e-06, | |
| "loss": 0.0078, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 1.1299991985252866, | |
| "grad_norm": 0.037328120321035385, | |
| "learning_rate": 9.474218844281533e-06, | |
| "loss": 0.0072, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 1.131602147952232, | |
| "grad_norm": 0.04577163606882095, | |
| "learning_rate": 9.446279655923451e-06, | |
| "loss": 0.0081, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 1.1332050973791776, | |
| "grad_norm": 0.0425073467195034, | |
| "learning_rate": 9.418344802525767e-06, | |
| "loss": 0.0081, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 1.1348080468061232, | |
| "grad_norm": 0.05351976305246353, | |
| "learning_rate": 9.390414502784586e-06, | |
| "loss": 0.0081, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 1.1364109962330688, | |
| "grad_norm": 0.052148912101984024, | |
| "learning_rate": 9.362488975360364e-06, | |
| "loss": 0.0083, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 1.1380139456600145, | |
| "grad_norm": 0.04858115687966347, | |
| "learning_rate": 9.334568438876198e-06, | |
| "loss": 0.0078, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 1.13961689508696, | |
| "grad_norm": 0.05193081498146057, | |
| "learning_rate": 9.306653111916105e-06, | |
| "loss": 0.0076, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 1.1412198445139057, | |
| "grad_norm": 0.041127193719148636, | |
| "learning_rate": 9.27874321302333e-06, | |
| "loss": 0.0078, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 1.142822793940851, | |
| "grad_norm": 0.0410025380551815, | |
| "learning_rate": 9.250838960698613e-06, | |
| "loss": 0.0071, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 1.1444257433677967, | |
| "grad_norm": 0.04048438370227814, | |
| "learning_rate": 9.222940573398485e-06, | |
| "loss": 0.0073, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 1.1460286927947423, | |
| "grad_norm": 0.039375003427267075, | |
| "learning_rate": 9.195048269533575e-06, | |
| "loss": 0.0074, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 1.147631642221688, | |
| "grad_norm": 0.04218687117099762, | |
| "learning_rate": 9.167162267466876e-06, | |
| "loss": 0.0077, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 1.1492345916486335, | |
| "grad_norm": 0.045502275228500366, | |
| "learning_rate": 9.139282785512046e-06, | |
| "loss": 0.0079, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 1.1508375410755791, | |
| "grad_norm": 0.03404016047716141, | |
| "learning_rate": 9.111410041931696e-06, | |
| "loss": 0.0078, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 1.1524404905025247, | |
| "grad_norm": 0.032071568071842194, | |
| "learning_rate": 9.083544254935696e-06, | |
| "loss": 0.0086, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 1.1540434399294703, | |
| "grad_norm": 0.059753019362688065, | |
| "learning_rate": 9.05568564267944e-06, | |
| "loss": 0.007, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 1.1556463893564157, | |
| "grad_norm": 0.03715592995285988, | |
| "learning_rate": 9.027834423262157e-06, | |
| "loss": 0.0078, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 1.1572493387833613, | |
| "grad_norm": 0.0512300506234169, | |
| "learning_rate": 8.999990814725204e-06, | |
| "loss": 0.0081, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 1.158852288210307, | |
| "grad_norm": 0.04520628973841667, | |
| "learning_rate": 8.972155035050351e-06, | |
| "loss": 0.0082, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 1.1604552376372526, | |
| "grad_norm": 0.0374721996486187, | |
| "learning_rate": 8.944327302158073e-06, | |
| "loss": 0.0075, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 1.1620581870641982, | |
| "grad_norm": 0.0465792752802372, | |
| "learning_rate": 8.91650783390585e-06, | |
| "loss": 0.0074, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 1.1636611364911438, | |
| "grad_norm": 0.06805742532014847, | |
| "learning_rate": 8.888696848086474e-06, | |
| "loss": 0.0077, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 1.1652640859180892, | |
| "grad_norm": 0.055558763444423676, | |
| "learning_rate": 8.860894562426308e-06, | |
| "loss": 0.0073, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 1.1668670353450348, | |
| "grad_norm": 0.057639673352241516, | |
| "learning_rate": 8.83310119458361e-06, | |
| "loss": 0.0076, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 1.1684699847719804, | |
| "grad_norm": 0.04337713494896889, | |
| "learning_rate": 8.805316962146835e-06, | |
| "loss": 0.0079, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 1.170072934198926, | |
| "grad_norm": 0.05388013273477554, | |
| "learning_rate": 8.777542082632906e-06, | |
| "loss": 0.0083, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 1.1716758836258716, | |
| "grad_norm": 0.04648788273334503, | |
| "learning_rate": 8.749776773485525e-06, | |
| "loss": 0.0082, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 1.1732788330528172, | |
| "grad_norm": 0.03778034448623657, | |
| "learning_rate": 8.722021252073471e-06, | |
| "loss": 0.0077, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 1.1748817824797628, | |
| "grad_norm": 0.049436796456575394, | |
| "learning_rate": 8.694275735688903e-06, | |
| "loss": 0.0077, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 1.1764847319067084, | |
| "grad_norm": 0.05889998748898506, | |
| "learning_rate": 8.666540441545643e-06, | |
| "loss": 0.0083, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 1.178087681333654, | |
| "grad_norm": 0.05097229406237602, | |
| "learning_rate": 8.63881558677749e-06, | |
| "loss": 0.0073, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 1.1796906307605994, | |
| "grad_norm": 0.04770096018910408, | |
| "learning_rate": 8.611101388436518e-06, | |
| "loss": 0.0077, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 1.181293580187545, | |
| "grad_norm": 0.04946606978774071, | |
| "learning_rate": 8.583398063491368e-06, | |
| "loss": 0.0074, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 1.1828965296144907, | |
| "grad_norm": 0.039538607001304626, | |
| "learning_rate": 8.55570582882556e-06, | |
| "loss": 0.0073, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 1.1844994790414363, | |
| "grad_norm": 0.04183432087302208, | |
| "learning_rate": 8.528024901235784e-06, | |
| "loss": 0.0082, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 1.1861024284683819, | |
| "grad_norm": 0.04200197011232376, | |
| "learning_rate": 8.500355497430223e-06, | |
| "loss": 0.0076, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 1.1877053778953275, | |
| "grad_norm": 0.043875742703676224, | |
| "learning_rate": 8.472697834026832e-06, | |
| "loss": 0.0074, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 1.1893083273222729, | |
| "grad_norm": 0.04134812578558922, | |
| "learning_rate": 8.445052127551647e-06, | |
| "loss": 0.0075, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 1.1909112767492185, | |
| "grad_norm": 0.07085922360420227, | |
| "learning_rate": 8.417418594437115e-06, | |
| "loss": 0.0079, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 1.192514226176164, | |
| "grad_norm": 0.037032246589660645, | |
| "learning_rate": 8.389797451020361e-06, | |
| "loss": 0.007, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 1.1941171756031097, | |
| "grad_norm": 0.0462561696767807, | |
| "learning_rate": 8.362188913541525e-06, | |
| "loss": 0.0076, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 1.1957201250300553, | |
| "grad_norm": 0.048092614859342575, | |
| "learning_rate": 8.334593198142049e-06, | |
| "loss": 0.0077, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 1.197323074457001, | |
| "grad_norm": 0.043921004980802536, | |
| "learning_rate": 8.307010520863008e-06, | |
| "loss": 0.0081, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 1.1989260238839465, | |
| "grad_norm": 0.048510029911994934, | |
| "learning_rate": 8.27944109764339e-06, | |
| "loss": 0.0076, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 1.2005289733108921, | |
| "grad_norm": 0.060406643897295, | |
| "learning_rate": 8.251885144318421e-06, | |
| "loss": 0.0082, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 1.2021319227378375, | |
| "grad_norm": 0.03776608407497406, | |
| "learning_rate": 8.224342876617887e-06, | |
| "loss": 0.0077, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.2037348721647831, | |
| "grad_norm": 0.04678969085216522, | |
| "learning_rate": 8.196814510164416e-06, | |
| "loss": 0.008, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 1.2053378215917288, | |
| "grad_norm": 0.04679039865732193, | |
| "learning_rate": 8.169300260471818e-06, | |
| "loss": 0.0081, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 1.2069407710186744, | |
| "grad_norm": 0.05291286110877991, | |
| "learning_rate": 8.141800342943375e-06, | |
| "loss": 0.008, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 1.20854372044562, | |
| "grad_norm": 0.0398259200155735, | |
| "learning_rate": 8.114314972870179e-06, | |
| "loss": 0.0081, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 1.2101466698725656, | |
| "grad_norm": 0.046976324170827866, | |
| "learning_rate": 8.086844365429421e-06, | |
| "loss": 0.0085, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 1.2117496192995112, | |
| "grad_norm": 0.032477136701345444, | |
| "learning_rate": 8.059388735682723e-06, | |
| "loss": 0.0071, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 1.2133525687264566, | |
| "grad_norm": 0.04444463923573494, | |
| "learning_rate": 8.031948298574452e-06, | |
| "loss": 0.0077, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 1.2149555181534022, | |
| "grad_norm": 0.055053021758794785, | |
| "learning_rate": 8.00452326893003e-06, | |
| "loss": 0.0076, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 1.2165584675803478, | |
| "grad_norm": 0.04440735653042793, | |
| "learning_rate": 7.977113861454265e-06, | |
| "loss": 0.0077, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 1.2181614170072934, | |
| "grad_norm": 0.038958437740802765, | |
| "learning_rate": 7.949720290729649e-06, | |
| "loss": 0.0077, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 1.219764366434239, | |
| "grad_norm": 0.04088424891233444, | |
| "learning_rate": 7.922342771214707e-06, | |
| "loss": 0.0073, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 1.2213673158611846, | |
| "grad_norm": 0.048896338790655136, | |
| "learning_rate": 7.894981517242293e-06, | |
| "loss": 0.0073, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 1.2229702652881302, | |
| "grad_norm": 0.03298410400748253, | |
| "learning_rate": 7.867636743017919e-06, | |
| "loss": 0.0081, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 1.2245732147150759, | |
| "grad_norm": 0.05103585496544838, | |
| "learning_rate": 7.840308662618096e-06, | |
| "loss": 0.0075, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 1.2261761641420212, | |
| "grad_norm": 0.0347796194255352, | |
| "learning_rate": 7.812997489988622e-06, | |
| "loss": 0.0079, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 1.2277791135689669, | |
| "grad_norm": 0.04016980156302452, | |
| "learning_rate": 7.785703438942941e-06, | |
| "loss": 0.0077, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 1.2293820629959125, | |
| "grad_norm": 0.049017369747161865, | |
| "learning_rate": 7.75842672316045e-06, | |
| "loss": 0.0066, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 1.230985012422858, | |
| "grad_norm": 0.050889965146780014, | |
| "learning_rate": 7.731167556184836e-06, | |
| "loss": 0.0074, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 1.2325879618498037, | |
| "grad_norm": 0.04382390156388283, | |
| "learning_rate": 7.7039261514224e-06, | |
| "loss": 0.0072, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 1.2341909112767493, | |
| "grad_norm": 0.042560938745737076, | |
| "learning_rate": 7.676702722140378e-06, | |
| "loss": 0.007, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 1.2357938607036947, | |
| "grad_norm": 0.047447387129068375, | |
| "learning_rate": 7.649497481465291e-06, | |
| "loss": 0.0077, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 1.2373968101306403, | |
| "grad_norm": 0.03900102153420448, | |
| "learning_rate": 7.622310642381261e-06, | |
| "loss": 0.0072, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 1.238999759557586, | |
| "grad_norm": 0.05226941406726837, | |
| "learning_rate": 7.595142417728344e-06, | |
| "loss": 0.0082, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 1.2406027089845315, | |
| "grad_norm": 0.045353468507528305, | |
| "learning_rate": 7.56799302020087e-06, | |
| "loss": 0.0079, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 1.2422056584114771, | |
| "grad_norm": 0.0446421317756176, | |
| "learning_rate": 7.54086266234578e-06, | |
| "loss": 0.0083, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 1.2438086078384227, | |
| "grad_norm": 0.037011098116636276, | |
| "learning_rate": 7.513751556560951e-06, | |
| "loss": 0.0078, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 1.2454115572653683, | |
| "grad_norm": 0.04144198074936867, | |
| "learning_rate": 7.486659915093537e-06, | |
| "loss": 0.008, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 1.247014506692314, | |
| "grad_norm": 0.03886372596025467, | |
| "learning_rate": 7.459587950038325e-06, | |
| "loss": 0.007, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 1.2486174561192596, | |
| "grad_norm": 0.045625023543834686, | |
| "learning_rate": 7.432535873336046e-06, | |
| "loss": 0.0074, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 1.250220405546205, | |
| "grad_norm": 0.04141170531511307, | |
| "learning_rate": 7.4055038967717286e-06, | |
| "loss": 0.0083, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 1.2518233549731506, | |
| "grad_norm": 0.0399341844022274, | |
| "learning_rate": 7.378492231973044e-06, | |
| "loss": 0.0073, | |
| "step": 7810 | |
| }, | |
| { | |
| "epoch": 1.2534263044000962, | |
| "grad_norm": 0.055795952677726746, | |
| "learning_rate": 7.351501090408658e-06, | |
| "loss": 0.0079, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 1.2550292538270418, | |
| "grad_norm": 0.059044260531663895, | |
| "learning_rate": 7.324530683386549e-06, | |
| "loss": 0.0078, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 1.2566322032539874, | |
| "grad_norm": 0.03669803962111473, | |
| "learning_rate": 7.297581222052373e-06, | |
| "loss": 0.0078, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 1.2582351526809328, | |
| "grad_norm": 0.036892443895339966, | |
| "learning_rate": 7.270652917387812e-06, | |
| "loss": 0.007, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 1.2598381021078784, | |
| "grad_norm": 0.0762961283326149, | |
| "learning_rate": 7.243745980208915e-06, | |
| "loss": 0.0074, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 1.261441051534824, | |
| "grad_norm": 0.04703805595636368, | |
| "learning_rate": 7.2168606211644435e-06, | |
| "loss": 0.0081, | |
| "step": 7870 | |
| }, | |
| { | |
| "epoch": 1.2630440009617696, | |
| "grad_norm": 0.07492291182279587, | |
| "learning_rate": 7.189997050734232e-06, | |
| "loss": 0.0076, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 1.2646469503887152, | |
| "grad_norm": 0.045667752623558044, | |
| "learning_rate": 7.16315547922754e-06, | |
| "loss": 0.0076, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 1.2662498998156608, | |
| "grad_norm": 0.0335981622338295, | |
| "learning_rate": 7.1363361167814e-06, | |
| "loss": 0.0088, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 1.2678528492426064, | |
| "grad_norm": 0.038948290050029755, | |
| "learning_rate": 7.109539173358968e-06, | |
| "loss": 0.0071, | |
| "step": 7910 | |
| }, | |
| { | |
| "epoch": 1.269455798669552, | |
| "grad_norm": 0.0461229644715786, | |
| "learning_rate": 7.082764858747899e-06, | |
| "loss": 0.0075, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 1.2710587480964977, | |
| "grad_norm": 0.037394460290670395, | |
| "learning_rate": 7.056013382558683e-06, | |
| "loss": 0.0076, | |
| "step": 7930 | |
| }, | |
| { | |
| "epoch": 1.2726616975234433, | |
| "grad_norm": 0.03177995607256889, | |
| "learning_rate": 7.02928495422301e-06, | |
| "loss": 0.007, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 1.2742646469503887, | |
| "grad_norm": 0.037877414375543594, | |
| "learning_rate": 7.002579782992138e-06, | |
| "loss": 0.0075, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 1.2758675963773343, | |
| "grad_norm": 0.04539757966995239, | |
| "learning_rate": 6.975898077935255e-06, | |
| "loss": 0.0076, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 1.2774705458042799, | |
| "grad_norm": 0.04658060148358345, | |
| "learning_rate": 6.949240047937828e-06, | |
| "loss": 0.0075, | |
| "step": 7970 | |
| }, | |
| { | |
| "epoch": 1.2790734952312255, | |
| "grad_norm": 0.051648322492837906, | |
| "learning_rate": 6.922605901699978e-06, | |
| "loss": 0.0073, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 1.280676444658171, | |
| "grad_norm": 0.05647370219230652, | |
| "learning_rate": 6.895995847734853e-06, | |
| "loss": 0.008, | |
| "step": 7990 | |
| }, | |
| { | |
| "epoch": 1.2822793940851165, | |
| "grad_norm": 0.03840125352144241, | |
| "learning_rate": 6.8694100943669815e-06, | |
| "loss": 0.0077, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.283882343512062, | |
| "grad_norm": 0.03707459568977356, | |
| "learning_rate": 6.842848849730647e-06, | |
| "loss": 0.0076, | |
| "step": 8010 | |
| }, | |
| { | |
| "epoch": 1.2854852929390077, | |
| "grad_norm": 0.04414204880595207, | |
| "learning_rate": 6.8163123217682584e-06, | |
| "loss": 0.0072, | |
| "step": 8020 | |
| }, | |
| { | |
| "epoch": 1.2870882423659533, | |
| "grad_norm": 0.051417384296655655, | |
| "learning_rate": 6.7898007182287294e-06, | |
| "loss": 0.0074, | |
| "step": 8030 | |
| }, | |
| { | |
| "epoch": 1.288691191792899, | |
| "grad_norm": 0.050453051924705505, | |
| "learning_rate": 6.763314246665842e-06, | |
| "loss": 0.0074, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 1.2902941412198445, | |
| "grad_norm": 0.05048830434679985, | |
| "learning_rate": 6.736853114436619e-06, | |
| "loss": 0.0075, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 1.2918970906467901, | |
| "grad_norm": 0.04227893427014351, | |
| "learning_rate": 6.710417528699722e-06, | |
| "loss": 0.0068, | |
| "step": 8060 | |
| }, | |
| { | |
| "epoch": 1.2935000400737358, | |
| "grad_norm": 0.043056413531303406, | |
| "learning_rate": 6.684007696413799e-06, | |
| "loss": 0.0078, | |
| "step": 8070 | |
| }, | |
| { | |
| "epoch": 1.2951029895006814, | |
| "grad_norm": 0.08075862377882004, | |
| "learning_rate": 6.6576238243358905e-06, | |
| "loss": 0.0073, | |
| "step": 8080 | |
| }, | |
| { | |
| "epoch": 1.2967059389276268, | |
| "grad_norm": 0.04923049733042717, | |
| "learning_rate": 6.631266119019786e-06, | |
| "loss": 0.0068, | |
| "step": 8090 | |
| }, | |
| { | |
| "epoch": 1.2983088883545724, | |
| "grad_norm": 0.04691435396671295, | |
| "learning_rate": 6.604934786814439e-06, | |
| "loss": 0.0076, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 1.299911837781518, | |
| "grad_norm": 0.053735796362161636, | |
| "learning_rate": 6.578630033862324e-06, | |
| "loss": 0.0079, | |
| "step": 8110 | |
| }, | |
| { | |
| "epoch": 1.3015147872084636, | |
| "grad_norm": 0.04722294583916664, | |
| "learning_rate": 6.552352066097829e-06, | |
| "loss": 0.008, | |
| "step": 8120 | |
| }, | |
| { | |
| "epoch": 1.3031177366354092, | |
| "grad_norm": 0.037270933389663696, | |
| "learning_rate": 6.5261010892456515e-06, | |
| "loss": 0.0075, | |
| "step": 8130 | |
| }, | |
| { | |
| "epoch": 1.3047206860623548, | |
| "grad_norm": 0.04054490104317665, | |
| "learning_rate": 6.499877308819184e-06, | |
| "loss": 0.0082, | |
| "step": 8140 | |
| }, | |
| { | |
| "epoch": 1.3063236354893002, | |
| "grad_norm": 0.04223480075597763, | |
| "learning_rate": 6.473680930118899e-06, | |
| "loss": 0.0081, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 1.3079265849162458, | |
| "grad_norm": 0.05101209506392479, | |
| "learning_rate": 6.447512158230746e-06, | |
| "loss": 0.0069, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 1.3095295343431914, | |
| "grad_norm": 0.05035392940044403, | |
| "learning_rate": 6.42137119802456e-06, | |
| "loss": 0.0072, | |
| "step": 8170 | |
| }, | |
| { | |
| "epoch": 1.311132483770137, | |
| "grad_norm": 0.04680299758911133, | |
| "learning_rate": 6.3952582541524235e-06, | |
| "loss": 0.0075, | |
| "step": 8180 | |
| }, | |
| { | |
| "epoch": 1.3127354331970826, | |
| "grad_norm": 0.044884469360113144, | |
| "learning_rate": 6.369173531047099e-06, | |
| "loss": 0.0071, | |
| "step": 8190 | |
| }, | |
| { | |
| "epoch": 1.3143383826240282, | |
| "grad_norm": 0.03105269744992256, | |
| "learning_rate": 6.343117232920407e-06, | |
| "loss": 0.0072, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 1.3159413320509739, | |
| "grad_norm": 0.041606560349464417, | |
| "learning_rate": 6.317089563761647e-06, | |
| "loss": 0.0076, | |
| "step": 8210 | |
| }, | |
| { | |
| "epoch": 1.3175442814779195, | |
| "grad_norm": 0.03596537187695503, | |
| "learning_rate": 6.291090727335974e-06, | |
| "loss": 0.0079, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 1.319147230904865, | |
| "grad_norm": 0.05035543814301491, | |
| "learning_rate": 6.265120927182824e-06, | |
| "loss": 0.0076, | |
| "step": 8230 | |
| }, | |
| { | |
| "epoch": 1.3207501803318105, | |
| "grad_norm": 0.036603864282369614, | |
| "learning_rate": 6.2391803666143145e-06, | |
| "loss": 0.0081, | |
| "step": 8240 | |
| }, | |
| { | |
| "epoch": 1.322353129758756, | |
| "grad_norm": 0.04323893412947655, | |
| "learning_rate": 6.213269248713653e-06, | |
| "loss": 0.0078, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 1.3239560791857017, | |
| "grad_norm": 0.03787427023053169, | |
| "learning_rate": 6.187387776333542e-06, | |
| "loss": 0.0073, | |
| "step": 8260 | |
| }, | |
| { | |
| "epoch": 1.3255590286126473, | |
| "grad_norm": 0.04349483549594879, | |
| "learning_rate": 6.161536152094598e-06, | |
| "loss": 0.0076, | |
| "step": 8270 | |
| }, | |
| { | |
| "epoch": 1.327161978039593, | |
| "grad_norm": 0.043585531413555145, | |
| "learning_rate": 6.135714578383769e-06, | |
| "loss": 0.0072, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 1.3287649274665383, | |
| "grad_norm": 0.03276698291301727, | |
| "learning_rate": 6.109923257352732e-06, | |
| "loss": 0.0077, | |
| "step": 8290 | |
| }, | |
| { | |
| "epoch": 1.330367876893484, | |
| "grad_norm": 0.04165283590555191, | |
| "learning_rate": 6.084162390916328e-06, | |
| "loss": 0.008, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 1.3319708263204295, | |
| "grad_norm": 0.05153461545705795, | |
| "learning_rate": 6.0584321807509825e-06, | |
| "loss": 0.0073, | |
| "step": 8310 | |
| }, | |
| { | |
| "epoch": 1.3335737757473751, | |
| "grad_norm": 0.049016524106264114, | |
| "learning_rate": 6.032732828293106e-06, | |
| "loss": 0.0071, | |
| "step": 8320 | |
| }, | |
| { | |
| "epoch": 1.3351767251743207, | |
| "grad_norm": 0.042667657136917114, | |
| "learning_rate": 6.007064534737538e-06, | |
| "loss": 0.007, | |
| "step": 8330 | |
| }, | |
| { | |
| "epoch": 1.3367796746012663, | |
| "grad_norm": 0.053493741899728775, | |
| "learning_rate": 5.981427501035959e-06, | |
| "loss": 0.008, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 1.338382624028212, | |
| "grad_norm": 0.04648403078317642, | |
| "learning_rate": 5.955821927895337e-06, | |
| "loss": 0.0072, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 1.3399855734551576, | |
| "grad_norm": 0.039376430213451385, | |
| "learning_rate": 5.930248015776325e-06, | |
| "loss": 0.0072, | |
| "step": 8360 | |
| }, | |
| { | |
| "epoch": 1.3415885228821032, | |
| "grad_norm": 0.043529435992240906, | |
| "learning_rate": 5.904705964891715e-06, | |
| "loss": 0.0073, | |
| "step": 8370 | |
| }, | |
| { | |
| "epoch": 1.3431914723090488, | |
| "grad_norm": 0.05400298163294792, | |
| "learning_rate": 5.8791959752048675e-06, | |
| "loss": 0.0073, | |
| "step": 8380 | |
| }, | |
| { | |
| "epoch": 1.3447944217359942, | |
| "grad_norm": 0.04469398036599159, | |
| "learning_rate": 5.853718246428137e-06, | |
| "loss": 0.0072, | |
| "step": 8390 | |
| }, | |
| { | |
| "epoch": 1.3463973711629398, | |
| "grad_norm": 0.053977783769369125, | |
| "learning_rate": 5.828272978021319e-06, | |
| "loss": 0.0078, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 1.3480003205898854, | |
| "grad_norm": 0.0440487302839756, | |
| "learning_rate": 5.802860369190076e-06, | |
| "loss": 0.0072, | |
| "step": 8410 | |
| }, | |
| { | |
| "epoch": 1.349603270016831, | |
| "grad_norm": 0.03750430420041084, | |
| "learning_rate": 5.7774806188843955e-06, | |
| "loss": 0.0078, | |
| "step": 8420 | |
| }, | |
| { | |
| "epoch": 1.3512062194437766, | |
| "grad_norm": 0.03685884550213814, | |
| "learning_rate": 5.7521339257970196e-06, | |
| "loss": 0.0076, | |
| "step": 8430 | |
| }, | |
| { | |
| "epoch": 1.352809168870722, | |
| "grad_norm": 0.04150310531258583, | |
| "learning_rate": 5.7268204883618836e-06, | |
| "loss": 0.0077, | |
| "step": 8440 | |
| }, | |
| { | |
| "epoch": 1.3544121182976676, | |
| "grad_norm": 0.03886445239186287, | |
| "learning_rate": 5.701540504752583e-06, | |
| "loss": 0.0086, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 1.3560150677246132, | |
| "grad_norm": 0.043425098061561584, | |
| "learning_rate": 5.6762941728808065e-06, | |
| "loss": 0.0074, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 1.3576180171515588, | |
| "grad_norm": 0.03988328576087952, | |
| "learning_rate": 5.651081690394784e-06, | |
| "loss": 0.0069, | |
| "step": 8470 | |
| }, | |
| { | |
| "epoch": 1.3592209665785044, | |
| "grad_norm": 0.04670213907957077, | |
| "learning_rate": 5.625903254677753e-06, | |
| "loss": 0.0086, | |
| "step": 8480 | |
| }, | |
| { | |
| "epoch": 1.36082391600545, | |
| "grad_norm": 0.04091748967766762, | |
| "learning_rate": 5.60075906284641e-06, | |
| "loss": 0.0074, | |
| "step": 8490 | |
| }, | |
| { | |
| "epoch": 1.3624268654323957, | |
| "grad_norm": 0.046860676258802414, | |
| "learning_rate": 5.575649311749348e-06, | |
| "loss": 0.0072, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.3640298148593413, | |
| "grad_norm": 0.036616772413253784, | |
| "learning_rate": 5.550574197965545e-06, | |
| "loss": 0.0067, | |
| "step": 8510 | |
| }, | |
| { | |
| "epoch": 1.3656327642862869, | |
| "grad_norm": 0.05461053550243378, | |
| "learning_rate": 5.525533917802806e-06, | |
| "loss": 0.0074, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 1.3672357137132323, | |
| "grad_norm": 0.03603074327111244, | |
| "learning_rate": 5.500528667296232e-06, | |
| "loss": 0.0076, | |
| "step": 8530 | |
| }, | |
| { | |
| "epoch": 1.3688386631401779, | |
| "grad_norm": 0.046030059456825256, | |
| "learning_rate": 5.4755586422066805e-06, | |
| "loss": 0.0084, | |
| "step": 8540 | |
| }, | |
| { | |
| "epoch": 1.3704416125671235, | |
| "grad_norm": 0.0493701696395874, | |
| "learning_rate": 5.450624038019232e-06, | |
| "loss": 0.0071, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 1.372044561994069, | |
| "grad_norm": 0.04193172603845596, | |
| "learning_rate": 5.425725049941686e-06, | |
| "loss": 0.0078, | |
| "step": 8560 | |
| }, | |
| { | |
| "epoch": 1.3736475114210147, | |
| "grad_norm": 0.03369879722595215, | |
| "learning_rate": 5.4008618729029846e-06, | |
| "loss": 0.0074, | |
| "step": 8570 | |
| }, | |
| { | |
| "epoch": 1.3752504608479603, | |
| "grad_norm": 0.05988788977265358, | |
| "learning_rate": 5.376034701551729e-06, | |
| "loss": 0.007, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 1.3768534102749057, | |
| "grad_norm": 0.058902859687805176, | |
| "learning_rate": 5.3512437302546365e-06, | |
| "loss": 0.0077, | |
| "step": 8590 | |
| }, | |
| { | |
| "epoch": 1.3784563597018513, | |
| "grad_norm": 0.05228348448872566, | |
| "learning_rate": 5.326489153095011e-06, | |
| "loss": 0.0074, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 1.380059309128797, | |
| "grad_norm": 0.03920851647853851, | |
| "learning_rate": 5.301771163871257e-06, | |
| "loss": 0.0073, | |
| "step": 8610 | |
| }, | |
| { | |
| "epoch": 1.3816622585557425, | |
| "grad_norm": 0.040967486798763275, | |
| "learning_rate": 5.277089956095312e-06, | |
| "loss": 0.0076, | |
| "step": 8620 | |
| }, | |
| { | |
| "epoch": 1.3832652079826881, | |
| "grad_norm": 0.04195632413029671, | |
| "learning_rate": 5.25244572299118e-06, | |
| "loss": 0.0073, | |
| "step": 8630 | |
| }, | |
| { | |
| "epoch": 1.3848681574096338, | |
| "grad_norm": 0.04842723160982132, | |
| "learning_rate": 5.227838657493396e-06, | |
| "loss": 0.0072, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 1.3864711068365794, | |
| "grad_norm": 0.035918042063713074, | |
| "learning_rate": 5.2032689522455e-06, | |
| "loss": 0.007, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 1.388074056263525, | |
| "grad_norm": 0.04247179999947548, | |
| "learning_rate": 5.178736799598574e-06, | |
| "loss": 0.0076, | |
| "step": 8660 | |
| }, | |
| { | |
| "epoch": 1.3896770056904706, | |
| "grad_norm": 0.03688638284802437, | |
| "learning_rate": 5.154242391609683e-06, | |
| "loss": 0.0074, | |
| "step": 8670 | |
| }, | |
| { | |
| "epoch": 1.391279955117416, | |
| "grad_norm": 0.04736698791384697, | |
| "learning_rate": 5.129785920040416e-06, | |
| "loss": 0.0085, | |
| "step": 8680 | |
| }, | |
| { | |
| "epoch": 1.3928829045443616, | |
| "grad_norm": 0.041601404547691345, | |
| "learning_rate": 5.105367576355351e-06, | |
| "loss": 0.0072, | |
| "step": 8690 | |
| }, | |
| { | |
| "epoch": 1.3944858539713072, | |
| "grad_norm": 0.03648248687386513, | |
| "learning_rate": 5.08098755172058e-06, | |
| "loss": 0.0064, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 1.3960888033982528, | |
| "grad_norm": 0.04284751042723656, | |
| "learning_rate": 5.056646037002205e-06, | |
| "loss": 0.0068, | |
| "step": 8710 | |
| }, | |
| { | |
| "epoch": 1.3976917528251984, | |
| "grad_norm": 0.047767430543899536, | |
| "learning_rate": 5.03234322276483e-06, | |
| "loss": 0.0081, | |
| "step": 8720 | |
| }, | |
| { | |
| "epoch": 1.3992947022521438, | |
| "grad_norm": 0.06886753439903259, | |
| "learning_rate": 5.008079299270091e-06, | |
| "loss": 0.0076, | |
| "step": 8730 | |
| }, | |
| { | |
| "epoch": 1.4008976516790894, | |
| "grad_norm": 0.03830573335289955, | |
| "learning_rate": 4.98385445647516e-06, | |
| "loss": 0.0072, | |
| "step": 8740 | |
| }, | |
| { | |
| "epoch": 1.402500601106035, | |
| "grad_norm": 0.04586140811443329, | |
| "learning_rate": 4.95966888403124e-06, | |
| "loss": 0.0071, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 1.4041035505329806, | |
| "grad_norm": 0.04326622560620308, | |
| "learning_rate": 4.935522771282108e-06, | |
| "loss": 0.0076, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 1.4057064999599262, | |
| "grad_norm": 0.0408218689262867, | |
| "learning_rate": 4.911416307262617e-06, | |
| "loss": 0.0077, | |
| "step": 8770 | |
| }, | |
| { | |
| "epoch": 1.4073094493868719, | |
| "grad_norm": 0.037510719150304794, | |
| "learning_rate": 4.887349680697208e-06, | |
| "loss": 0.0069, | |
| "step": 8780 | |
| }, | |
| { | |
| "epoch": 1.4089123988138175, | |
| "grad_norm": 0.04716578498482704, | |
| "learning_rate": 4.863323079998456e-06, | |
| "loss": 0.0079, | |
| "step": 8790 | |
| }, | |
| { | |
| "epoch": 1.410515348240763, | |
| "grad_norm": 0.043064896017313004, | |
| "learning_rate": 4.8393366932655774e-06, | |
| "loss": 0.0081, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 1.4121182976677087, | |
| "grad_norm": 0.03494073450565338, | |
| "learning_rate": 4.815390708282964e-06, | |
| "loss": 0.0075, | |
| "step": 8810 | |
| }, | |
| { | |
| "epoch": 1.4137212470946543, | |
| "grad_norm": 0.04706864804029465, | |
| "learning_rate": 4.791485312518701e-06, | |
| "loss": 0.0074, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 1.4153241965215997, | |
| "grad_norm": 0.0434531532227993, | |
| "learning_rate": 4.767620693123119e-06, | |
| "loss": 0.0073, | |
| "step": 8830 | |
| }, | |
| { | |
| "epoch": 1.4169271459485453, | |
| "grad_norm": 0.03783673048019409, | |
| "learning_rate": 4.7437970369273216e-06, | |
| "loss": 0.0077, | |
| "step": 8840 | |
| }, | |
| { | |
| "epoch": 1.418530095375491, | |
| "grad_norm": 0.03731882572174072, | |
| "learning_rate": 4.720014530441705e-06, | |
| "loss": 0.0081, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 1.4201330448024365, | |
| "grad_norm": 0.04272478446364403, | |
| "learning_rate": 4.696273359854528e-06, | |
| "loss": 0.0072, | |
| "step": 8860 | |
| }, | |
| { | |
| "epoch": 1.4217359942293821, | |
| "grad_norm": 0.0567503459751606, | |
| "learning_rate": 4.672573711030438e-06, | |
| "loss": 0.0071, | |
| "step": 8870 | |
| }, | |
| { | |
| "epoch": 1.4233389436563275, | |
| "grad_norm": 0.04293932020664215, | |
| "learning_rate": 4.6489157695090045e-06, | |
| "loss": 0.007, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 1.4249418930832731, | |
| "grad_norm": 0.037547074258327484, | |
| "learning_rate": 4.625299720503297e-06, | |
| "loss": 0.0068, | |
| "step": 8890 | |
| }, | |
| { | |
| "epoch": 1.4265448425102187, | |
| "grad_norm": 0.039693210273981094, | |
| "learning_rate": 4.601725748898395e-06, | |
| "loss": 0.007, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 1.4281477919371643, | |
| "grad_norm": 0.048341087996959686, | |
| "learning_rate": 4.578194039249992e-06, | |
| "loss": 0.0068, | |
| "step": 8910 | |
| }, | |
| { | |
| "epoch": 1.42975074136411, | |
| "grad_norm": 0.046204425394535065, | |
| "learning_rate": 4.554704775782899e-06, | |
| "loss": 0.0075, | |
| "step": 8920 | |
| }, | |
| { | |
| "epoch": 1.4313536907910556, | |
| "grad_norm": 0.03958141803741455, | |
| "learning_rate": 4.531258142389622e-06, | |
| "loss": 0.0075, | |
| "step": 8930 | |
| }, | |
| { | |
| "epoch": 1.4329566402180012, | |
| "grad_norm": 0.039660900831222534, | |
| "learning_rate": 4.5078543226289505e-06, | |
| "loss": 0.0072, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 1.4345595896449468, | |
| "grad_norm": 0.04643959179520607, | |
| "learning_rate": 4.484493499724468e-06, | |
| "loss": 0.007, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 1.4361625390718924, | |
| "grad_norm": 0.03679412230849266, | |
| "learning_rate": 4.461175856563164e-06, | |
| "loss": 0.0068, | |
| "step": 8960 | |
| }, | |
| { | |
| "epoch": 1.4377654884988378, | |
| "grad_norm": 0.03700454905629158, | |
| "learning_rate": 4.4379015756939646e-06, | |
| "loss": 0.0064, | |
| "step": 8970 | |
| }, | |
| { | |
| "epoch": 1.4393684379257834, | |
| "grad_norm": 0.03768878057599068, | |
| "learning_rate": 4.414670839326337e-06, | |
| "loss": 0.0067, | |
| "step": 8980 | |
| }, | |
| { | |
| "epoch": 1.440971387352729, | |
| "grad_norm": 0.056058432906866074, | |
| "learning_rate": 4.391483829328845e-06, | |
| "loss": 0.0069, | |
| "step": 8990 | |
| }, | |
| { | |
| "epoch": 1.4425743367796746, | |
| "grad_norm": 0.053892601281404495, | |
| "learning_rate": 4.368340727227719e-06, | |
| "loss": 0.0071, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.4441772862066202, | |
| "grad_norm": 0.053902119398117065, | |
| "learning_rate": 4.345241714205452e-06, | |
| "loss": 0.0078, | |
| "step": 9010 | |
| }, | |
| { | |
| "epoch": 1.4457802356335658, | |
| "grad_norm": 0.044995274394750595, | |
| "learning_rate": 4.322186971099373e-06, | |
| "loss": 0.007, | |
| "step": 9020 | |
| }, | |
| { | |
| "epoch": 1.4473831850605112, | |
| "grad_norm": 0.04146160930395126, | |
| "learning_rate": 4.299176678400225e-06, | |
| "loss": 0.0072, | |
| "step": 9030 | |
| }, | |
| { | |
| "epoch": 1.4489861344874568, | |
| "grad_norm": 0.03394712135195732, | |
| "learning_rate": 4.276211016250763e-06, | |
| "loss": 0.0078, | |
| "step": 9040 | |
| }, | |
| { | |
| "epoch": 1.4505890839144024, | |
| "grad_norm": 0.044879477471113205, | |
| "learning_rate": 4.253290164444337e-06, | |
| "loss": 0.0068, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 1.452192033341348, | |
| "grad_norm": 0.047636158764362335, | |
| "learning_rate": 4.230414302423491e-06, | |
| "loss": 0.0067, | |
| "step": 9060 | |
| }, | |
| { | |
| "epoch": 1.4537949827682937, | |
| "grad_norm": 0.05139080807566643, | |
| "learning_rate": 4.207583609278543e-06, | |
| "loss": 0.0065, | |
| "step": 9070 | |
| }, | |
| { | |
| "epoch": 1.4553979321952393, | |
| "grad_norm": 0.03766312450170517, | |
| "learning_rate": 4.184798263746201e-06, | |
| "loss": 0.0075, | |
| "step": 9080 | |
| }, | |
| { | |
| "epoch": 1.4570008816221849, | |
| "grad_norm": 0.0382857508957386, | |
| "learning_rate": 4.162058444208159e-06, | |
| "loss": 0.0067, | |
| "step": 9090 | |
| }, | |
| { | |
| "epoch": 1.4586038310491305, | |
| "grad_norm": 0.057315412908792496, | |
| "learning_rate": 4.139364328689687e-06, | |
| "loss": 0.007, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 1.460206780476076, | |
| "grad_norm": 0.056077856570482254, | |
| "learning_rate": 4.116716094858255e-06, | |
| "loss": 0.0072, | |
| "step": 9110 | |
| }, | |
| { | |
| "epoch": 1.4618097299030215, | |
| "grad_norm": 0.03596881777048111, | |
| "learning_rate": 4.0941139200221414e-06, | |
| "loss": 0.007, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 1.463412679329967, | |
| "grad_norm": 0.04506804421544075, | |
| "learning_rate": 4.071557981129019e-06, | |
| "loss": 0.0069, | |
| "step": 9130 | |
| }, | |
| { | |
| "epoch": 1.4650156287569127, | |
| "grad_norm": 0.04154985398054123, | |
| "learning_rate": 4.049048454764608e-06, | |
| "loss": 0.0072, | |
| "step": 9140 | |
| }, | |
| { | |
| "epoch": 1.4666185781838583, | |
| "grad_norm": 0.052859582006931305, | |
| "learning_rate": 4.02658551715127e-06, | |
| "loss": 0.0069, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 1.468221527610804, | |
| "grad_norm": 0.033961955457925797, | |
| "learning_rate": 4.004169344146623e-06, | |
| "loss": 0.0077, | |
| "step": 9160 | |
| }, | |
| { | |
| "epoch": 1.4698244770377493, | |
| "grad_norm": 0.0423002727329731, | |
| "learning_rate": 3.98180011124219e-06, | |
| "loss": 0.0074, | |
| "step": 9170 | |
| }, | |
| { | |
| "epoch": 1.471427426464695, | |
| "grad_norm": 0.053169313818216324, | |
| "learning_rate": 3.9594779935619895e-06, | |
| "loss": 0.0083, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 1.4730303758916405, | |
| "grad_norm": 0.036727722734212875, | |
| "learning_rate": 3.937203165861215e-06, | |
| "loss": 0.007, | |
| "step": 9190 | |
| }, | |
| { | |
| "epoch": 1.4746333253185862, | |
| "grad_norm": 0.039678290486335754, | |
| "learning_rate": 3.914975802524806e-06, | |
| "loss": 0.0066, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 1.4762362747455318, | |
| "grad_norm": 0.040238771587610245, | |
| "learning_rate": 3.892796077566131e-06, | |
| "loss": 0.0069, | |
| "step": 9210 | |
| }, | |
| { | |
| "epoch": 1.4778392241724774, | |
| "grad_norm": 0.03908219188451767, | |
| "learning_rate": 3.870664164625606e-06, | |
| "loss": 0.0075, | |
| "step": 9220 | |
| }, | |
| { | |
| "epoch": 1.479442173599423, | |
| "grad_norm": 0.056258101016283035, | |
| "learning_rate": 3.848580236969327e-06, | |
| "loss": 0.0069, | |
| "step": 9230 | |
| }, | |
| { | |
| "epoch": 1.4810451230263686, | |
| "grad_norm": 0.05250425264239311, | |
| "learning_rate": 3.826544467487737e-06, | |
| "loss": 0.0066, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 1.4826480724533142, | |
| "grad_norm": 0.043162960559129715, | |
| "learning_rate": 3.8045570286942455e-06, | |
| "loss": 0.007, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 1.4842510218802598, | |
| "grad_norm": 0.05024382844567299, | |
| "learning_rate": 3.782618092723902e-06, | |
| "loss": 0.0068, | |
| "step": 9260 | |
| }, | |
| { | |
| "epoch": 1.4858539713072052, | |
| "grad_norm": 0.0474337600171566, | |
| "learning_rate": 3.760727831332034e-06, | |
| "loss": 0.0075, | |
| "step": 9270 | |
| }, | |
| { | |
| "epoch": 1.4874569207341508, | |
| "grad_norm": 0.051519252359867096, | |
| "learning_rate": 3.738886415892897e-06, | |
| "loss": 0.0072, | |
| "step": 9280 | |
| }, | |
| { | |
| "epoch": 1.4890598701610964, | |
| "grad_norm": 0.04442744702100754, | |
| "learning_rate": 3.7170940173983627e-06, | |
| "loss": 0.0067, | |
| "step": 9290 | |
| }, | |
| { | |
| "epoch": 1.490662819588042, | |
| "grad_norm": 0.04417699947953224, | |
| "learning_rate": 3.69535080645654e-06, | |
| "loss": 0.0078, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 1.4922657690149876, | |
| "grad_norm": 0.030911916866898537, | |
| "learning_rate": 3.673656953290462e-06, | |
| "loss": 0.0067, | |
| "step": 9310 | |
| }, | |
| { | |
| "epoch": 1.493868718441933, | |
| "grad_norm": 0.041102729737758636, | |
| "learning_rate": 3.652012627736756e-06, | |
| "loss": 0.0064, | |
| "step": 9320 | |
| }, | |
| { | |
| "epoch": 1.4954716678688786, | |
| "grad_norm": 0.051470912992954254, | |
| "learning_rate": 3.6304179992443065e-06, | |
| "loss": 0.0071, | |
| "step": 9330 | |
| }, | |
| { | |
| "epoch": 1.4970746172958243, | |
| "grad_norm": 0.04753391444683075, | |
| "learning_rate": 3.608873236872934e-06, | |
| "loss": 0.0068, | |
| "step": 9340 | |
| }, | |
| { | |
| "epoch": 1.4986775667227699, | |
| "grad_norm": 0.04698888957500458, | |
| "learning_rate": 3.587378509292053e-06, | |
| "loss": 0.0061, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 1.5002805161497155, | |
| "grad_norm": 0.04583842307329178, | |
| "learning_rate": 3.5659339847793805e-06, | |
| "loss": 0.0064, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 1.501883465576661, | |
| "grad_norm": 0.03985747694969177, | |
| "learning_rate": 3.5445398312196046e-06, | |
| "loss": 0.0065, | |
| "step": 9370 | |
| }, | |
| { | |
| "epoch": 1.5034864150036067, | |
| "grad_norm": 0.039111629128456116, | |
| "learning_rate": 3.5231962161030554e-06, | |
| "loss": 0.0074, | |
| "step": 9380 | |
| }, | |
| { | |
| "epoch": 1.5050893644305523, | |
| "grad_norm": 0.03803228959441185, | |
| "learning_rate": 3.5019033065244225e-06, | |
| "loss": 0.007, | |
| "step": 9390 | |
| }, | |
| { | |
| "epoch": 1.506692313857498, | |
| "grad_norm": 0.05284256488084793, | |
| "learning_rate": 3.48066126918143e-06, | |
| "loss": 0.007, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 1.5082952632844435, | |
| "grad_norm": 0.04431462287902832, | |
| "learning_rate": 3.459470270373525e-06, | |
| "loss": 0.0072, | |
| "step": 9410 | |
| }, | |
| { | |
| "epoch": 1.509898212711389, | |
| "grad_norm": 0.041640907526016235, | |
| "learning_rate": 3.4383304760005952e-06, | |
| "loss": 0.0067, | |
| "step": 9420 | |
| }, | |
| { | |
| "epoch": 1.5115011621383345, | |
| "grad_norm": 0.0580732598900795, | |
| "learning_rate": 3.4172420515616543e-06, | |
| "loss": 0.0071, | |
| "step": 9430 | |
| }, | |
| { | |
| "epoch": 1.5131041115652801, | |
| "grad_norm": 0.042141567915678024, | |
| "learning_rate": 3.396205162153556e-06, | |
| "loss": 0.0066, | |
| "step": 9440 | |
| }, | |
| { | |
| "epoch": 1.5147070609922257, | |
| "grad_norm": 0.037412770092487335, | |
| "learning_rate": 3.375219972469692e-06, | |
| "loss": 0.0068, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 1.5163100104191711, | |
| "grad_norm": 0.04163951054215431, | |
| "learning_rate": 3.3542866467987003e-06, | |
| "loss": 0.0066, | |
| "step": 9460 | |
| }, | |
| { | |
| "epoch": 1.5179129598461167, | |
| "grad_norm": 0.04390928894281387, | |
| "learning_rate": 3.333405349023211e-06, | |
| "loss": 0.0057, | |
| "step": 9470 | |
| }, | |
| { | |
| "epoch": 1.5195159092730623, | |
| "grad_norm": 0.052741654217243195, | |
| "learning_rate": 3.3125762426185114e-06, | |
| "loss": 0.007, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 1.521118858700008, | |
| "grad_norm": 0.041966211050748825, | |
| "learning_rate": 3.2917994906513095e-06, | |
| "loss": 0.0058, | |
| "step": 9490 | |
| }, | |
| { | |
| "epoch": 1.5227218081269536, | |
| "grad_norm": 0.04255704954266548, | |
| "learning_rate": 3.271075255778442e-06, | |
| "loss": 0.0073, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.5243247575538992, | |
| "grad_norm": 0.04274579510092735, | |
| "learning_rate": 3.250403700245586e-06, | |
| "loss": 0.0065, | |
| "step": 9510 | |
| }, | |
| { | |
| "epoch": 1.5259277069808448, | |
| "grad_norm": 0.06175905093550682, | |
| "learning_rate": 3.229784985886022e-06, | |
| "loss": 0.0063, | |
| "step": 9520 | |
| }, | |
| { | |
| "epoch": 1.5275306564077904, | |
| "grad_norm": 0.042121000587940216, | |
| "learning_rate": 3.2092192741193295e-06, | |
| "loss": 0.0063, | |
| "step": 9530 | |
| }, | |
| { | |
| "epoch": 1.529133605834736, | |
| "grad_norm": 0.042963907122612, | |
| "learning_rate": 3.188706725950157e-06, | |
| "loss": 0.0071, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 1.5307365552616816, | |
| "grad_norm": 0.04113069176673889, | |
| "learning_rate": 3.1682475019669413e-06, | |
| "loss": 0.0068, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 1.5323395046886272, | |
| "grad_norm": 0.03678800165653229, | |
| "learning_rate": 3.1478417623406464e-06, | |
| "loss": 0.0066, | |
| "step": 9560 | |
| }, | |
| { | |
| "epoch": 1.5339424541155726, | |
| "grad_norm": 0.04855935275554657, | |
| "learning_rate": 3.127489666823539e-06, | |
| "loss": 0.0067, | |
| "step": 9570 | |
| }, | |
| { | |
| "epoch": 1.5355454035425182, | |
| "grad_norm": 0.04056788608431816, | |
| "learning_rate": 3.107191374747893e-06, | |
| "loss": 0.0076, | |
| "step": 9580 | |
| }, | |
| { | |
| "epoch": 1.5371483529694638, | |
| "grad_norm": 0.03992756828665733, | |
| "learning_rate": 3.0869470450247875e-06, | |
| "loss": 0.0064, | |
| "step": 9590 | |
| }, | |
| { | |
| "epoch": 1.5387513023964094, | |
| "grad_norm": 0.047048088163137436, | |
| "learning_rate": 3.0667568361428256e-06, | |
| "loss": 0.0067, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 1.5403542518233548, | |
| "grad_norm": 0.04409756511449814, | |
| "learning_rate": 3.0466209061669184e-06, | |
| "loss": 0.0068, | |
| "step": 9610 | |
| }, | |
| { | |
| "epoch": 1.5419572012503004, | |
| "grad_norm": 0.07111341506242752, | |
| "learning_rate": 3.0265394127370406e-06, | |
| "loss": 0.0067, | |
| "step": 9620 | |
| }, | |
| { | |
| "epoch": 1.543560150677246, | |
| "grad_norm": 0.04027624428272247, | |
| "learning_rate": 3.006512513066985e-06, | |
| "loss": 0.0072, | |
| "step": 9630 | |
| }, | |
| { | |
| "epoch": 1.5451631001041917, | |
| "grad_norm": 0.03571724519133568, | |
| "learning_rate": 2.986540363943149e-06, | |
| "loss": 0.0064, | |
| "step": 9640 | |
| }, | |
| { | |
| "epoch": 1.5467660495311373, | |
| "grad_norm": 0.04187304526567459, | |
| "learning_rate": 2.966623121723303e-06, | |
| "loss": 0.0075, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 1.5483689989580829, | |
| "grad_norm": 0.042146991938352585, | |
| "learning_rate": 2.9467609423353504e-06, | |
| "loss": 0.0064, | |
| "step": 9660 | |
| }, | |
| { | |
| "epoch": 1.5499719483850285, | |
| "grad_norm": 0.04556239768862724, | |
| "learning_rate": 2.9269539812761293e-06, | |
| "loss": 0.0063, | |
| "step": 9670 | |
| }, | |
| { | |
| "epoch": 1.551574897811974, | |
| "grad_norm": 0.03327028825879097, | |
| "learning_rate": 2.9072023936101847e-06, | |
| "loss": 0.0069, | |
| "step": 9680 | |
| }, | |
| { | |
| "epoch": 1.5531778472389197, | |
| "grad_norm": 0.0430419035255909, | |
| "learning_rate": 2.887506333968546e-06, | |
| "loss": 0.0067, | |
| "step": 9690 | |
| }, | |
| { | |
| "epoch": 1.5547807966658653, | |
| "grad_norm": 0.05209411308169365, | |
| "learning_rate": 2.8678659565475363e-06, | |
| "loss": 0.0079, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 1.556383746092811, | |
| "grad_norm": 0.0382845476269722, | |
| "learning_rate": 2.8482814151075477e-06, | |
| "loss": 0.0068, | |
| "step": 9710 | |
| }, | |
| { | |
| "epoch": 1.5579866955197563, | |
| "grad_norm": 0.03739907592535019, | |
| "learning_rate": 2.8287528629718507e-06, | |
| "loss": 0.0065, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 1.559589644946702, | |
| "grad_norm": 0.04491214081645012, | |
| "learning_rate": 2.809280453025376e-06, | |
| "loss": 0.0069, | |
| "step": 9730 | |
| }, | |
| { | |
| "epoch": 1.5611925943736475, | |
| "grad_norm": 0.047207217663526535, | |
| "learning_rate": 2.7898643377135383e-06, | |
| "loss": 0.0068, | |
| "step": 9740 | |
| }, | |
| { | |
| "epoch": 1.562795543800593, | |
| "grad_norm": 0.03475257754325867, | |
| "learning_rate": 2.7705046690410344e-06, | |
| "loss": 0.0067, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 1.5643984932275385, | |
| "grad_norm": 0.05968464910984039, | |
| "learning_rate": 2.751201598570642e-06, | |
| "loss": 0.0072, | |
| "step": 9760 | |
| }, | |
| { | |
| "epoch": 1.5660014426544842, | |
| "grad_norm": 0.03330584615468979, | |
| "learning_rate": 2.7319552774220517e-06, | |
| "loss": 0.0066, | |
| "step": 9770 | |
| }, | |
| { | |
| "epoch": 1.5676043920814298, | |
| "grad_norm": 0.03819479048252106, | |
| "learning_rate": 2.712765856270678e-06, | |
| "loss": 0.0069, | |
| "step": 9780 | |
| }, | |
| { | |
| "epoch": 1.5692073415083754, | |
| "grad_norm": 0.05412070453166962, | |
| "learning_rate": 2.6936334853464676e-06, | |
| "loss": 0.0068, | |
| "step": 9790 | |
| }, | |
| { | |
| "epoch": 1.570810290935321, | |
| "grad_norm": 0.04212406277656555, | |
| "learning_rate": 2.6745583144327423e-06, | |
| "loss": 0.0068, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 1.5724132403622666, | |
| "grad_norm": 0.03889830783009529, | |
| "learning_rate": 2.6555404928650055e-06, | |
| "loss": 0.0073, | |
| "step": 9810 | |
| }, | |
| { | |
| "epoch": 1.5740161897892122, | |
| "grad_norm": 0.03061777725815773, | |
| "learning_rate": 2.6365801695298033e-06, | |
| "loss": 0.0073, | |
| "step": 9820 | |
| }, | |
| { | |
| "epoch": 1.5756191392161578, | |
| "grad_norm": 0.046813514083623886, | |
| "learning_rate": 2.617677492863524e-06, | |
| "loss": 0.0073, | |
| "step": 9830 | |
| }, | |
| { | |
| "epoch": 1.5772220886431034, | |
| "grad_norm": 0.04139237850904465, | |
| "learning_rate": 2.5988326108512494e-06, | |
| "loss": 0.0066, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 1.578825038070049, | |
| "grad_norm": 0.03712620958685875, | |
| "learning_rate": 2.5800456710256207e-06, | |
| "loss": 0.0074, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 1.5804279874969944, | |
| "grad_norm": 0.04341958463191986, | |
| "learning_rate": 2.561316820465638e-06, | |
| "loss": 0.007, | |
| "step": 9860 | |
| }, | |
| { | |
| "epoch": 1.58203093692394, | |
| "grad_norm": 0.06013938784599304, | |
| "learning_rate": 2.5426462057955505e-06, | |
| "loss": 0.0064, | |
| "step": 9870 | |
| }, | |
| { | |
| "epoch": 1.5836338863508856, | |
| "grad_norm": 0.026838280260562897, | |
| "learning_rate": 2.524033973183675e-06, | |
| "loss": 0.0061, | |
| "step": 9880 | |
| }, | |
| { | |
| "epoch": 1.5852368357778313, | |
| "grad_norm": 0.04112298786640167, | |
| "learning_rate": 2.505480268341278e-06, | |
| "loss": 0.0063, | |
| "step": 9890 | |
| }, | |
| { | |
| "epoch": 1.5868397852047766, | |
| "grad_norm": 0.03727724403142929, | |
| "learning_rate": 2.4869852365214287e-06, | |
| "loss": 0.006, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 1.5884427346317223, | |
| "grad_norm": 0.037251487374305725, | |
| "learning_rate": 2.468549022517841e-06, | |
| "loss": 0.0077, | |
| "step": 9910 | |
| }, | |
| { | |
| "epoch": 1.5900456840586679, | |
| "grad_norm": 0.04826606065034866, | |
| "learning_rate": 2.4501717706637707e-06, | |
| "loss": 0.0068, | |
| "step": 9920 | |
| }, | |
| { | |
| "epoch": 1.5916486334856135, | |
| "grad_norm": 0.04270663857460022, | |
| "learning_rate": 2.43185362483087e-06, | |
| "loss": 0.0066, | |
| "step": 9930 | |
| }, | |
| { | |
| "epoch": 1.593251582912559, | |
| "grad_norm": 0.03491262346506119, | |
| "learning_rate": 2.4135947284280523e-06, | |
| "loss": 0.0068, | |
| "step": 9940 | |
| }, | |
| { | |
| "epoch": 1.5948545323395047, | |
| "grad_norm": 0.03372110426425934, | |
| "learning_rate": 2.395395224400391e-06, | |
| "loss": 0.0066, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 1.5964574817664503, | |
| "grad_norm": 0.03577246144413948, | |
| "learning_rate": 2.3772552552279837e-06, | |
| "loss": 0.0066, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 1.598060431193396, | |
| "grad_norm": 0.04551481455564499, | |
| "learning_rate": 2.3591749629248463e-06, | |
| "loss": 0.0066, | |
| "step": 9970 | |
| }, | |
| { | |
| "epoch": 1.5996633806203415, | |
| "grad_norm": 0.044537000358104706, | |
| "learning_rate": 2.341154489037788e-06, | |
| "loss": 0.0067, | |
| "step": 9980 | |
| }, | |
| { | |
| "epoch": 1.6012663300472871, | |
| "grad_norm": 0.03701915591955185, | |
| "learning_rate": 2.3231939746453214e-06, | |
| "loss": 0.0068, | |
| "step": 9990 | |
| }, | |
| { | |
| "epoch": 1.6028692794742327, | |
| "grad_norm": 0.03451233729720116, | |
| "learning_rate": 2.3052935603565464e-06, | |
| "loss": 0.0061, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.6044722289011781, | |
| "grad_norm": 0.0396900400519371, | |
| "learning_rate": 2.287453386310047e-06, | |
| "loss": 0.0068, | |
| "step": 10010 | |
| }, | |
| { | |
| "epoch": 1.6060751783281237, | |
| "grad_norm": 0.03384820371866226, | |
| "learning_rate": 2.269673592172804e-06, | |
| "loss": 0.0064, | |
| "step": 10020 | |
| }, | |
| { | |
| "epoch": 1.6076781277550694, | |
| "grad_norm": 0.05970784276723862, | |
| "learning_rate": 2.251954317139099e-06, | |
| "loss": 0.0067, | |
| "step": 10030 | |
| }, | |
| { | |
| "epoch": 1.609281077182015, | |
| "grad_norm": 0.03140771761536598, | |
| "learning_rate": 2.234295699929413e-06, | |
| "loss": 0.0069, | |
| "step": 10040 | |
| }, | |
| { | |
| "epoch": 1.6108840266089604, | |
| "grad_norm": 0.04010142758488655, | |
| "learning_rate": 2.2166978787893576e-06, | |
| "loss": 0.0071, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 1.612486976035906, | |
| "grad_norm": 0.037328608334064484, | |
| "learning_rate": 2.1991609914885857e-06, | |
| "loss": 0.0065, | |
| "step": 10060 | |
| }, | |
| { | |
| "epoch": 1.6140899254628516, | |
| "grad_norm": 0.04343570023775101, | |
| "learning_rate": 2.1816851753197023e-06, | |
| "loss": 0.0066, | |
| "step": 10070 | |
| }, | |
| { | |
| "epoch": 1.6156928748897972, | |
| "grad_norm": 0.05846039205789566, | |
| "learning_rate": 2.164270567097212e-06, | |
| "loss": 0.0059, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 1.6172958243167428, | |
| "grad_norm": 0.0358646959066391, | |
| "learning_rate": 2.1469173031564194e-06, | |
| "loss": 0.0065, | |
| "step": 10090 | |
| }, | |
| { | |
| "epoch": 1.6188987737436884, | |
| "grad_norm": 0.04640796035528183, | |
| "learning_rate": 2.1296255193523973e-06, | |
| "loss": 0.007, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 1.620501723170634, | |
| "grad_norm": 0.026891401037573814, | |
| "learning_rate": 2.11239535105889e-06, | |
| "loss": 0.0061, | |
| "step": 10110 | |
| }, | |
| { | |
| "epoch": 1.6221046725975796, | |
| "grad_norm": 0.045271456241607666, | |
| "learning_rate": 2.0952269331672624e-06, | |
| "loss": 0.007, | |
| "step": 10120 | |
| }, | |
| { | |
| "epoch": 1.6237076220245252, | |
| "grad_norm": 0.03902266547083855, | |
| "learning_rate": 2.078120400085468e-06, | |
| "loss": 0.0066, | |
| "step": 10130 | |
| }, | |
| { | |
| "epoch": 1.6253105714514708, | |
| "grad_norm": 0.03035142458975315, | |
| "learning_rate": 2.0610758857369573e-06, | |
| "loss": 0.0072, | |
| "step": 10140 | |
| }, | |
| { | |
| "epoch": 1.6269135208784162, | |
| "grad_norm": 0.03005078062415123, | |
| "learning_rate": 2.0440935235596613e-06, | |
| "loss": 0.0072, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 1.6285164703053618, | |
| "grad_norm": 0.04039452224969864, | |
| "learning_rate": 2.0271734465049264e-06, | |
| "loss": 0.0066, | |
| "step": 10160 | |
| }, | |
| { | |
| "epoch": 1.6301194197323075, | |
| "grad_norm": 0.04712294414639473, | |
| "learning_rate": 2.0103157870364866e-06, | |
| "loss": 0.0067, | |
| "step": 10170 | |
| }, | |
| { | |
| "epoch": 1.631722369159253, | |
| "grad_norm": 0.034529995173215866, | |
| "learning_rate": 1.9935206771294258e-06, | |
| "loss": 0.0064, | |
| "step": 10180 | |
| }, | |
| { | |
| "epoch": 1.6333253185861984, | |
| "grad_norm": 0.05219118669629097, | |
| "learning_rate": 1.9767882482691257e-06, | |
| "loss": 0.007, | |
| "step": 10190 | |
| }, | |
| { | |
| "epoch": 1.634928268013144, | |
| "grad_norm": 0.04649091139435768, | |
| "learning_rate": 1.960118631450273e-06, | |
| "loss": 0.006, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 1.6365312174400897, | |
| "grad_norm": 0.03332320600748062, | |
| "learning_rate": 1.9435119571757942e-06, | |
| "loss": 0.0072, | |
| "step": 10210 | |
| }, | |
| { | |
| "epoch": 1.6381341668670353, | |
| "grad_norm": 0.04636770859360695, | |
| "learning_rate": 1.926968355455856e-06, | |
| "loss": 0.0068, | |
| "step": 10220 | |
| }, | |
| { | |
| "epoch": 1.639737116293981, | |
| "grad_norm": 0.033311877399683, | |
| "learning_rate": 1.910487955806848e-06, | |
| "loss": 0.0068, | |
| "step": 10230 | |
| }, | |
| { | |
| "epoch": 1.6413400657209265, | |
| "grad_norm": 0.044362135231494904, | |
| "learning_rate": 1.894070887250361e-06, | |
| "loss": 0.0059, | |
| "step": 10240 | |
| }, | |
| { | |
| "epoch": 1.642943015147872, | |
| "grad_norm": 0.049247484654188156, | |
| "learning_rate": 1.8777172783121823e-06, | |
| "loss": 0.0062, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 1.6445459645748177, | |
| "grad_norm": 0.03714997321367264, | |
| "learning_rate": 1.86142725702128e-06, | |
| "loss": 0.0069, | |
| "step": 10260 | |
| }, | |
| { | |
| "epoch": 1.6461489140017633, | |
| "grad_norm": 0.04958584904670715, | |
| "learning_rate": 1.8452009509088164e-06, | |
| "loss": 0.0066, | |
| "step": 10270 | |
| }, | |
| { | |
| "epoch": 1.647751863428709, | |
| "grad_norm": 0.04805106669664383, | |
| "learning_rate": 1.8290384870071398e-06, | |
| "loss": 0.0064, | |
| "step": 10280 | |
| }, | |
| { | |
| "epoch": 1.6493548128556546, | |
| "grad_norm": 0.04615769535303116, | |
| "learning_rate": 1.8129399918487833e-06, | |
| "loss": 0.0067, | |
| "step": 10290 | |
| }, | |
| { | |
| "epoch": 1.6509577622826, | |
| "grad_norm": 0.04581748694181442, | |
| "learning_rate": 1.796905591465492e-06, | |
| "loss": 0.0067, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 1.6525607117095455, | |
| "grad_norm": 0.039267655462026596, | |
| "learning_rate": 1.7809354113872224e-06, | |
| "loss": 0.0058, | |
| "step": 10310 | |
| }, | |
| { | |
| "epoch": 1.6541636611364912, | |
| "grad_norm": 0.03654203936457634, | |
| "learning_rate": 1.7650295766411607e-06, | |
| "loss": 0.0069, | |
| "step": 10320 | |
| }, | |
| { | |
| "epoch": 1.6557666105634368, | |
| "grad_norm": 0.05323672294616699, | |
| "learning_rate": 1.7491882117507507e-06, | |
| "loss": 0.0069, | |
| "step": 10330 | |
| }, | |
| { | |
| "epoch": 1.6573695599903822, | |
| "grad_norm": 0.047599758952856064, | |
| "learning_rate": 1.7334114407347157e-06, | |
| "loss": 0.0068, | |
| "step": 10340 | |
| }, | |
| { | |
| "epoch": 1.6589725094173278, | |
| "grad_norm": 0.048799458891153336, | |
| "learning_rate": 1.7176993871060876e-06, | |
| "loss": 0.0065, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 1.6605754588442734, | |
| "grad_norm": 0.0284061748534441, | |
| "learning_rate": 1.7020521738712359e-06, | |
| "loss": 0.0066, | |
| "step": 10360 | |
| }, | |
| { | |
| "epoch": 1.662178408271219, | |
| "grad_norm": 0.039750777184963226, | |
| "learning_rate": 1.686469923528905e-06, | |
| "loss": 0.007, | |
| "step": 10370 | |
| }, | |
| { | |
| "epoch": 1.6637813576981646, | |
| "grad_norm": 0.03770313411951065, | |
| "learning_rate": 1.670952758069272e-06, | |
| "loss": 0.0065, | |
| "step": 10380 | |
| }, | |
| { | |
| "epoch": 1.6653843071251102, | |
| "grad_norm": 0.04947682470083237, | |
| "learning_rate": 1.6555007989729643e-06, | |
| "loss": 0.0067, | |
| "step": 10390 | |
| }, | |
| { | |
| "epoch": 1.6669872565520558, | |
| "grad_norm": 0.048967983573675156, | |
| "learning_rate": 1.6401141672101283e-06, | |
| "loss": 0.0061, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 1.6685902059790014, | |
| "grad_norm": 0.042331352829933167, | |
| "learning_rate": 1.6247929832394792e-06, | |
| "loss": 0.0068, | |
| "step": 10410 | |
| }, | |
| { | |
| "epoch": 1.670193155405947, | |
| "grad_norm": 0.0504949614405632, | |
| "learning_rate": 1.6095373670073467e-06, | |
| "loss": 0.0063, | |
| "step": 10420 | |
| }, | |
| { | |
| "epoch": 1.6717961048328926, | |
| "grad_norm": 0.03911704197525978, | |
| "learning_rate": 1.594347437946755e-06, | |
| "loss": 0.0065, | |
| "step": 10430 | |
| }, | |
| { | |
| "epoch": 1.6733990542598383, | |
| "grad_norm": 0.03691579028964043, | |
| "learning_rate": 1.5792233149764656e-06, | |
| "loss": 0.0069, | |
| "step": 10440 | |
| }, | |
| { | |
| "epoch": 1.6750020036867836, | |
| "grad_norm": 0.03881106898188591, | |
| "learning_rate": 1.5641651165000672e-06, | |
| "loss": 0.0063, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 1.6766049531137293, | |
| "grad_norm": 0.039052508771419525, | |
| "learning_rate": 1.5491729604050388e-06, | |
| "loss": 0.0069, | |
| "step": 10460 | |
| }, | |
| { | |
| "epoch": 1.6782079025406749, | |
| "grad_norm": 0.03730938211083412, | |
| "learning_rate": 1.5342469640618162e-06, | |
| "loss": 0.0066, | |
| "step": 10470 | |
| }, | |
| { | |
| "epoch": 1.6798108519676203, | |
| "grad_norm": 0.05100889876484871, | |
| "learning_rate": 1.5193872443229052e-06, | |
| "loss": 0.0068, | |
| "step": 10480 | |
| }, | |
| { | |
| "epoch": 1.6814138013945659, | |
| "grad_norm": 0.03976810351014137, | |
| "learning_rate": 1.5045939175219271e-06, | |
| "loss": 0.0064, | |
| "step": 10490 | |
| }, | |
| { | |
| "epoch": 1.6830167508215115, | |
| "grad_norm": 0.04142378270626068, | |
| "learning_rate": 1.4898670994727326e-06, | |
| "loss": 0.0066, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.684619700248457, | |
| "grad_norm": 0.03331238403916359, | |
| "learning_rate": 1.4752069054684925e-06, | |
| "loss": 0.0062, | |
| "step": 10510 | |
| }, | |
| { | |
| "epoch": 1.6862226496754027, | |
| "grad_norm": 0.042981959879398346, | |
| "learning_rate": 1.460613450280789e-06, | |
| "loss": 0.0066, | |
| "step": 10520 | |
| }, | |
| { | |
| "epoch": 1.6878255991023483, | |
| "grad_norm": 0.04343404620885849, | |
| "learning_rate": 1.4460868481587231e-06, | |
| "loss": 0.006, | |
| "step": 10530 | |
| }, | |
| { | |
| "epoch": 1.689428548529294, | |
| "grad_norm": 0.038565054535865784, | |
| "learning_rate": 1.4316272128280107e-06, | |
| "loss": 0.0065, | |
| "step": 10540 | |
| }, | |
| { | |
| "epoch": 1.6910314979562395, | |
| "grad_norm": 0.049151334911584854, | |
| "learning_rate": 1.4172346574901064e-06, | |
| "loss": 0.0072, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 1.6926344473831851, | |
| "grad_norm": 0.03570883348584175, | |
| "learning_rate": 1.4029092948213075e-06, | |
| "loss": 0.0062, | |
| "step": 10560 | |
| }, | |
| { | |
| "epoch": 1.6942373968101307, | |
| "grad_norm": 0.04620914161205292, | |
| "learning_rate": 1.3886512369718675e-06, | |
| "loss": 0.0071, | |
| "step": 10570 | |
| }, | |
| { | |
| "epoch": 1.6958403462370764, | |
| "grad_norm": 0.03542487695813179, | |
| "learning_rate": 1.3744605955651336e-06, | |
| "loss": 0.0065, | |
| "step": 10580 | |
| }, | |
| { | |
| "epoch": 1.6974432956640217, | |
| "grad_norm": 0.03881022706627846, | |
| "learning_rate": 1.3603374816966607e-06, | |
| "loss": 0.0062, | |
| "step": 10590 | |
| }, | |
| { | |
| "epoch": 1.6990462450909674, | |
| "grad_norm": 0.06067880243062973, | |
| "learning_rate": 1.3462820059333403e-06, | |
| "loss": 0.0068, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 1.700649194517913, | |
| "grad_norm": 0.04585850238800049, | |
| "learning_rate": 1.332294278312546e-06, | |
| "loss": 0.0067, | |
| "step": 10610 | |
| }, | |
| { | |
| "epoch": 1.7022521439448586, | |
| "grad_norm": 0.040219008922576904, | |
| "learning_rate": 1.318374408341262e-06, | |
| "loss": 0.0064, | |
| "step": 10620 | |
| }, | |
| { | |
| "epoch": 1.703855093371804, | |
| "grad_norm": 0.04510616883635521, | |
| "learning_rate": 1.3045225049952314e-06, | |
| "loss": 0.007, | |
| "step": 10630 | |
| }, | |
| { | |
| "epoch": 1.7054580427987496, | |
| "grad_norm": 0.029775921255350113, | |
| "learning_rate": 1.2907386767180985e-06, | |
| "loss": 0.0059, | |
| "step": 10640 | |
| }, | |
| { | |
| "epoch": 1.7070609922256952, | |
| "grad_norm": 0.04115507751703262, | |
| "learning_rate": 1.2770230314205567e-06, | |
| "loss": 0.0066, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 1.7086639416526408, | |
| "grad_norm": 0.03847046568989754, | |
| "learning_rate": 1.2633756764795247e-06, | |
| "loss": 0.0063, | |
| "step": 10660 | |
| }, | |
| { | |
| "epoch": 1.7102668910795864, | |
| "grad_norm": 0.04787128418684006, | |
| "learning_rate": 1.249796718737275e-06, | |
| "loss": 0.0064, | |
| "step": 10670 | |
| }, | |
| { | |
| "epoch": 1.711869840506532, | |
| "grad_norm": 0.04256165400147438, | |
| "learning_rate": 1.2362862645006213e-06, | |
| "loss": 0.0065, | |
| "step": 10680 | |
| }, | |
| { | |
| "epoch": 1.7134727899334776, | |
| "grad_norm": 0.047069843858480453, | |
| "learning_rate": 1.2228444195400757e-06, | |
| "loss": 0.007, | |
| "step": 10690 | |
| }, | |
| { | |
| "epoch": 1.7150757393604232, | |
| "grad_norm": 0.025805678218603134, | |
| "learning_rate": 1.2094712890890193e-06, | |
| "loss": 0.0073, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 1.7166786887873688, | |
| "grad_norm": 0.03637049347162247, | |
| "learning_rate": 1.1961669778428874e-06, | |
| "loss": 0.0063, | |
| "step": 10710 | |
| }, | |
| { | |
| "epoch": 1.7182816382143145, | |
| "grad_norm": 0.0403585359454155, | |
| "learning_rate": 1.1829315899583393e-06, | |
| "loss": 0.0068, | |
| "step": 10720 | |
| }, | |
| { | |
| "epoch": 1.71988458764126, | |
| "grad_norm": 0.04056670516729355, | |
| "learning_rate": 1.1697652290524497e-06, | |
| "loss": 0.0061, | |
| "step": 10730 | |
| }, | |
| { | |
| "epoch": 1.7214875370682055, | |
| "grad_norm": 0.02951175719499588, | |
| "learning_rate": 1.156667998201899e-06, | |
| "loss": 0.0065, | |
| "step": 10740 | |
| }, | |
| { | |
| "epoch": 1.723090486495151, | |
| "grad_norm": 0.045906949788331985, | |
| "learning_rate": 1.143639999942152e-06, | |
| "loss": 0.0065, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 1.7246934359220967, | |
| "grad_norm": 0.04483688622713089, | |
| "learning_rate": 1.1306813362666846e-06, | |
| "loss": 0.0063, | |
| "step": 10760 | |
| }, | |
| { | |
| "epoch": 1.7262963853490423, | |
| "grad_norm": 0.04201997444033623, | |
| "learning_rate": 1.1177921086261467e-06, | |
| "loss": 0.0064, | |
| "step": 10770 | |
| }, | |
| { | |
| "epoch": 1.7278993347759877, | |
| "grad_norm": 0.03533167019486427, | |
| "learning_rate": 1.1049724179276034e-06, | |
| "loss": 0.0068, | |
| "step": 10780 | |
| }, | |
| { | |
| "epoch": 1.7295022842029333, | |
| "grad_norm": 0.04161592572927475, | |
| "learning_rate": 1.0922223645337181e-06, | |
| "loss": 0.0066, | |
| "step": 10790 | |
| }, | |
| { | |
| "epoch": 1.731105233629879, | |
| "grad_norm": 0.053074780851602554, | |
| "learning_rate": 1.0795420482619867e-06, | |
| "loss": 0.0071, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 1.7327081830568245, | |
| "grad_norm": 0.04212959110736847, | |
| "learning_rate": 1.0669315683839455e-06, | |
| "loss": 0.0058, | |
| "step": 10810 | |
| }, | |
| { | |
| "epoch": 1.7343111324837701, | |
| "grad_norm": 0.05835643783211708, | |
| "learning_rate": 1.0543910236243926e-06, | |
| "loss": 0.0066, | |
| "step": 10820 | |
| }, | |
| { | |
| "epoch": 1.7359140819107157, | |
| "grad_norm": 0.03991185128688812, | |
| "learning_rate": 1.0419205121606246e-06, | |
| "loss": 0.0068, | |
| "step": 10830 | |
| }, | |
| { | |
| "epoch": 1.7375170313376613, | |
| "grad_norm": 0.03626156225800514, | |
| "learning_rate": 1.0295201316216596e-06, | |
| "loss": 0.0066, | |
| "step": 10840 | |
| }, | |
| { | |
| "epoch": 1.739119980764607, | |
| "grad_norm": 0.04507741332054138, | |
| "learning_rate": 1.0171899790874718e-06, | |
| "loss": 0.0065, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 1.7407229301915526, | |
| "grad_norm": 0.05982038006186485, | |
| "learning_rate": 1.0049301510882404e-06, | |
| "loss": 0.0067, | |
| "step": 10860 | |
| }, | |
| { | |
| "epoch": 1.7423258796184982, | |
| "grad_norm": 0.043249331414699554, | |
| "learning_rate": 9.927407436035886e-07, | |
| "loss": 0.0065, | |
| "step": 10870 | |
| }, | |
| { | |
| "epoch": 1.7439288290454438, | |
| "grad_norm": 0.03685468062758446, | |
| "learning_rate": 9.80621852061826e-07, | |
| "loss": 0.006, | |
| "step": 10880 | |
| }, | |
| { | |
| "epoch": 1.7455317784723892, | |
| "grad_norm": 0.03677702322602272, | |
| "learning_rate": 9.685735713392141e-07, | |
| "loss": 0.0059, | |
| "step": 10890 | |
| }, | |
| { | |
| "epoch": 1.7471347278993348, | |
| "grad_norm": 0.034399621188640594, | |
| "learning_rate": 9.565959957592141e-07, | |
| "loss": 0.0076, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 1.7487376773262804, | |
| "grad_norm": 0.0476701557636261, | |
| "learning_rate": 9.446892190917556e-07, | |
| "loss": 0.0068, | |
| "step": 10910 | |
| }, | |
| { | |
| "epoch": 1.7503406267532258, | |
| "grad_norm": 0.04046626016497612, | |
| "learning_rate": 9.328533345524893e-07, | |
| "loss": 0.0064, | |
| "step": 10920 | |
| }, | |
| { | |
| "epoch": 1.7519435761801714, | |
| "grad_norm": 0.04283340275287628, | |
| "learning_rate": 9.210884348020744e-07, | |
| "loss": 0.0065, | |
| "step": 10930 | |
| }, | |
| { | |
| "epoch": 1.753546525607117, | |
| "grad_norm": 0.04990899935364723, | |
| "learning_rate": 9.093946119454455e-07, | |
| "loss": 0.0063, | |
| "step": 10940 | |
| }, | |
| { | |
| "epoch": 1.7551494750340626, | |
| "grad_norm": 0.061496201902627945, | |
| "learning_rate": 8.97771957531084e-07, | |
| "loss": 0.0063, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 1.7567524244610082, | |
| "grad_norm": 0.08075609058141708, | |
| "learning_rate": 8.86220562550314e-07, | |
| "loss": 0.0066, | |
| "step": 10960 | |
| }, | |
| { | |
| "epoch": 1.7583553738879538, | |
| "grad_norm": 0.04579548165202141, | |
| "learning_rate": 8.747405174365853e-07, | |
| "loss": 0.0066, | |
| "step": 10970 | |
| }, | |
| { | |
| "epoch": 1.7599583233148994, | |
| "grad_norm": 0.058112818747758865, | |
| "learning_rate": 8.633319120647587e-07, | |
| "loss": 0.0067, | |
| "step": 10980 | |
| }, | |
| { | |
| "epoch": 1.761561272741845, | |
| "grad_norm": 0.042892564088106155, | |
| "learning_rate": 8.51994835750416e-07, | |
| "loss": 0.0068, | |
| "step": 10990 | |
| }, | |
| { | |
| "epoch": 1.7631642221687907, | |
| "grad_norm": 0.039829425513744354, | |
| "learning_rate": 8.407293772491432e-07, | |
| "loss": 0.0064, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.7647671715957363, | |
| "grad_norm": 0.04083314165472984, | |
| "learning_rate": 8.295356247558595e-07, | |
| "loss": 0.0063, | |
| "step": 11010 | |
| }, | |
| { | |
| "epoch": 1.7663701210226819, | |
| "grad_norm": 0.05451719090342522, | |
| "learning_rate": 8.184136659040986e-07, | |
| "loss": 0.0067, | |
| "step": 11020 | |
| }, | |
| { | |
| "epoch": 1.7679730704496273, | |
| "grad_norm": 0.03532470762729645, | |
| "learning_rate": 8.07363587765343e-07, | |
| "loss": 0.0061, | |
| "step": 11030 | |
| }, | |
| { | |
| "epoch": 1.7695760198765729, | |
| "grad_norm": 0.04727141186594963, | |
| "learning_rate": 7.963854768483392e-07, | |
| "loss": 0.0061, | |
| "step": 11040 | |
| }, | |
| { | |
| "epoch": 1.7711789693035185, | |
| "grad_norm": 0.04995537921786308, | |
| "learning_rate": 7.854794190984116e-07, | |
| "loss": 0.0068, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 1.772781918730464, | |
| "grad_norm": 0.05020635575056076, | |
| "learning_rate": 7.746454998968012e-07, | |
| "loss": 0.006, | |
| "step": 11060 | |
| }, | |
| { | |
| "epoch": 1.7743848681574095, | |
| "grad_norm": 0.0372854508459568, | |
| "learning_rate": 7.638838040599838e-07, | |
| "loss": 0.007, | |
| "step": 11070 | |
| }, | |
| { | |
| "epoch": 1.775987817584355, | |
| "grad_norm": 0.04687948524951935, | |
| "learning_rate": 7.531944158390203e-07, | |
| "loss": 0.0059, | |
| "step": 11080 | |
| }, | |
| { | |
| "epoch": 1.7775907670113007, | |
| "grad_norm": 0.09241676330566406, | |
| "learning_rate": 7.425774189188906e-07, | |
| "loss": 0.0063, | |
| "step": 11090 | |
| }, | |
| { | |
| "epoch": 1.7791937164382463, | |
| "grad_norm": 0.05238804966211319, | |
| "learning_rate": 7.320328964178325e-07, | |
| "loss": 0.0072, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 1.780796665865192, | |
| "grad_norm": 0.033452149480581284, | |
| "learning_rate": 7.215609308867022e-07, | |
| "loss": 0.0055, | |
| "step": 11110 | |
| }, | |
| { | |
| "epoch": 1.7823996152921375, | |
| "grad_norm": 0.05519595742225647, | |
| "learning_rate": 7.111616043083202e-07, | |
| "loss": 0.0067, | |
| "step": 11120 | |
| }, | |
| { | |
| "epoch": 1.7840025647190831, | |
| "grad_norm": 0.04810706898570061, | |
| "learning_rate": 7.008349980968321e-07, | |
| "loss": 0.0059, | |
| "step": 11130 | |
| }, | |
| { | |
| "epoch": 1.7856055141460287, | |
| "grad_norm": 0.0363197885453701, | |
| "learning_rate": 6.905811930970718e-07, | |
| "loss": 0.0062, | |
| "step": 11140 | |
| }, | |
| { | |
| "epoch": 1.7872084635729744, | |
| "grad_norm": 0.03364889323711395, | |
| "learning_rate": 6.804002695839274e-07, | |
| "loss": 0.0059, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 1.78881141299992, | |
| "grad_norm": 0.03774208575487137, | |
| "learning_rate": 6.702923072617129e-07, | |
| "loss": 0.0057, | |
| "step": 11160 | |
| }, | |
| { | |
| "epoch": 1.7904143624268656, | |
| "grad_norm": 0.05517459660768509, | |
| "learning_rate": 6.602573852635441e-07, | |
| "loss": 0.0068, | |
| "step": 11170 | |
| }, | |
| { | |
| "epoch": 1.792017311853811, | |
| "grad_norm": 0.04096028581261635, | |
| "learning_rate": 6.502955821507196e-07, | |
| "loss": 0.007, | |
| "step": 11180 | |
| }, | |
| { | |
| "epoch": 1.7936202612807566, | |
| "grad_norm": 0.03794392570853233, | |
| "learning_rate": 6.404069759121079e-07, | |
| "loss": 0.0065, | |
| "step": 11190 | |
| }, | |
| { | |
| "epoch": 1.7952232107077022, | |
| "grad_norm": 0.03523090481758118, | |
| "learning_rate": 6.305916439635295e-07, | |
| "loss": 0.0063, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 1.7968261601346478, | |
| "grad_norm": 0.04677393287420273, | |
| "learning_rate": 6.208496631471605e-07, | |
| "loss": 0.0062, | |
| "step": 11210 | |
| }, | |
| { | |
| "epoch": 1.7984291095615932, | |
| "grad_norm": 0.0406043566763401, | |
| "learning_rate": 6.111811097309262e-07, | |
| "loss": 0.0066, | |
| "step": 11220 | |
| }, | |
| { | |
| "epoch": 1.8000320589885388, | |
| "grad_norm": 0.03882203623652458, | |
| "learning_rate": 6.015860594079004e-07, | |
| "loss": 0.0058, | |
| "step": 11230 | |
| }, | |
| { | |
| "epoch": 1.8016350084154844, | |
| "grad_norm": 0.03886554762721062, | |
| "learning_rate": 5.920645872957187e-07, | |
| "loss": 0.0065, | |
| "step": 11240 | |
| }, | |
| { | |
| "epoch": 1.80323795784243, | |
| "grad_norm": 0.039025116711854935, | |
| "learning_rate": 5.826167679359917e-07, | |
| "loss": 0.0067, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 1.8048409072693756, | |
| "grad_norm": 0.05819341167807579, | |
| "learning_rate": 5.732426752937103e-07, | |
| "loss": 0.0077, | |
| "step": 11260 | |
| }, | |
| { | |
| "epoch": 1.8064438566963212, | |
| "grad_norm": 0.05688609555363655, | |
| "learning_rate": 5.639423827566837e-07, | |
| "loss": 0.0068, | |
| "step": 11270 | |
| }, | |
| { | |
| "epoch": 1.8080468061232668, | |
| "grad_norm": 0.04290845990180969, | |
| "learning_rate": 5.547159631349452e-07, | |
| "loss": 0.0069, | |
| "step": 11280 | |
| }, | |
| { | |
| "epoch": 1.8096497555502125, | |
| "grad_norm": 0.049413323402404785, | |
| "learning_rate": 5.455634886602046e-07, | |
| "loss": 0.0062, | |
| "step": 11290 | |
| }, | |
| { | |
| "epoch": 1.811252704977158, | |
| "grad_norm": 0.045171111822128296, | |
| "learning_rate": 5.364850309852598e-07, | |
| "loss": 0.0065, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 1.8128556544041037, | |
| "grad_norm": 0.038502875715494156, | |
| "learning_rate": 5.274806611834527e-07, | |
| "loss": 0.0063, | |
| "step": 11310 | |
| }, | |
| { | |
| "epoch": 1.8144586038310493, | |
| "grad_norm": 0.037532929331064224, | |
| "learning_rate": 5.185504497481064e-07, | |
| "loss": 0.0059, | |
| "step": 11320 | |
| }, | |
| { | |
| "epoch": 1.8160615532579947, | |
| "grad_norm": 0.03429022803902626, | |
| "learning_rate": 5.096944665919712e-07, | |
| "loss": 0.0056, | |
| "step": 11330 | |
| }, | |
| { | |
| "epoch": 1.8176645026849403, | |
| "grad_norm": 0.03987206146121025, | |
| "learning_rate": 5.009127810466808e-07, | |
| "loss": 0.0065, | |
| "step": 11340 | |
| }, | |
| { | |
| "epoch": 1.819267452111886, | |
| "grad_norm": 0.032589782029390335, | |
| "learning_rate": 4.922054618622096e-07, | |
| "loss": 0.0062, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 1.8208704015388313, | |
| "grad_norm": 0.04659878835082054, | |
| "learning_rate": 4.835725772063316e-07, | |
| "loss": 0.0061, | |
| "step": 11360 | |
| }, | |
| { | |
| "epoch": 1.822473350965777, | |
| "grad_norm": 0.03782256692647934, | |
| "learning_rate": 4.750141946640918e-07, | |
| "loss": 0.0063, | |
| "step": 11370 | |
| }, | |
| { | |
| "epoch": 1.8240763003927225, | |
| "grad_norm": 0.03973528742790222, | |
| "learning_rate": 4.665303812372668e-07, | |
| "loss": 0.0062, | |
| "step": 11380 | |
| }, | |
| { | |
| "epoch": 1.8256792498196681, | |
| "grad_norm": 0.06990045309066772, | |
| "learning_rate": 4.581212033438576e-07, | |
| "loss": 0.0064, | |
| "step": 11390 | |
| }, | |
| { | |
| "epoch": 1.8272821992466137, | |
| "grad_norm": 0.055125899612903595, | |
| "learning_rate": 4.4978672681755153e-07, | |
| "loss": 0.0069, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 1.8288851486735593, | |
| "grad_norm": 0.038944311439991, | |
| "learning_rate": 4.41527016907215e-07, | |
| "loss": 0.0066, | |
| "step": 11410 | |
| }, | |
| { | |
| "epoch": 1.830488098100505, | |
| "grad_norm": 0.04602019861340523, | |
| "learning_rate": 4.333421382763847e-07, | |
| "loss": 0.006, | |
| "step": 11420 | |
| }, | |
| { | |
| "epoch": 1.8320910475274506, | |
| "grad_norm": 0.055467262864112854, | |
| "learning_rate": 4.252321550027583e-07, | |
| "loss": 0.0068, | |
| "step": 11430 | |
| }, | |
| { | |
| "epoch": 1.8336939969543962, | |
| "grad_norm": 0.029419003054499626, | |
| "learning_rate": 4.171971305776945e-07, | |
| "loss": 0.0065, | |
| "step": 11440 | |
| }, | |
| { | |
| "epoch": 1.8352969463813418, | |
| "grad_norm": 0.039543673396110535, | |
| "learning_rate": 4.0923712790571167e-07, | |
| "loss": 0.0059, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 1.8368998958082874, | |
| "grad_norm": 0.04183452948927879, | |
| "learning_rate": 4.013522093040023e-07, | |
| "loss": 0.0062, | |
| "step": 11460 | |
| }, | |
| { | |
| "epoch": 1.8385028452352328, | |
| "grad_norm": 0.04477937892079353, | |
| "learning_rate": 3.9354243650194025e-07, | |
| "loss": 0.0059, | |
| "step": 11470 | |
| }, | |
| { | |
| "epoch": 1.8401057946621784, | |
| "grad_norm": 0.04892928525805473, | |
| "learning_rate": 3.8580787064059544e-07, | |
| "loss": 0.0056, | |
| "step": 11480 | |
| }, | |
| { | |
| "epoch": 1.841708744089124, | |
| "grad_norm": 0.0421488918364048, | |
| "learning_rate": 3.781485722722622e-07, | |
| "loss": 0.006, | |
| "step": 11490 | |
| }, | |
| { | |
| "epoch": 1.8433116935160696, | |
| "grad_norm": 0.04015891253948212, | |
| "learning_rate": 3.7056460135998283e-07, | |
| "loss": 0.0066, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 1.844914642943015, | |
| "grad_norm": 0.04604795202612877, | |
| "learning_rate": 3.630560172770714e-07, | |
| "loss": 0.0059, | |
| "step": 11510 | |
| }, | |
| { | |
| "epoch": 1.8465175923699606, | |
| "grad_norm": 0.05634016543626785, | |
| "learning_rate": 3.5562287880665845e-07, | |
| "loss": 0.0072, | |
| "step": 11520 | |
| }, | |
| { | |
| "epoch": 1.8481205417969062, | |
| "grad_norm": 0.04716205969452858, | |
| "learning_rate": 3.48265244141226e-07, | |
| "loss": 0.0066, | |
| "step": 11530 | |
| }, | |
| { | |
| "epoch": 1.8497234912238518, | |
| "grad_norm": 0.04330425336956978, | |
| "learning_rate": 3.4098317088215203e-07, | |
| "loss": 0.0062, | |
| "step": 11540 | |
| }, | |
| { | |
| "epoch": 1.8513264406507974, | |
| "grad_norm": 0.044012319296598434, | |
| "learning_rate": 3.337767160392602e-07, | |
| "loss": 0.0064, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 1.852929390077743, | |
| "grad_norm": 0.03510000556707382, | |
| "learning_rate": 3.2664593603037196e-07, | |
| "loss": 0.0065, | |
| "step": 11560 | |
| }, | |
| { | |
| "epoch": 1.8545323395046887, | |
| "grad_norm": 0.03645529970526695, | |
| "learning_rate": 3.1959088668087055e-07, | |
| "loss": 0.0063, | |
| "step": 11570 | |
| }, | |
| { | |
| "epoch": 1.8561352889316343, | |
| "grad_norm": 0.03181441128253937, | |
| "learning_rate": 3.1261162322325343e-07, | |
| "loss": 0.0063, | |
| "step": 11580 | |
| }, | |
| { | |
| "epoch": 1.8577382383585799, | |
| "grad_norm": 0.0412897914648056, | |
| "learning_rate": 3.0570820029671377e-07, | |
| "loss": 0.0064, | |
| "step": 11590 | |
| }, | |
| { | |
| "epoch": 1.8593411877855255, | |
| "grad_norm": 0.03842850774526596, | |
| "learning_rate": 2.988806719466997e-07, | |
| "loss": 0.006, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 1.860944137212471, | |
| "grad_norm": 0.0427357517182827, | |
| "learning_rate": 2.9212909162449785e-07, | |
| "loss": 0.006, | |
| "step": 11610 | |
| }, | |
| { | |
| "epoch": 1.8625470866394165, | |
| "grad_norm": 0.055357471108436584, | |
| "learning_rate": 2.8545351218681406e-07, | |
| "loss": 0.0064, | |
| "step": 11620 | |
| }, | |
| { | |
| "epoch": 1.864150036066362, | |
| "grad_norm": 0.0415298193693161, | |
| "learning_rate": 2.788539858953587e-07, | |
| "loss": 0.0063, | |
| "step": 11630 | |
| }, | |
| { | |
| "epoch": 1.8657529854933077, | |
| "grad_norm": 0.038512542843818665, | |
| "learning_rate": 2.723305644164398e-07, | |
| "loss": 0.0064, | |
| "step": 11640 | |
| }, | |
| { | |
| "epoch": 1.8673559349202533, | |
| "grad_norm": 0.04085739329457283, | |
| "learning_rate": 2.6588329882055506e-07, | |
| "loss": 0.006, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 1.8689588843471987, | |
| "grad_norm": 0.036621492356061935, | |
| "learning_rate": 2.5951223958199157e-07, | |
| "loss": 0.0064, | |
| "step": 11660 | |
| }, | |
| { | |
| "epoch": 1.8705618337741443, | |
| "grad_norm": 0.05602623149752617, | |
| "learning_rate": 2.5321743657844013e-07, | |
| "loss": 0.0063, | |
| "step": 11670 | |
| }, | |
| { | |
| "epoch": 1.87216478320109, | |
| "grad_norm": 0.040210746228694916, | |
| "learning_rate": 2.4699893909058805e-07, | |
| "loss": 0.0057, | |
| "step": 11680 | |
| }, | |
| { | |
| "epoch": 1.8737677326280355, | |
| "grad_norm": 0.03419404849410057, | |
| "learning_rate": 2.408567958017516e-07, | |
| "loss": 0.0065, | |
| "step": 11690 | |
| }, | |
| { | |
| "epoch": 1.8753706820549811, | |
| "grad_norm": 0.03755233436822891, | |
| "learning_rate": 2.3479105479747854e-07, | |
| "loss": 0.0058, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 1.8769736314819268, | |
| "grad_norm": 0.035565607249736786, | |
| "learning_rate": 2.2880176356518292e-07, | |
| "loss": 0.0066, | |
| "step": 11710 | |
| }, | |
| { | |
| "epoch": 1.8785765809088724, | |
| "grad_norm": 0.0364888571202755, | |
| "learning_rate": 2.2288896899377187e-07, | |
| "loss": 0.0065, | |
| "step": 11720 | |
| }, | |
| { | |
| "epoch": 1.880179530335818, | |
| "grad_norm": 0.03588583692908287, | |
| "learning_rate": 2.170527173732706e-07, | |
| "loss": 0.0064, | |
| "step": 11730 | |
| }, | |
| { | |
| "epoch": 1.8817824797627636, | |
| "grad_norm": 0.03791727498173714, | |
| "learning_rate": 2.1129305439447023e-07, | |
| "loss": 0.0072, | |
| "step": 11740 | |
| }, | |
| { | |
| "epoch": 1.8833854291897092, | |
| "grad_norm": 0.07219801843166351, | |
| "learning_rate": 2.0561002514856377e-07, | |
| "loss": 0.0064, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 1.8849883786166548, | |
| "grad_norm": 0.035773083567619324, | |
| "learning_rate": 2.00003674126793e-07, | |
| "loss": 0.0065, | |
| "step": 11760 | |
| }, | |
| { | |
| "epoch": 1.8865913280436002, | |
| "grad_norm": 0.033491574227809906, | |
| "learning_rate": 1.9447404522010548e-07, | |
| "loss": 0.0065, | |
| "step": 11770 | |
| }, | |
| { | |
| "epoch": 1.8881942774705458, | |
| "grad_norm": 0.03553665056824684, | |
| "learning_rate": 1.890211817188059e-07, | |
| "loss": 0.0061, | |
| "step": 11780 | |
| }, | |
| { | |
| "epoch": 1.8897972268974914, | |
| "grad_norm": 0.04234781861305237, | |
| "learning_rate": 1.8364512631221633e-07, | |
| "loss": 0.0068, | |
| "step": 11790 | |
| }, | |
| { | |
| "epoch": 1.8914001763244368, | |
| "grad_norm": 0.045198485255241394, | |
| "learning_rate": 1.783459210883498e-07, | |
| "loss": 0.0061, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 1.8930031257513824, | |
| "grad_norm": 0.034584879875183105, | |
| "learning_rate": 1.731236075335696e-07, | |
| "loss": 0.0063, | |
| "step": 11810 | |
| }, | |
| { | |
| "epoch": 1.894606075178328, | |
| "grad_norm": 0.03774869441986084, | |
| "learning_rate": 1.6797822653227492e-07, | |
| "loss": 0.0062, | |
| "step": 11820 | |
| }, | |
| { | |
| "epoch": 1.8962090246052736, | |
| "grad_norm": 0.03940942883491516, | |
| "learning_rate": 1.6290981836657116e-07, | |
| "loss": 0.0058, | |
| "step": 11830 | |
| }, | |
| { | |
| "epoch": 1.8978119740322192, | |
| "grad_norm": 0.04277713969349861, | |
| "learning_rate": 1.579184227159658e-07, | |
| "loss": 0.0062, | |
| "step": 11840 | |
| }, | |
| { | |
| "epoch": 1.8994149234591649, | |
| "grad_norm": 0.04427545145153999, | |
| "learning_rate": 1.5300407865704637e-07, | |
| "loss": 0.0068, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 1.9010178728861105, | |
| "grad_norm": 0.048489801585674286, | |
| "learning_rate": 1.4816682466318178e-07, | |
| "loss": 0.0057, | |
| "step": 11860 | |
| }, | |
| { | |
| "epoch": 1.902620822313056, | |
| "grad_norm": 0.04006476700305939, | |
| "learning_rate": 1.4340669860421708e-07, | |
| "loss": 0.0065, | |
| "step": 11870 | |
| }, | |
| { | |
| "epoch": 1.9042237717400017, | |
| "grad_norm": 0.051166266202926636, | |
| "learning_rate": 1.3872373774618363e-07, | |
| "loss": 0.0056, | |
| "step": 11880 | |
| }, | |
| { | |
| "epoch": 1.9058267211669473, | |
| "grad_norm": 0.03423753380775452, | |
| "learning_rate": 1.3411797875099718e-07, | |
| "loss": 0.0061, | |
| "step": 11890 | |
| }, | |
| { | |
| "epoch": 1.907429670593893, | |
| "grad_norm": 0.03915632143616676, | |
| "learning_rate": 1.2958945767617915e-07, | |
| "loss": 0.0063, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 1.9090326200208383, | |
| "grad_norm": 0.037460289895534515, | |
| "learning_rate": 1.2513820997456904e-07, | |
| "loss": 0.0064, | |
| "step": 11910 | |
| }, | |
| { | |
| "epoch": 1.910635569447784, | |
| "grad_norm": 0.0423584058880806, | |
| "learning_rate": 1.2076427049405482e-07, | |
| "loss": 0.0063, | |
| "step": 11920 | |
| }, | |
| { | |
| "epoch": 1.9122385188747295, | |
| "grad_norm": 0.04563596844673157, | |
| "learning_rate": 1.164676734772896e-07, | |
| "loss": 0.0065, | |
| "step": 11930 | |
| }, | |
| { | |
| "epoch": 1.9138414683016751, | |
| "grad_norm": 0.036899786442518234, | |
| "learning_rate": 1.1224845256142758e-07, | |
| "loss": 0.0061, | |
| "step": 11940 | |
| }, | |
| { | |
| "epoch": 1.9154444177286205, | |
| "grad_norm": 0.03818698972463608, | |
| "learning_rate": 1.0810664077786747e-07, | |
| "loss": 0.0061, | |
| "step": 11950 | |
| }, | |
| { | |
| "epoch": 1.9170473671555661, | |
| "grad_norm": 0.04241606220602989, | |
| "learning_rate": 1.040422705519828e-07, | |
| "loss": 0.0072, | |
| "step": 11960 | |
| }, | |
| { | |
| "epoch": 1.9186503165825117, | |
| "grad_norm": 0.039895787835121155, | |
| "learning_rate": 1.0005537370287532e-07, | |
| "loss": 0.0059, | |
| "step": 11970 | |
| }, | |
| { | |
| "epoch": 1.9202532660094573, | |
| "grad_norm": 0.03996073827147484, | |
| "learning_rate": 9.614598144312426e-08, | |
| "loss": 0.0065, | |
| "step": 11980 | |
| }, | |
| { | |
| "epoch": 1.921856215436403, | |
| "grad_norm": 0.040646884590387344, | |
| "learning_rate": 9.231412437854192e-08, | |
| "loss": 0.0066, | |
| "step": 11990 | |
| }, | |
| { | |
| "epoch": 1.9234591648633486, | |
| "grad_norm": 0.037054888904094696, | |
| "learning_rate": 8.855983250793287e-08, | |
| "loss": 0.007, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.9250621142902942, | |
| "grad_norm": 0.03988795354962349, | |
| "learning_rate": 8.488313522286074e-08, | |
| "loss": 0.0059, | |
| "step": 12010 | |
| }, | |
| { | |
| "epoch": 1.9266650637172398, | |
| "grad_norm": 0.045791856944561005, | |
| "learning_rate": 8.128406130741617e-08, | |
| "loss": 0.0057, | |
| "step": 12020 | |
| }, | |
| { | |
| "epoch": 1.9282680131441854, | |
| "grad_norm": 0.03719751164317131, | |
| "learning_rate": 7.776263893799485e-08, | |
| "loss": 0.0063, | |
| "step": 12030 | |
| }, | |
| { | |
| "epoch": 1.929870962571131, | |
| "grad_norm": 0.05731379985809326, | |
| "learning_rate": 7.431889568307316e-08, | |
| "loss": 0.0072, | |
| "step": 12040 | |
| }, | |
| { | |
| "epoch": 1.9314739119980766, | |
| "grad_norm": 0.03646623343229294, | |
| "learning_rate": 7.095285850299505e-08, | |
| "loss": 0.0068, | |
| "step": 12050 | |
| }, | |
| { | |
| "epoch": 1.933076861425022, | |
| "grad_norm": 0.032672230154275894, | |
| "learning_rate": 6.766455374975777e-08, | |
| "loss": 0.0061, | |
| "step": 12060 | |
| }, | |
| { | |
| "epoch": 1.9346798108519676, | |
| "grad_norm": 0.09227609634399414, | |
| "learning_rate": 6.445400716681205e-08, | |
| "loss": 0.0069, | |
| "step": 12070 | |
| }, | |
| { | |
| "epoch": 1.9362827602789132, | |
| "grad_norm": 0.04252972453832626, | |
| "learning_rate": 6.132124388885107e-08, | |
| "loss": 0.0065, | |
| "step": 12080 | |
| }, | |
| { | |
| "epoch": 1.9378857097058588, | |
| "grad_norm": 0.047904375940561295, | |
| "learning_rate": 5.8266288441621855e-08, | |
| "loss": 0.0065, | |
| "step": 12090 | |
| }, | |
| { | |
| "epoch": 1.9394886591328042, | |
| "grad_norm": 0.04473813623189926, | |
| "learning_rate": 5.528916474172974e-08, | |
| "loss": 0.0071, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 1.9410916085597498, | |
| "grad_norm": 0.04604315385222435, | |
| "learning_rate": 5.2389896096451954e-08, | |
| "loss": 0.0069, | |
| "step": 12110 | |
| }, | |
| { | |
| "epoch": 1.9426945579866954, | |
| "grad_norm": 0.04812907055020332, | |
| "learning_rate": 4.9568505203553277e-08, | |
| "loss": 0.0061, | |
| "step": 12120 | |
| }, | |
| { | |
| "epoch": 1.944297507413641, | |
| "grad_norm": 0.04590696468949318, | |
| "learning_rate": 4.6825014151113955e-08, | |
| "loss": 0.0064, | |
| "step": 12130 | |
| }, | |
| { | |
| "epoch": 1.9459004568405867, | |
| "grad_norm": 0.03498758748173714, | |
| "learning_rate": 4.415944441734543e-08, | |
| "loss": 0.0062, | |
| "step": 12140 | |
| }, | |
| { | |
| "epoch": 1.9475034062675323, | |
| "grad_norm": 0.04751746729016304, | |
| "learning_rate": 4.1571816870438206e-08, | |
| "loss": 0.0065, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 1.9491063556944779, | |
| "grad_norm": 0.038238272070884705, | |
| "learning_rate": 3.9062151768382015e-08, | |
| "loss": 0.0062, | |
| "step": 12160 | |
| }, | |
| { | |
| "epoch": 1.9507093051214235, | |
| "grad_norm": 0.04823305085301399, | |
| "learning_rate": 3.663046875882037e-08, | |
| "loss": 0.007, | |
| "step": 12170 | |
| }, | |
| { | |
| "epoch": 1.952312254548369, | |
| "grad_norm": 0.0401928685605526, | |
| "learning_rate": 3.427678687888847e-08, | |
| "loss": 0.0068, | |
| "step": 12180 | |
| }, | |
| { | |
| "epoch": 1.9539152039753147, | |
| "grad_norm": 0.03627694770693779, | |
| "learning_rate": 3.200112455506777e-08, | |
| "loss": 0.006, | |
| "step": 12190 | |
| }, | |
| { | |
| "epoch": 1.9555181534022603, | |
| "grad_norm": 0.04345129802823067, | |
| "learning_rate": 2.980349960304274e-08, | |
| "loss": 0.0063, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 1.9571211028292057, | |
| "grad_norm": 0.049445588141679764, | |
| "learning_rate": 2.7683929227556585e-08, | |
| "loss": 0.0058, | |
| "step": 12210 | |
| }, | |
| { | |
| "epoch": 1.9587240522561513, | |
| "grad_norm": 0.05957973003387451, | |
| "learning_rate": 2.5642430022281285e-08, | |
| "loss": 0.0065, | |
| "step": 12220 | |
| }, | |
| { | |
| "epoch": 1.960327001683097, | |
| "grad_norm": 0.032285600900650024, | |
| "learning_rate": 2.3679017969685524e-08, | |
| "loss": 0.0059, | |
| "step": 12230 | |
| }, | |
| { | |
| "epoch": 1.9619299511100423, | |
| "grad_norm": 0.03355714678764343, | |
| "learning_rate": 2.1793708440910334e-08, | |
| "loss": 0.0061, | |
| "step": 12240 | |
| }, | |
| { | |
| "epoch": 1.963532900536988, | |
| "grad_norm": 0.053335681557655334, | |
| "learning_rate": 1.9986516195650284e-08, | |
| "loss": 0.0063, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 1.9651358499639335, | |
| "grad_norm": 0.031918782740831375, | |
| "learning_rate": 1.8257455382031386e-08, | |
| "loss": 0.0071, | |
| "step": 12260 | |
| }, | |
| { | |
| "epoch": 1.9667387993908791, | |
| "grad_norm": 0.038328561931848526, | |
| "learning_rate": 1.6606539536510037e-08, | |
| "loss": 0.0072, | |
| "step": 12270 | |
| }, | |
| { | |
| "epoch": 1.9683417488178248, | |
| "grad_norm": 0.045656658709049225, | |
| "learning_rate": 1.5033781583758677e-08, | |
| "loss": 0.006, | |
| "step": 12280 | |
| }, | |
| { | |
| "epoch": 1.9699446982447704, | |
| "grad_norm": 0.03573407977819443, | |
| "learning_rate": 1.3539193836571429e-08, | |
| "loss": 0.0068, | |
| "step": 12290 | |
| }, | |
| { | |
| "epoch": 1.971547647671716, | |
| "grad_norm": 0.03196869418025017, | |
| "learning_rate": 1.2122787995759722e-08, | |
| "loss": 0.0063, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 1.9731505970986616, | |
| "grad_norm": 0.04788234829902649, | |
| "learning_rate": 1.0784575150069033e-08, | |
| "loss": 0.006, | |
| "step": 12310 | |
| }, | |
| { | |
| "epoch": 1.9747535465256072, | |
| "grad_norm": 0.03255213052034378, | |
| "learning_rate": 9.524565776086736e-09, | |
| "loss": 0.0067, | |
| "step": 12320 | |
| }, | |
| { | |
| "epoch": 1.9763564959525528, | |
| "grad_norm": 0.036573149263858795, | |
| "learning_rate": 8.342769738161061e-09, | |
| "loss": 0.0062, | |
| "step": 12330 | |
| }, | |
| { | |
| "epoch": 1.9779594453794984, | |
| "grad_norm": 0.04543542489409447, | |
| "learning_rate": 7.2391962883267e-09, | |
| "loss": 0.007, | |
| "step": 12340 | |
| }, | |
| { | |
| "epoch": 1.9795623948064438, | |
| "grad_norm": 0.03676731884479523, | |
| "learning_rate": 6.213854066228208e-09, | |
| "loss": 0.0063, | |
| "step": 12350 | |
| }, | |
| { | |
| "epoch": 1.9811653442333894, | |
| "grad_norm": 0.04279434308409691, | |
| "learning_rate": 5.266751099054501e-09, | |
| "loss": 0.0062, | |
| "step": 12360 | |
| }, | |
| { | |
| "epoch": 1.982768293660335, | |
| "grad_norm": 0.04709082096815109, | |
| "learning_rate": 4.3978948014755664e-09, | |
| "loss": 0.006, | |
| "step": 12370 | |
| }, | |
| { | |
| "epoch": 1.9843712430872806, | |
| "grad_norm": 0.0320071280002594, | |
| "learning_rate": 3.607291975584737e-09, | |
| "loss": 0.0068, | |
| "step": 12380 | |
| }, | |
| { | |
| "epoch": 1.985974192514226, | |
| "grad_norm": 0.03497837111353874, | |
| "learning_rate": 2.89494881084762e-09, | |
| "loss": 0.0061, | |
| "step": 12390 | |
| }, | |
| { | |
| "epoch": 1.9875771419411716, | |
| "grad_norm": 0.042159345000982285, | |
| "learning_rate": 2.2608708840476947e-09, | |
| "loss": 0.0063, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 1.9891800913681172, | |
| "grad_norm": 0.031165743246674538, | |
| "learning_rate": 1.7050631592485657e-09, | |
| "loss": 0.0053, | |
| "step": 12410 | |
| }, | |
| { | |
| "epoch": 1.9907830407950629, | |
| "grad_norm": 0.040396977216005325, | |
| "learning_rate": 1.2275299877517743e-09, | |
| "loss": 0.0065, | |
| "step": 12420 | |
| }, | |
| { | |
| "epoch": 1.9923859902220085, | |
| "grad_norm": 0.05201442912220955, | |
| "learning_rate": 8.282751080646023e-10, | |
| "loss": 0.0064, | |
| "step": 12430 | |
| }, | |
| { | |
| "epoch": 1.993988939648954, | |
| "grad_norm": 0.043205760419368744, | |
| "learning_rate": 5.073016458700952e-10, | |
| "loss": 0.0058, | |
| "step": 12440 | |
| }, | |
| { | |
| "epoch": 1.9955918890758997, | |
| "grad_norm": 0.03582298383116722, | |
| "learning_rate": 2.6461211400152785e-10, | |
| "loss": 0.0065, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 1.9971948385028453, | |
| "grad_norm": 0.032651137560606, | |
| "learning_rate": 1.0020841242575075e-10, | |
| "loss": 0.0065, | |
| "step": 12460 | |
| }, | |
| { | |
| "epoch": 1.998797787929791, | |
| "grad_norm": 0.03591388836503029, | |
| "learning_rate": 1.4091828223206094e-11, | |
| "loss": 0.0059, | |
| "step": 12470 | |
| }, | |
| { | |
| "epoch": 1.9997595575859581, | |
| "step": 12476, | |
| "total_flos": 7.626990245493342e+19, | |
| "train_loss": 0.014145435631614382, | |
| "train_runtime": 240833.2709, | |
| "train_samples_per_second": 9.947, | |
| "train_steps_per_second": 0.052 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 12476, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 7.626990245493342e+19, | |
| "train_batch_size": 12, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
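
The record above ends with the run summary (final step 12476, `train_loss` ≈ 0.0141, ~240,833 s runtime) and the trainer metadata (`logging_steps`, `max_steps`, `train_batch_size`, and the `TrainerControl` callback state). The shape of the file matches the `trainer_state.json` written by the Hugging Face `Trainer`, so the per-step entries in `log_history` can be read back programmatically. Below is a minimal sketch of doing so, assuming the raw `trainer_state.json` is available on disk and `matplotlib` is installed; the file path and output name are illustrative, not taken from this log.

```python
# Minimal sketch: load the trainer state and plot the loss / learning-rate
# curves recorded in log_history. Assumptions: the raw trainer_state.json
# (not the pipe-wrapped rendering above) exists at the path below, and
# matplotlib is installed. Path and output file name are hypothetical.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:  # hypothetical path
    state = json.load(f)

# Keep only the per-step logging entries; the final summary record
# (train_loss, train_runtime, ...) carries no "loss" key and is skipped.
history = [rec for rec in state["log_history"] if "loss" in rec]

steps = [rec["step"] for rec in history]
losses = [rec["loss"] for rec in history]
lrs = [rec["learning_rate"] for rec in history]

fig, ax_loss = plt.subplots()
ax_loss.plot(steps, losses, label="training loss")
ax_loss.set_xlabel("step")
ax_loss.set_ylabel("loss")

# Second y-axis so the decaying learning-rate schedule is readable
# alongside the loss curve.
ax_lr = ax_loss.twinx()
ax_lr.plot(steps, lrs, color="tab:orange", label="learning rate")
ax_lr.set_ylabel("learning rate")

fig.tight_layout()
fig.savefig("training_curves.png")
```

Read this as one possible way to inspect the log, not as part of the training run itself: plotted this way, the entries above show the loss settling around 0.006–0.007 in the second epoch while the learning rate decays smoothly toward zero at step 12476.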