| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 380.95238095238096, |
| "eval_steps": 500, |
| "global_step": 16000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.23809523809523808, |
| "grad_norm": 4.980715274810791, |
| "learning_rate": 1.8e-07, |
| "loss": 0.6604, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.47619047619047616, |
| "grad_norm": 5.100787162780762, |
| "learning_rate": 3.8e-07, |
| "loss": 0.6669, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.7142857142857143, |
| "grad_norm": 4.46560001373291, |
| "learning_rate": 5.8e-07, |
| "loss": 0.6453, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.9523809523809523, |
| "grad_norm": 4.517127513885498, |
| "learning_rate": 7.8e-07, |
| "loss": 0.6192, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.1904761904761905, |
| "grad_norm": 3.4314279556274414, |
| "learning_rate": 9.8e-07, |
| "loss": 0.5167, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.4285714285714286, |
| "grad_norm": 1.9630482196807861, |
| "learning_rate": 1.18e-06, |
| "loss": 0.442, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.6666666666666665, |
| "grad_norm": 1.5227594375610352, |
| "learning_rate": 1.3800000000000001e-06, |
| "loss": 0.3349, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.9047619047619047, |
| "grad_norm": 0.7596906423568726, |
| "learning_rate": 1.5800000000000003e-06, |
| "loss": 0.2814, |
| "step": 80 |
| }, |
| { |
| "epoch": 2.142857142857143, |
| "grad_norm": 0.4877447783946991, |
| "learning_rate": 1.7800000000000001e-06, |
| "loss": 0.2234, |
| "step": 90 |
| }, |
| { |
| "epoch": 2.380952380952381, |
| "grad_norm": 0.37567827105522156, |
| "learning_rate": 1.98e-06, |
| "loss": 0.1902, |
| "step": 100 |
| }, |
| { |
| "epoch": 2.619047619047619, |
| "grad_norm": 0.340460866689682, |
| "learning_rate": 2.1800000000000003e-06, |
| "loss": 0.1719, |
| "step": 110 |
| }, |
| { |
| "epoch": 2.857142857142857, |
| "grad_norm": 0.2655262351036072, |
| "learning_rate": 2.38e-06, |
| "loss": 0.1504, |
| "step": 120 |
| }, |
| { |
| "epoch": 3.0952380952380953, |
| "grad_norm": 0.28478285670280457, |
| "learning_rate": 2.5800000000000003e-06, |
| "loss": 0.1388, |
| "step": 130 |
| }, |
| { |
| "epoch": 3.3333333333333335, |
| "grad_norm": 0.1955876350402832, |
| "learning_rate": 2.78e-06, |
| "loss": 0.1275, |
| "step": 140 |
| }, |
| { |
| "epoch": 3.571428571428571, |
| "grad_norm": 0.17272064089775085, |
| "learning_rate": 2.9800000000000003e-06, |
| "loss": 0.122, |
| "step": 150 |
| }, |
| { |
| "epoch": 3.8095238095238093, |
| "grad_norm": 0.18175487220287323, |
| "learning_rate": 3.1800000000000005e-06, |
| "loss": 0.1142, |
| "step": 160 |
| }, |
| { |
| "epoch": 4.0476190476190474, |
| "grad_norm": 0.15202519297599792, |
| "learning_rate": 3.38e-06, |
| "loss": 0.1067, |
| "step": 170 |
| }, |
| { |
| "epoch": 4.285714285714286, |
| "grad_norm": 0.12951944768428802, |
| "learning_rate": 3.58e-06, |
| "loss": 0.1, |
| "step": 180 |
| }, |
| { |
| "epoch": 4.523809523809524, |
| "grad_norm": 0.12901848554611206, |
| "learning_rate": 3.7800000000000002e-06, |
| "loss": 0.1002, |
| "step": 190 |
| }, |
| { |
| "epoch": 4.761904761904762, |
| "grad_norm": 0.1332135945558548, |
| "learning_rate": 3.98e-06, |
| "loss": 0.0942, |
| "step": 200 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.1288524866104126, |
| "learning_rate": 4.18e-06, |
| "loss": 0.0876, |
| "step": 210 |
| }, |
| { |
| "epoch": 5.238095238095238, |
| "grad_norm": 0.19060096144676208, |
| "learning_rate": 4.38e-06, |
| "loss": 0.087, |
| "step": 220 |
| }, |
| { |
| "epoch": 5.476190476190476, |
| "grad_norm": 0.11821025609970093, |
| "learning_rate": 4.58e-06, |
| "loss": 0.0832, |
| "step": 230 |
| }, |
| { |
| "epoch": 5.714285714285714, |
| "grad_norm": 0.13816511631011963, |
| "learning_rate": 4.780000000000001e-06, |
| "loss": 0.0794, |
| "step": 240 |
| }, |
| { |
| "epoch": 5.9523809523809526, |
| "grad_norm": 0.12835876643657684, |
| "learning_rate": 4.98e-06, |
| "loss": 0.076, |
| "step": 250 |
| }, |
| { |
| "epoch": 6.190476190476191, |
| "grad_norm": 0.11601896584033966, |
| "learning_rate": 5.18e-06, |
| "loss": 0.0714, |
| "step": 260 |
| }, |
| { |
| "epoch": 6.428571428571429, |
| "grad_norm": 0.17429235577583313, |
| "learning_rate": 5.38e-06, |
| "loss": 0.067, |
| "step": 270 |
| }, |
| { |
| "epoch": 6.666666666666667, |
| "grad_norm": 0.1290334016084671, |
| "learning_rate": 5.580000000000001e-06, |
| "loss": 0.0635, |
| "step": 280 |
| }, |
| { |
| "epoch": 6.904761904761905, |
| "grad_norm": 0.12527474761009216, |
| "learning_rate": 5.78e-06, |
| "loss": 0.0623, |
| "step": 290 |
| }, |
| { |
| "epoch": 7.142857142857143, |
| "grad_norm": 0.12742209434509277, |
| "learning_rate": 5.98e-06, |
| "loss": 0.0603, |
| "step": 300 |
| }, |
| { |
| "epoch": 7.380952380952381, |
| "grad_norm": 0.1435515135526657, |
| "learning_rate": 6.18e-06, |
| "loss": 0.0562, |
| "step": 310 |
| }, |
| { |
| "epoch": 7.619047619047619, |
| "grad_norm": 0.11913318186998367, |
| "learning_rate": 6.38e-06, |
| "loss": 0.0537, |
| "step": 320 |
| }, |
| { |
| "epoch": 7.857142857142857, |
| "grad_norm": 0.1420675665140152, |
| "learning_rate": 6.58e-06, |
| "loss": 0.05, |
| "step": 330 |
| }, |
| { |
| "epoch": 8.095238095238095, |
| "grad_norm": 0.14488308131694794, |
| "learning_rate": 6.78e-06, |
| "loss": 0.0508, |
| "step": 340 |
| }, |
| { |
| "epoch": 8.333333333333334, |
| "grad_norm": 0.16232061386108398, |
| "learning_rate": 6.98e-06, |
| "loss": 0.0462, |
| "step": 350 |
| }, |
| { |
| "epoch": 8.571428571428571, |
| "grad_norm": 0.15470632910728455, |
| "learning_rate": 7.180000000000001e-06, |
| "loss": 0.0434, |
| "step": 360 |
| }, |
| { |
| "epoch": 8.80952380952381, |
| "grad_norm": 0.1478538066148758, |
| "learning_rate": 7.3800000000000005e-06, |
| "loss": 0.0436, |
| "step": 370 |
| }, |
| { |
| "epoch": 9.047619047619047, |
| "grad_norm": 0.16160432994365692, |
| "learning_rate": 7.580000000000001e-06, |
| "loss": 0.0404, |
| "step": 380 |
| }, |
| { |
| "epoch": 9.285714285714286, |
| "grad_norm": 0.1626199185848236, |
| "learning_rate": 7.78e-06, |
| "loss": 0.0414, |
| "step": 390 |
| }, |
| { |
| "epoch": 9.523809523809524, |
| "grad_norm": 0.15566575527191162, |
| "learning_rate": 7.98e-06, |
| "loss": 0.0395, |
| "step": 400 |
| }, |
| { |
| "epoch": 9.761904761904763, |
| "grad_norm": 0.11159353703260422, |
| "learning_rate": 8.18e-06, |
| "loss": 0.0358, |
| "step": 410 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.1658770889043808, |
| "learning_rate": 8.380000000000001e-06, |
| "loss": 0.0342, |
| "step": 420 |
| }, |
| { |
| "epoch": 10.238095238095237, |
| "grad_norm": 0.17186634242534637, |
| "learning_rate": 8.580000000000001e-06, |
| "loss": 0.0365, |
| "step": 430 |
| }, |
| { |
| "epoch": 10.476190476190476, |
| "grad_norm": 0.13238121569156647, |
| "learning_rate": 8.78e-06, |
| "loss": 0.034, |
| "step": 440 |
| }, |
| { |
| "epoch": 10.714285714285714, |
| "grad_norm": 0.20483730733394623, |
| "learning_rate": 8.98e-06, |
| "loss": 0.0346, |
| "step": 450 |
| }, |
| { |
| "epoch": 10.952380952380953, |
| "grad_norm": 0.17070575058460236, |
| "learning_rate": 9.180000000000002e-06, |
| "loss": 0.0316, |
| "step": 460 |
| }, |
| { |
| "epoch": 11.19047619047619, |
| "grad_norm": 0.17450638115406036, |
| "learning_rate": 9.38e-06, |
| "loss": 0.0318, |
| "step": 470 |
| }, |
| { |
| "epoch": 11.428571428571429, |
| "grad_norm": 0.15336617827415466, |
| "learning_rate": 9.58e-06, |
| "loss": 0.031, |
| "step": 480 |
| }, |
| { |
| "epoch": 11.666666666666666, |
| "grad_norm": 0.18544168770313263, |
| "learning_rate": 9.78e-06, |
| "loss": 0.03, |
| "step": 490 |
| }, |
| { |
| "epoch": 11.904761904761905, |
| "grad_norm": 0.14671775698661804, |
| "learning_rate": 9.980000000000001e-06, |
| "loss": 0.0297, |
| "step": 500 |
| }, |
| { |
| "epoch": 12.142857142857142, |
| "grad_norm": 0.13371697068214417, |
| "learning_rate": 1.018e-05, |
| "loss": 0.0288, |
| "step": 510 |
| }, |
| { |
| "epoch": 12.380952380952381, |
| "grad_norm": 0.14989984035491943, |
| "learning_rate": 1.038e-05, |
| "loss": 0.0278, |
| "step": 520 |
| }, |
| { |
| "epoch": 12.619047619047619, |
| "grad_norm": 0.10795731842517853, |
| "learning_rate": 1.058e-05, |
| "loss": 0.0276, |
| "step": 530 |
| }, |
| { |
| "epoch": 12.857142857142858, |
| "grad_norm": 0.1730237901210785, |
| "learning_rate": 1.0780000000000002e-05, |
| "loss": 0.0265, |
| "step": 540 |
| }, |
| { |
| "epoch": 13.095238095238095, |
| "grad_norm": 0.1715904027223587, |
| "learning_rate": 1.098e-05, |
| "loss": 0.0265, |
| "step": 550 |
| }, |
| { |
| "epoch": 13.333333333333334, |
| "grad_norm": 0.15687128901481628, |
| "learning_rate": 1.118e-05, |
| "loss": 0.0265, |
| "step": 560 |
| }, |
| { |
| "epoch": 13.571428571428571, |
| "grad_norm": 0.2053767293691635, |
| "learning_rate": 1.1380000000000001e-05, |
| "loss": 0.0257, |
| "step": 570 |
| }, |
| { |
| "epoch": 13.80952380952381, |
| "grad_norm": 0.19093821942806244, |
| "learning_rate": 1.1580000000000001e-05, |
| "loss": 0.0256, |
| "step": 580 |
| }, |
| { |
| "epoch": 14.047619047619047, |
| "grad_norm": 0.19474785029888153, |
| "learning_rate": 1.178e-05, |
| "loss": 0.0263, |
| "step": 590 |
| }, |
| { |
| "epoch": 14.285714285714286, |
| "grad_norm": 0.15355925261974335, |
| "learning_rate": 1.198e-05, |
| "loss": 0.0243, |
| "step": 600 |
| }, |
| { |
| "epoch": 14.523809523809524, |
| "grad_norm": 0.1538766324520111, |
| "learning_rate": 1.2180000000000002e-05, |
| "loss": 0.0254, |
| "step": 610 |
| }, |
| { |
| "epoch": 14.761904761904763, |
| "grad_norm": 0.17512081563472748, |
| "learning_rate": 1.238e-05, |
| "loss": 0.0233, |
| "step": 620 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 0.1335851401090622, |
| "learning_rate": 1.258e-05, |
| "loss": 0.0246, |
| "step": 630 |
| }, |
| { |
| "epoch": 15.238095238095237, |
| "grad_norm": 0.1288122981786728, |
| "learning_rate": 1.278e-05, |
| "loss": 0.0241, |
| "step": 640 |
| }, |
| { |
| "epoch": 15.476190476190476, |
| "grad_norm": 0.20682376623153687, |
| "learning_rate": 1.2980000000000001e-05, |
| "loss": 0.0224, |
| "step": 650 |
| }, |
| { |
| "epoch": 15.714285714285714, |
| "grad_norm": 0.1862407773733139, |
| "learning_rate": 1.3180000000000001e-05, |
| "loss": 0.023, |
| "step": 660 |
| }, |
| { |
| "epoch": 15.952380952380953, |
| "grad_norm": 0.17341835796833038, |
| "learning_rate": 1.338e-05, |
| "loss": 0.0224, |
| "step": 670 |
| }, |
| { |
| "epoch": 16.19047619047619, |
| "grad_norm": 0.16344322264194489, |
| "learning_rate": 1.358e-05, |
| "loss": 0.0221, |
| "step": 680 |
| }, |
| { |
| "epoch": 16.428571428571427, |
| "grad_norm": 0.1753958761692047, |
| "learning_rate": 1.3780000000000002e-05, |
| "loss": 0.0205, |
| "step": 690 |
| }, |
| { |
| "epoch": 16.666666666666668, |
| "grad_norm": 0.21752162277698517, |
| "learning_rate": 1.3980000000000002e-05, |
| "loss": 0.0216, |
| "step": 700 |
| }, |
| { |
| "epoch": 16.904761904761905, |
| "grad_norm": 0.1686408370733261, |
| "learning_rate": 1.4180000000000001e-05, |
| "loss": 0.0216, |
| "step": 710 |
| }, |
| { |
| "epoch": 17.142857142857142, |
| "grad_norm": 0.19970044493675232, |
| "learning_rate": 1.4380000000000001e-05, |
| "loss": 0.0223, |
| "step": 720 |
| }, |
| { |
| "epoch": 17.38095238095238, |
| "grad_norm": 0.17127229273319244, |
| "learning_rate": 1.4580000000000003e-05, |
| "loss": 0.0209, |
| "step": 730 |
| }, |
| { |
| "epoch": 17.61904761904762, |
| "grad_norm": 0.1305769830942154, |
| "learning_rate": 1.4779999999999999e-05, |
| "loss": 0.0224, |
| "step": 740 |
| }, |
| { |
| "epoch": 17.857142857142858, |
| "grad_norm": 0.14144815504550934, |
| "learning_rate": 1.4979999999999999e-05, |
| "loss": 0.0195, |
| "step": 750 |
| }, |
| { |
| "epoch": 18.095238095238095, |
| "grad_norm": 0.17538538575172424, |
| "learning_rate": 1.518e-05, |
| "loss": 0.0193, |
| "step": 760 |
| }, |
| { |
| "epoch": 18.333333333333332, |
| "grad_norm": 0.18594413995742798, |
| "learning_rate": 1.538e-05, |
| "loss": 0.019, |
| "step": 770 |
| }, |
| { |
| "epoch": 18.571428571428573, |
| "grad_norm": 0.21348120272159576, |
| "learning_rate": 1.558e-05, |
| "loss": 0.0203, |
| "step": 780 |
| }, |
| { |
| "epoch": 18.80952380952381, |
| "grad_norm": 0.24014145135879517, |
| "learning_rate": 1.578e-05, |
| "loss": 0.0202, |
| "step": 790 |
| }, |
| { |
| "epoch": 19.047619047619047, |
| "grad_norm": 0.24534733593463898, |
| "learning_rate": 1.598e-05, |
| "loss": 0.0191, |
| "step": 800 |
| }, |
| { |
| "epoch": 19.285714285714285, |
| "grad_norm": 0.18500757217407227, |
| "learning_rate": 1.618e-05, |
| "loss": 0.0184, |
| "step": 810 |
| }, |
| { |
| "epoch": 19.523809523809526, |
| "grad_norm": 0.17265824973583221, |
| "learning_rate": 1.6380000000000002e-05, |
| "loss": 0.0194, |
| "step": 820 |
| }, |
| { |
| "epoch": 19.761904761904763, |
| "grad_norm": 0.17397823929786682, |
| "learning_rate": 1.658e-05, |
| "loss": 0.0203, |
| "step": 830 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 0.17956259846687317, |
| "learning_rate": 1.6780000000000002e-05, |
| "loss": 0.0184, |
| "step": 840 |
| }, |
| { |
| "epoch": 20.238095238095237, |
| "grad_norm": 0.2076825201511383, |
| "learning_rate": 1.698e-05, |
| "loss": 0.0186, |
| "step": 850 |
| }, |
| { |
| "epoch": 20.476190476190474, |
| "grad_norm": 0.215548574924469, |
| "learning_rate": 1.718e-05, |
| "loss": 0.0179, |
| "step": 860 |
| }, |
| { |
| "epoch": 20.714285714285715, |
| "grad_norm": 0.24945653975009918, |
| "learning_rate": 1.7380000000000003e-05, |
| "loss": 0.0192, |
| "step": 870 |
| }, |
| { |
| "epoch": 20.952380952380953, |
| "grad_norm": 0.17560195922851562, |
| "learning_rate": 1.758e-05, |
| "loss": 0.018, |
| "step": 880 |
| }, |
| { |
| "epoch": 21.19047619047619, |
| "grad_norm": 0.2002139538526535, |
| "learning_rate": 1.7780000000000003e-05, |
| "loss": 0.0181, |
| "step": 890 |
| }, |
| { |
| "epoch": 21.428571428571427, |
| "grad_norm": 0.22682271897792816, |
| "learning_rate": 1.798e-05, |
| "loss": 0.0185, |
| "step": 900 |
| }, |
| { |
| "epoch": 21.666666666666668, |
| "grad_norm": 0.1713908612728119, |
| "learning_rate": 1.818e-05, |
| "loss": 0.017, |
| "step": 910 |
| }, |
| { |
| "epoch": 21.904761904761905, |
| "grad_norm": 0.18751071393489838, |
| "learning_rate": 1.838e-05, |
| "loss": 0.0175, |
| "step": 920 |
| }, |
| { |
| "epoch": 22.142857142857142, |
| "grad_norm": 0.15847370028495789, |
| "learning_rate": 1.858e-05, |
| "loss": 0.0178, |
| "step": 930 |
| }, |
| { |
| "epoch": 22.38095238095238, |
| "grad_norm": 0.16867293417453766, |
| "learning_rate": 1.878e-05, |
| "loss": 0.0175, |
| "step": 940 |
| }, |
| { |
| "epoch": 22.61904761904762, |
| "grad_norm": 0.1420915126800537, |
| "learning_rate": 1.898e-05, |
| "loss": 0.0174, |
| "step": 950 |
| }, |
| { |
| "epoch": 22.857142857142858, |
| "grad_norm": 0.17012666165828705, |
| "learning_rate": 1.918e-05, |
| "loss": 0.0164, |
| "step": 960 |
| }, |
| { |
| "epoch": 23.095238095238095, |
| "grad_norm": 0.15484298765659332, |
| "learning_rate": 1.938e-05, |
| "loss": 0.0165, |
| "step": 970 |
| }, |
| { |
| "epoch": 23.333333333333332, |
| "grad_norm": 0.23401230573654175, |
| "learning_rate": 1.9580000000000002e-05, |
| "loss": 0.0174, |
| "step": 980 |
| }, |
| { |
| "epoch": 23.571428571428573, |
| "grad_norm": 0.18525172770023346, |
| "learning_rate": 1.978e-05, |
| "loss": 0.0164, |
| "step": 990 |
| }, |
| { |
| "epoch": 23.80952380952381, |
| "grad_norm": 0.17840850353240967, |
| "learning_rate": 1.9980000000000002e-05, |
| "loss": 0.0193, |
| "step": 1000 |
| }, |
| { |
| "epoch": 24.047619047619047, |
| "grad_norm": 0.14338454604148865, |
| "learning_rate": 2.0180000000000003e-05, |
| "loss": 0.0163, |
| "step": 1010 |
| }, |
| { |
| "epoch": 24.285714285714285, |
| "grad_norm": 0.17406629025936127, |
| "learning_rate": 2.038e-05, |
| "loss": 0.0154, |
| "step": 1020 |
| }, |
| { |
| "epoch": 24.523809523809526, |
| "grad_norm": 0.2546505928039551, |
| "learning_rate": 2.0580000000000003e-05, |
| "loss": 0.0165, |
| "step": 1030 |
| }, |
| { |
| "epoch": 24.761904761904763, |
| "grad_norm": 0.2101059854030609, |
| "learning_rate": 2.078e-05, |
| "loss": 0.0167, |
| "step": 1040 |
| }, |
| { |
| "epoch": 25.0, |
| "grad_norm": 0.17019684612751007, |
| "learning_rate": 2.098e-05, |
| "loss": 0.0153, |
| "step": 1050 |
| }, |
| { |
| "epoch": 25.238095238095237, |
| "grad_norm": 0.18125365674495697, |
| "learning_rate": 2.118e-05, |
| "loss": 0.015, |
| "step": 1060 |
| }, |
| { |
| "epoch": 25.476190476190474, |
| "grad_norm": 0.17026013135910034, |
| "learning_rate": 2.138e-05, |
| "loss": 0.015, |
| "step": 1070 |
| }, |
| { |
| "epoch": 25.714285714285715, |
| "grad_norm": 0.14356698095798492, |
| "learning_rate": 2.158e-05, |
| "loss": 0.0156, |
| "step": 1080 |
| }, |
| { |
| "epoch": 25.952380952380953, |
| "grad_norm": 0.21496063470840454, |
| "learning_rate": 2.178e-05, |
| "loss": 0.0153, |
| "step": 1090 |
| }, |
| { |
| "epoch": 26.19047619047619, |
| "grad_norm": 0.22909711301326752, |
| "learning_rate": 2.198e-05, |
| "loss": 0.0162, |
| "step": 1100 |
| }, |
| { |
| "epoch": 26.428571428571427, |
| "grad_norm": 0.1759607195854187, |
| "learning_rate": 2.218e-05, |
| "loss": 0.0154, |
| "step": 1110 |
| }, |
| { |
| "epoch": 26.666666666666668, |
| "grad_norm": 0.2447257786989212, |
| "learning_rate": 2.2380000000000003e-05, |
| "loss": 0.0149, |
| "step": 1120 |
| }, |
| { |
| "epoch": 26.904761904761905, |
| "grad_norm": 0.21732710301876068, |
| "learning_rate": 2.258e-05, |
| "loss": 0.0146, |
| "step": 1130 |
| }, |
| { |
| "epoch": 27.142857142857142, |
| "grad_norm": 0.22138142585754395, |
| "learning_rate": 2.2780000000000002e-05, |
| "loss": 0.0148, |
| "step": 1140 |
| }, |
| { |
| "epoch": 27.38095238095238, |
| "grad_norm": 0.15064100921154022, |
| "learning_rate": 2.298e-05, |
| "loss": 0.015, |
| "step": 1150 |
| }, |
| { |
| "epoch": 27.61904761904762, |
| "grad_norm": 0.20759208500385284, |
| "learning_rate": 2.318e-05, |
| "loss": 0.0149, |
| "step": 1160 |
| }, |
| { |
| "epoch": 27.857142857142858, |
| "grad_norm": 0.18319548666477203, |
| "learning_rate": 2.3380000000000003e-05, |
| "loss": 0.0151, |
| "step": 1170 |
| }, |
| { |
| "epoch": 28.095238095238095, |
| "grad_norm": 0.22574104368686676, |
| "learning_rate": 2.358e-05, |
| "loss": 0.0154, |
| "step": 1180 |
| }, |
| { |
| "epoch": 28.333333333333332, |
| "grad_norm": 0.2193443924188614, |
| "learning_rate": 2.3780000000000003e-05, |
| "loss": 0.0155, |
| "step": 1190 |
| }, |
| { |
| "epoch": 28.571428571428573, |
| "grad_norm": 0.219894140958786, |
| "learning_rate": 2.398e-05, |
| "loss": 0.0155, |
| "step": 1200 |
| }, |
| { |
| "epoch": 28.80952380952381, |
| "grad_norm": 0.2714121639728546, |
| "learning_rate": 2.418e-05, |
| "loss": 0.0159, |
| "step": 1210 |
| }, |
| { |
| "epoch": 29.047619047619047, |
| "grad_norm": 0.2523249089717865, |
| "learning_rate": 2.438e-05, |
| "loss": 0.0145, |
| "step": 1220 |
| }, |
| { |
| "epoch": 29.285714285714285, |
| "grad_norm": 0.19838352501392365, |
| "learning_rate": 2.4580000000000002e-05, |
| "loss": 0.0147, |
| "step": 1230 |
| }, |
| { |
| "epoch": 29.523809523809526, |
| "grad_norm": 0.18331754207611084, |
| "learning_rate": 2.478e-05, |
| "loss": 0.0159, |
| "step": 1240 |
| }, |
| { |
| "epoch": 29.761904761904763, |
| "grad_norm": 0.27887165546417236, |
| "learning_rate": 2.498e-05, |
| "loss": 0.015, |
| "step": 1250 |
| }, |
| { |
| "epoch": 30.0, |
| "grad_norm": 0.2225918471813202, |
| "learning_rate": 2.5180000000000003e-05, |
| "loss": 0.015, |
| "step": 1260 |
| }, |
| { |
| "epoch": 30.238095238095237, |
| "grad_norm": 0.30043312907218933, |
| "learning_rate": 2.5380000000000004e-05, |
| "loss": 0.0135, |
| "step": 1270 |
| }, |
| { |
| "epoch": 30.476190476190474, |
| "grad_norm": 0.24254007637500763, |
| "learning_rate": 2.5580000000000002e-05, |
| "loss": 0.0142, |
| "step": 1280 |
| }, |
| { |
| "epoch": 30.714285714285715, |
| "grad_norm": 0.18920157849788666, |
| "learning_rate": 2.5779999999999997e-05, |
| "loss": 0.0149, |
| "step": 1290 |
| }, |
| { |
| "epoch": 30.952380952380953, |
| "grad_norm": 0.21070720255374908, |
| "learning_rate": 2.598e-05, |
| "loss": 0.013, |
| "step": 1300 |
| }, |
| { |
| "epoch": 31.19047619047619, |
| "grad_norm": 0.18980194628238678, |
| "learning_rate": 2.618e-05, |
| "loss": 0.0133, |
| "step": 1310 |
| }, |
| { |
| "epoch": 31.428571428571427, |
| "grad_norm": 0.19967220723628998, |
| "learning_rate": 2.6379999999999998e-05, |
| "loss": 0.014, |
| "step": 1320 |
| }, |
| { |
| "epoch": 31.666666666666668, |
| "grad_norm": 0.24424080550670624, |
| "learning_rate": 2.658e-05, |
| "loss": 0.013, |
| "step": 1330 |
| }, |
| { |
| "epoch": 31.904761904761905, |
| "grad_norm": 0.24723918735980988, |
| "learning_rate": 2.678e-05, |
| "loss": 0.0133, |
| "step": 1340 |
| }, |
| { |
| "epoch": 32.142857142857146, |
| "grad_norm": 0.1781580150127411, |
| "learning_rate": 2.698e-05, |
| "loss": 0.0132, |
| "step": 1350 |
| }, |
| { |
| "epoch": 32.38095238095238, |
| "grad_norm": 0.1974799484014511, |
| "learning_rate": 2.718e-05, |
| "loss": 0.0133, |
| "step": 1360 |
| }, |
| { |
| "epoch": 32.61904761904762, |
| "grad_norm": 0.25829148292541504, |
| "learning_rate": 2.738e-05, |
| "loss": 0.0127, |
| "step": 1370 |
| }, |
| { |
| "epoch": 32.857142857142854, |
| "grad_norm": 0.18170489370822906, |
| "learning_rate": 2.758e-05, |
| "loss": 0.0136, |
| "step": 1380 |
| }, |
| { |
| "epoch": 33.095238095238095, |
| "grad_norm": 0.19150814414024353, |
| "learning_rate": 2.778e-05, |
| "loss": 0.0136, |
| "step": 1390 |
| }, |
| { |
| "epoch": 33.333333333333336, |
| "grad_norm": 0.24943962693214417, |
| "learning_rate": 2.798e-05, |
| "loss": 0.015, |
| "step": 1400 |
| }, |
| { |
| "epoch": 33.57142857142857, |
| "grad_norm": 0.16312743723392487, |
| "learning_rate": 2.818e-05, |
| "loss": 0.0136, |
| "step": 1410 |
| }, |
| { |
| "epoch": 33.80952380952381, |
| "grad_norm": 0.19045665860176086, |
| "learning_rate": 2.8380000000000003e-05, |
| "loss": 0.0133, |
| "step": 1420 |
| }, |
| { |
| "epoch": 34.04761904761905, |
| "grad_norm": 0.16698956489562988, |
| "learning_rate": 2.858e-05, |
| "loss": 0.0127, |
| "step": 1430 |
| }, |
| { |
| "epoch": 34.285714285714285, |
| "grad_norm": 0.14542844891548157, |
| "learning_rate": 2.8780000000000002e-05, |
| "loss": 0.0132, |
| "step": 1440 |
| }, |
| { |
| "epoch": 34.523809523809526, |
| "grad_norm": 0.1837533563375473, |
| "learning_rate": 2.898e-05, |
| "loss": 0.0128, |
| "step": 1450 |
| }, |
| { |
| "epoch": 34.76190476190476, |
| "grad_norm": 0.13794368505477905, |
| "learning_rate": 2.9180000000000002e-05, |
| "loss": 0.0124, |
| "step": 1460 |
| }, |
| { |
| "epoch": 35.0, |
| "grad_norm": 0.18701200187206268, |
| "learning_rate": 2.9380000000000003e-05, |
| "loss": 0.0124, |
| "step": 1470 |
| }, |
| { |
| "epoch": 35.23809523809524, |
| "grad_norm": 0.2423512190580368, |
| "learning_rate": 2.958e-05, |
| "loss": 0.012, |
| "step": 1480 |
| }, |
| { |
| "epoch": 35.476190476190474, |
| "grad_norm": 0.1845133900642395, |
| "learning_rate": 2.9780000000000003e-05, |
| "loss": 0.0139, |
| "step": 1490 |
| }, |
| { |
| "epoch": 35.714285714285715, |
| "grad_norm": 0.24942807853221893, |
| "learning_rate": 2.998e-05, |
| "loss": 0.0123, |
| "step": 1500 |
| }, |
| { |
| "epoch": 35.95238095238095, |
| "grad_norm": 0.21653687953948975, |
| "learning_rate": 3.0180000000000002e-05, |
| "loss": 0.013, |
| "step": 1510 |
| }, |
| { |
| "epoch": 36.19047619047619, |
| "grad_norm": 0.25448745489120483, |
| "learning_rate": 3.0380000000000004e-05, |
| "loss": 0.0133, |
| "step": 1520 |
| }, |
| { |
| "epoch": 36.42857142857143, |
| "grad_norm": 0.25178658962249756, |
| "learning_rate": 3.058e-05, |
| "loss": 0.0134, |
| "step": 1530 |
| }, |
| { |
| "epoch": 36.666666666666664, |
| "grad_norm": 0.3439308702945709, |
| "learning_rate": 3.078e-05, |
| "loss": 0.0126, |
| "step": 1540 |
| }, |
| { |
| "epoch": 36.904761904761905, |
| "grad_norm": 0.27060025930404663, |
| "learning_rate": 3.0980000000000005e-05, |
| "loss": 0.0119, |
| "step": 1550 |
| }, |
| { |
| "epoch": 37.142857142857146, |
| "grad_norm": 0.22990508377552032, |
| "learning_rate": 3.118e-05, |
| "loss": 0.0127, |
| "step": 1560 |
| }, |
| { |
| "epoch": 37.38095238095238, |
| "grad_norm": 0.19711598753929138, |
| "learning_rate": 3.138e-05, |
| "loss": 0.0113, |
| "step": 1570 |
| }, |
| { |
| "epoch": 37.61904761904762, |
| "grad_norm": 0.22587527334690094, |
| "learning_rate": 3.1580000000000006e-05, |
| "loss": 0.0118, |
| "step": 1580 |
| }, |
| { |
| "epoch": 37.857142857142854, |
| "grad_norm": 0.19575542211532593, |
| "learning_rate": 3.1780000000000004e-05, |
| "loss": 0.0125, |
| "step": 1590 |
| }, |
| { |
| "epoch": 38.095238095238095, |
| "grad_norm": 0.18160071969032288, |
| "learning_rate": 3.198e-05, |
| "loss": 0.0126, |
| "step": 1600 |
| }, |
| { |
| "epoch": 38.333333333333336, |
| "grad_norm": 0.19056737422943115, |
| "learning_rate": 3.218e-05, |
| "loss": 0.0125, |
| "step": 1610 |
| }, |
| { |
| "epoch": 38.57142857142857, |
| "grad_norm": 0.23315215110778809, |
| "learning_rate": 3.238e-05, |
| "loss": 0.0112, |
| "step": 1620 |
| }, |
| { |
| "epoch": 38.80952380952381, |
| "grad_norm": 0.1477610021829605, |
| "learning_rate": 3.2579999999999996e-05, |
| "loss": 0.0127, |
| "step": 1630 |
| }, |
| { |
| "epoch": 39.04761904761905, |
| "grad_norm": 0.28820210695266724, |
| "learning_rate": 3.278e-05, |
| "loss": 0.0115, |
| "step": 1640 |
| }, |
| { |
| "epoch": 39.285714285714285, |
| "grad_norm": 0.20518805086612701, |
| "learning_rate": 3.298e-05, |
| "loss": 0.0115, |
| "step": 1650 |
| }, |
| { |
| "epoch": 39.523809523809526, |
| "grad_norm": 0.17225578427314758, |
| "learning_rate": 3.318e-05, |
| "loss": 0.0133, |
| "step": 1660 |
| }, |
| { |
| "epoch": 39.76190476190476, |
| "grad_norm": 0.20158152282238007, |
| "learning_rate": 3.338e-05, |
| "loss": 0.012, |
| "step": 1670 |
| }, |
| { |
| "epoch": 40.0, |
| "grad_norm": 0.16892178356647491, |
| "learning_rate": 3.358e-05, |
| "loss": 0.012, |
| "step": 1680 |
| }, |
| { |
| "epoch": 40.23809523809524, |
| "grad_norm": 0.18283018469810486, |
| "learning_rate": 3.378e-05, |
| "loss": 0.0127, |
| "step": 1690 |
| }, |
| { |
| "epoch": 40.476190476190474, |
| "grad_norm": 0.16400155425071716, |
| "learning_rate": 3.398e-05, |
| "loss": 0.0122, |
| "step": 1700 |
| }, |
| { |
| "epoch": 40.714285714285715, |
| "grad_norm": 0.1659807711839676, |
| "learning_rate": 3.418e-05, |
| "loss": 0.0119, |
| "step": 1710 |
| }, |
| { |
| "epoch": 40.95238095238095, |
| "grad_norm": 0.25890877842903137, |
| "learning_rate": 3.438e-05, |
| "loss": 0.0116, |
| "step": 1720 |
| }, |
| { |
| "epoch": 41.19047619047619, |
| "grad_norm": 0.2385096698999405, |
| "learning_rate": 3.4580000000000004e-05, |
| "loss": 0.012, |
| "step": 1730 |
| }, |
| { |
| "epoch": 41.42857142857143, |
| "grad_norm": 0.24934934079647064, |
| "learning_rate": 3.478e-05, |
| "loss": 0.0118, |
| "step": 1740 |
| }, |
| { |
| "epoch": 41.666666666666664, |
| "grad_norm": 0.22661565244197845, |
| "learning_rate": 3.498e-05, |
| "loss": 0.0112, |
| "step": 1750 |
| }, |
| { |
| "epoch": 41.904761904761905, |
| "grad_norm": 0.19338788092136383, |
| "learning_rate": 3.518e-05, |
| "loss": 0.0112, |
| "step": 1760 |
| }, |
| { |
| "epoch": 42.142857142857146, |
| "grad_norm": 0.17126916348934174, |
| "learning_rate": 3.5380000000000003e-05, |
| "loss": 0.0109, |
| "step": 1770 |
| }, |
| { |
| "epoch": 42.38095238095238, |
| "grad_norm": 0.17331229150295258, |
| "learning_rate": 3.558e-05, |
| "loss": 0.0112, |
| "step": 1780 |
| }, |
| { |
| "epoch": 42.61904761904762, |
| "grad_norm": 0.23362383246421814, |
| "learning_rate": 3.578e-05, |
| "loss": 0.0107, |
| "step": 1790 |
| }, |
| { |
| "epoch": 42.857142857142854, |
| "grad_norm": 0.22185546159744263, |
| "learning_rate": 3.5980000000000004e-05, |
| "loss": 0.0119, |
| "step": 1800 |
| }, |
| { |
| "epoch": 43.095238095238095, |
| "grad_norm": 0.1831795573234558, |
| "learning_rate": 3.618e-05, |
| "loss": 0.0115, |
| "step": 1810 |
| }, |
| { |
| "epoch": 43.333333333333336, |
| "grad_norm": 0.2054087072610855, |
| "learning_rate": 3.638e-05, |
| "loss": 0.0104, |
| "step": 1820 |
| }, |
| { |
| "epoch": 43.57142857142857, |
| "grad_norm": 0.19214603304862976, |
| "learning_rate": 3.6580000000000006e-05, |
| "loss": 0.0117, |
| "step": 1830 |
| }, |
| { |
| "epoch": 43.80952380952381, |
| "grad_norm": 0.29215097427368164, |
| "learning_rate": 3.6780000000000004e-05, |
| "loss": 0.0116, |
| "step": 1840 |
| }, |
| { |
| "epoch": 44.04761904761905, |
| "grad_norm": 0.2026723176240921, |
| "learning_rate": 3.698e-05, |
| "loss": 0.0119, |
| "step": 1850 |
| }, |
| { |
| "epoch": 44.285714285714285, |
| "grad_norm": 0.19622834026813507, |
| "learning_rate": 3.7180000000000007e-05, |
| "loss": 0.0103, |
| "step": 1860 |
| }, |
| { |
| "epoch": 44.523809523809526, |
| "grad_norm": 0.2094084918498993, |
| "learning_rate": 3.7380000000000005e-05, |
| "loss": 0.0107, |
| "step": 1870 |
| }, |
| { |
| "epoch": 44.76190476190476, |
| "grad_norm": 0.16172198951244354, |
| "learning_rate": 3.758e-05, |
| "loss": 0.0102, |
| "step": 1880 |
| }, |
| { |
| "epoch": 45.0, |
| "grad_norm": 0.21518279612064362, |
| "learning_rate": 3.778000000000001e-05, |
| "loss": 0.0109, |
| "step": 1890 |
| }, |
| { |
| "epoch": 45.23809523809524, |
| "grad_norm": 0.1921117603778839, |
| "learning_rate": 3.7980000000000006e-05, |
| "loss": 0.0102, |
| "step": 1900 |
| }, |
| { |
| "epoch": 45.476190476190474, |
| "grad_norm": 0.28162482380867004, |
| "learning_rate": 3.818e-05, |
| "loss": 0.0105, |
| "step": 1910 |
| }, |
| { |
| "epoch": 45.714285714285715, |
| "grad_norm": 0.21823492646217346, |
| "learning_rate": 3.838e-05, |
| "loss": 0.0116, |
| "step": 1920 |
| }, |
| { |
| "epoch": 45.95238095238095, |
| "grad_norm": 0.19757795333862305, |
| "learning_rate": 3.858e-05, |
| "loss": 0.011, |
| "step": 1930 |
| }, |
| { |
| "epoch": 46.19047619047619, |
| "grad_norm": 0.17883461713790894, |
| "learning_rate": 3.878e-05, |
| "loss": 0.0105, |
| "step": 1940 |
| }, |
| { |
| "epoch": 46.42857142857143, |
| "grad_norm": 0.18027201294898987, |
| "learning_rate": 3.898e-05, |
| "loss": 0.011, |
| "step": 1950 |
| }, |
| { |
| "epoch": 46.666666666666664, |
| "grad_norm": 0.24172571301460266, |
| "learning_rate": 3.918e-05, |
| "loss": 0.0101, |
| "step": 1960 |
| }, |
| { |
| "epoch": 46.904761904761905, |
| "grad_norm": 0.20530356466770172, |
| "learning_rate": 3.938e-05, |
| "loss": 0.0103, |
| "step": 1970 |
| }, |
| { |
| "epoch": 47.142857142857146, |
| "grad_norm": 0.17441929876804352, |
| "learning_rate": 3.958e-05, |
| "loss": 0.011, |
| "step": 1980 |
| }, |
| { |
| "epoch": 47.38095238095238, |
| "grad_norm": 0.22547873854637146, |
| "learning_rate": 3.978e-05, |
| "loss": 0.0113, |
| "step": 1990 |
| }, |
| { |
| "epoch": 47.61904761904762, |
| "grad_norm": 0.18987424671649933, |
| "learning_rate": 3.998e-05, |
| "loss": 0.0112, |
| "step": 2000 |
| }, |
| { |
| "epoch": 47.857142857142854, |
| "grad_norm": 0.24269607663154602, |
| "learning_rate": 4.018e-05, |
| "loss": 0.0099, |
| "step": 2010 |
| }, |
| { |
| "epoch": 48.095238095238095, |
| "grad_norm": 0.3229423761367798, |
| "learning_rate": 4.038e-05, |
| "loss": 0.0108, |
| "step": 2020 |
| }, |
| { |
| "epoch": 48.333333333333336, |
| "grad_norm": 0.2665686011314392, |
| "learning_rate": 4.058e-05, |
| "loss": 0.0108, |
| "step": 2030 |
| }, |
| { |
| "epoch": 48.57142857142857, |
| "grad_norm": 0.19778819382190704, |
| "learning_rate": 4.078e-05, |
| "loss": 0.0105, |
| "step": 2040 |
| }, |
| { |
| "epoch": 48.80952380952381, |
| "grad_norm": 0.21437539160251617, |
| "learning_rate": 4.0980000000000004e-05, |
| "loss": 0.0108, |
| "step": 2050 |
| }, |
| { |
| "epoch": 49.04761904761905, |
| "grad_norm": 0.275593101978302, |
| "learning_rate": 4.118e-05, |
| "loss": 0.0097, |
| "step": 2060 |
| }, |
| { |
| "epoch": 49.285714285714285, |
| "grad_norm": 0.24952125549316406, |
| "learning_rate": 4.138e-05, |
| "loss": 0.0102, |
| "step": 2070 |
| }, |
| { |
| "epoch": 49.523809523809526, |
| "grad_norm": 0.24934525787830353, |
| "learning_rate": 4.1580000000000005e-05, |
| "loss": 0.0103, |
| "step": 2080 |
| }, |
| { |
| "epoch": 49.76190476190476, |
| "grad_norm": 0.2743508815765381, |
| "learning_rate": 4.178e-05, |
| "loss": 0.0104, |
| "step": 2090 |
| }, |
| { |
| "epoch": 50.0, |
| "grad_norm": 0.26263362169265747, |
| "learning_rate": 4.198e-05, |
| "loss": 0.0096, |
| "step": 2100 |
| }, |
| { |
| "epoch": 50.23809523809524, |
| "grad_norm": 0.1859271079301834, |
| "learning_rate": 4.2180000000000006e-05, |
| "loss": 0.01, |
| "step": 2110 |
| }, |
| { |
| "epoch": 50.476190476190474, |
| "grad_norm": 0.19789977371692657, |
| "learning_rate": 4.2380000000000004e-05, |
| "loss": 0.0106, |
| "step": 2120 |
| }, |
| { |
| "epoch": 50.714285714285715, |
| "grad_norm": 0.19243483245372772, |
| "learning_rate": 4.258e-05, |
| "loss": 0.0105, |
| "step": 2130 |
| }, |
| { |
| "epoch": 50.95238095238095, |
| "grad_norm": 0.21670812368392944, |
| "learning_rate": 4.278e-05, |
| "loss": 0.01, |
| "step": 2140 |
| }, |
| { |
| "epoch": 51.19047619047619, |
| "grad_norm": 0.2696094810962677, |
| "learning_rate": 4.2980000000000005e-05, |
| "loss": 0.0106, |
| "step": 2150 |
| }, |
| { |
| "epoch": 51.42857142857143, |
| "grad_norm": 0.16100794076919556, |
| "learning_rate": 4.318e-05, |
| "loss": 0.0106, |
| "step": 2160 |
| }, |
| { |
| "epoch": 51.666666666666664, |
| "grad_norm": 0.20846322178840637, |
| "learning_rate": 4.338e-05, |
| "loss": 0.0106, |
| "step": 2170 |
| }, |
| { |
| "epoch": 51.904761904761905, |
| "grad_norm": 0.16723722219467163, |
| "learning_rate": 4.3580000000000006e-05, |
| "loss": 0.0094, |
| "step": 2180 |
| }, |
| { |
| "epoch": 52.142857142857146, |
| "grad_norm": 0.2035456895828247, |
| "learning_rate": 4.3780000000000004e-05, |
| "loss": 0.009, |
| "step": 2190 |
| }, |
| { |
| "epoch": 52.38095238095238, |
| "grad_norm": 0.2457050383090973, |
| "learning_rate": 4.398e-05, |
| "loss": 0.0105, |
| "step": 2200 |
| }, |
| { |
| "epoch": 52.61904761904762, |
| "grad_norm": 0.24727100133895874, |
| "learning_rate": 4.418000000000001e-05, |
| "loss": 0.0102, |
| "step": 2210 |
| }, |
| { |
| "epoch": 52.857142857142854, |
| "grad_norm": 0.23075580596923828, |
| "learning_rate": 4.438e-05, |
| "loss": 0.0104, |
| "step": 2220 |
| }, |
| { |
| "epoch": 53.095238095238095, |
| "grad_norm": 0.23572269082069397, |
| "learning_rate": 4.458e-05, |
| "loss": 0.0104, |
| "step": 2230 |
| }, |
| { |
| "epoch": 53.333333333333336, |
| "grad_norm": 0.18401513993740082, |
| "learning_rate": 4.478e-05, |
| "loss": 0.0107, |
| "step": 2240 |
| }, |
| { |
| "epoch": 53.57142857142857, |
| "grad_norm": 0.1530696302652359, |
| "learning_rate": 4.498e-05, |
| "loss": 0.0107, |
| "step": 2250 |
| }, |
| { |
| "epoch": 53.80952380952381, |
| "grad_norm": 0.1747877299785614, |
| "learning_rate": 4.518e-05, |
| "loss": 0.011, |
| "step": 2260 |
| }, |
| { |
| "epoch": 54.04761904761905, |
| "grad_norm": 0.26969122886657715, |
| "learning_rate": 4.538e-05, |
| "loss": 0.0097, |
| "step": 2270 |
| }, |
| { |
| "epoch": 54.285714285714285, |
| "grad_norm": 0.2572666108608246, |
| "learning_rate": 4.558e-05, |
| "loss": 0.0095, |
| "step": 2280 |
| }, |
| { |
| "epoch": 54.523809523809526, |
| "grad_norm": 0.15771296620368958, |
| "learning_rate": 4.578e-05, |
| "loss": 0.0093, |
| "step": 2290 |
| }, |
| { |
| "epoch": 54.76190476190476, |
| "grad_norm": 0.16032639145851135, |
| "learning_rate": 4.5980000000000004e-05, |
| "loss": 0.0095, |
| "step": 2300 |
| }, |
| { |
| "epoch": 55.0, |
| "grad_norm": 0.2077546864748001, |
| "learning_rate": 4.618e-05, |
| "loss": 0.0094, |
| "step": 2310 |
| }, |
| { |
| "epoch": 55.23809523809524, |
| "grad_norm": 0.2246176302433014, |
| "learning_rate": 4.638e-05, |
| "loss": 0.0108, |
| "step": 2320 |
| }, |
| { |
| "epoch": 55.476190476190474, |
| "grad_norm": 0.1786874681711197, |
| "learning_rate": 4.6580000000000005e-05, |
| "loss": 0.0091, |
| "step": 2330 |
| }, |
| { |
| "epoch": 55.714285714285715, |
| "grad_norm": 0.18008869886398315, |
| "learning_rate": 4.678e-05, |
| "loss": 0.0106, |
| "step": 2340 |
| }, |
| { |
| "epoch": 55.95238095238095, |
| "grad_norm": 0.17840005457401276, |
| "learning_rate": 4.698e-05, |
| "loss": 0.0087, |
| "step": 2350 |
| }, |
| { |
| "epoch": 56.19047619047619, |
| "grad_norm": 0.2124982625246048, |
| "learning_rate": 4.718e-05, |
| "loss": 0.0089, |
| "step": 2360 |
| }, |
| { |
| "epoch": 56.42857142857143, |
| "grad_norm": 0.21830888092517853, |
| "learning_rate": 4.7380000000000004e-05, |
| "loss": 0.0093, |
| "step": 2370 |
| }, |
| { |
| "epoch": 56.666666666666664, |
| "grad_norm": 0.16683553159236908, |
| "learning_rate": 4.758e-05, |
| "loss": 0.0084, |
| "step": 2380 |
| }, |
| { |
| "epoch": 56.904761904761905, |
| "grad_norm": 0.12809693813323975, |
| "learning_rate": 4.778e-05, |
| "loss": 0.0082, |
| "step": 2390 |
| }, |
| { |
| "epoch": 57.142857142857146, |
| "grad_norm": 0.1644316464662552, |
| "learning_rate": 4.7980000000000005e-05, |
| "loss": 0.0094, |
| "step": 2400 |
| }, |
| { |
| "epoch": 57.38095238095238, |
| "grad_norm": 0.17521336674690247, |
| "learning_rate": 4.818e-05, |
| "loss": 0.0088, |
| "step": 2410 |
| }, |
| { |
| "epoch": 57.61904761904762, |
| "grad_norm": 0.19145992398262024, |
| "learning_rate": 4.838e-05, |
| "loss": 0.0086, |
| "step": 2420 |
| }, |
| { |
| "epoch": 57.857142857142854, |
| "grad_norm": 0.20108817517757416, |
| "learning_rate": 4.8580000000000006e-05, |
| "loss": 0.0091, |
| "step": 2430 |
| }, |
| { |
| "epoch": 58.095238095238095, |
| "grad_norm": 0.1912873089313507, |
| "learning_rate": 4.8780000000000004e-05, |
| "loss": 0.0095, |
| "step": 2440 |
| }, |
| { |
| "epoch": 58.333333333333336, |
| "grad_norm": 0.22434067726135254, |
| "learning_rate": 4.898e-05, |
| "loss": 0.0098, |
| "step": 2450 |
| }, |
| { |
| "epoch": 58.57142857142857, |
| "grad_norm": 0.2043454349040985, |
| "learning_rate": 4.918000000000001e-05, |
| "loss": 0.0082, |
| "step": 2460 |
| }, |
| { |
| "epoch": 58.80952380952381, |
| "grad_norm": 0.20794729888439178, |
| "learning_rate": 4.9380000000000005e-05, |
| "loss": 0.0099, |
| "step": 2470 |
| }, |
| { |
| "epoch": 59.04761904761905, |
| "grad_norm": 0.20118959248065948, |
| "learning_rate": 4.958e-05, |
| "loss": 0.01, |
| "step": 2480 |
| }, |
| { |
| "epoch": 59.285714285714285, |
| "grad_norm": 0.20195062458515167, |
| "learning_rate": 4.978e-05, |
| "loss": 0.0095, |
| "step": 2490 |
| }, |
| { |
| "epoch": 59.523809523809526, |
| "grad_norm": 0.2070167064666748, |
| "learning_rate": 4.9980000000000006e-05, |
| "loss": 0.0083, |
| "step": 2500 |
| }, |
| { |
| "epoch": 59.76190476190476, |
| "grad_norm": 0.16778701543807983, |
| "learning_rate": 5.0180000000000004e-05, |
| "loss": 0.0085, |
| "step": 2510 |
| }, |
| { |
| "epoch": 60.0, |
| "grad_norm": 0.20277884602546692, |
| "learning_rate": 5.038e-05, |
| "loss": 0.0087, |
| "step": 2520 |
| }, |
| { |
| "epoch": 60.23809523809524, |
| "grad_norm": 0.2618632912635803, |
| "learning_rate": 5.058000000000001e-05, |
| "loss": 0.0093, |
| "step": 2530 |
| }, |
| { |
| "epoch": 60.476190476190474, |
| "grad_norm": 0.21601352095603943, |
| "learning_rate": 5.0780000000000005e-05, |
| "loss": 0.0077, |
| "step": 2540 |
| }, |
| { |
| "epoch": 60.714285714285715, |
| "grad_norm": 0.27718445658683777, |
| "learning_rate": 5.098e-05, |
| "loss": 0.0089, |
| "step": 2550 |
| }, |
| { |
| "epoch": 60.95238095238095, |
| "grad_norm": 0.22408516705036163, |
| "learning_rate": 5.118000000000001e-05, |
| "loss": 0.0087, |
| "step": 2560 |
| }, |
| { |
| "epoch": 61.19047619047619, |
| "grad_norm": 0.1639304906129837, |
| "learning_rate": 5.1380000000000006e-05, |
| "loss": 0.0088, |
| "step": 2570 |
| }, |
| { |
| "epoch": 61.42857142857143, |
| "grad_norm": 0.16470015048980713, |
| "learning_rate": 5.1580000000000004e-05, |
| "loss": 0.0083, |
| "step": 2580 |
| }, |
| { |
| "epoch": 61.666666666666664, |
| "grad_norm": 0.1954708844423294, |
| "learning_rate": 5.178000000000001e-05, |
| "loss": 0.0088, |
| "step": 2590 |
| }, |
| { |
| "epoch": 61.904761904761905, |
| "grad_norm": 0.21078795194625854, |
| "learning_rate": 5.198000000000001e-05, |
| "loss": 0.0086, |
| "step": 2600 |
| }, |
| { |
| "epoch": 62.142857142857146, |
| "grad_norm": 0.2233748435974121, |
| "learning_rate": 5.2180000000000005e-05, |
| "loss": 0.0078, |
| "step": 2610 |
| }, |
| { |
| "epoch": 62.38095238095238, |
| "grad_norm": 0.17456229031085968, |
| "learning_rate": 5.238000000000001e-05, |
| "loss": 0.0087, |
| "step": 2620 |
| }, |
| { |
| "epoch": 62.61904761904762, |
| "grad_norm": 0.2898193299770355, |
| "learning_rate": 5.258000000000001e-05, |
| "loss": 0.0092, |
| "step": 2630 |
| }, |
| { |
| "epoch": 62.857142857142854, |
| "grad_norm": 0.2961515486240387, |
| "learning_rate": 5.2780000000000006e-05, |
| "loss": 0.0093, |
| "step": 2640 |
| }, |
| { |
| "epoch": 63.095238095238095, |
| "grad_norm": 0.17851883172988892, |
| "learning_rate": 5.2980000000000004e-05, |
| "loss": 0.0082, |
| "step": 2650 |
| }, |
| { |
| "epoch": 63.333333333333336, |
| "grad_norm": 0.19875399768352509, |
| "learning_rate": 5.318000000000001e-05, |
| "loss": 0.0093, |
| "step": 2660 |
| }, |
| { |
| "epoch": 63.57142857142857, |
| "grad_norm": 0.27968868613243103, |
| "learning_rate": 5.338000000000001e-05, |
| "loss": 0.0085, |
| "step": 2670 |
| }, |
| { |
| "epoch": 63.80952380952381, |
| "grad_norm": 0.23410305380821228, |
| "learning_rate": 5.3580000000000005e-05, |
| "loss": 0.0089, |
| "step": 2680 |
| }, |
| { |
| "epoch": 64.04761904761905, |
| "grad_norm": 0.24374030530452728, |
| "learning_rate": 5.378e-05, |
| "loss": 0.0087, |
| "step": 2690 |
| }, |
| { |
| "epoch": 64.28571428571429, |
| "grad_norm": 0.13225769996643066, |
| "learning_rate": 5.3979999999999995e-05, |
| "loss": 0.0078, |
| "step": 2700 |
| }, |
| { |
| "epoch": 64.52380952380952, |
| "grad_norm": 0.14868958294391632, |
| "learning_rate": 5.418e-05, |
| "loss": 0.0082, |
| "step": 2710 |
| }, |
| { |
| "epoch": 64.76190476190476, |
| "grad_norm": 0.2185503989458084, |
| "learning_rate": 5.438e-05, |
| "loss": 0.0087, |
| "step": 2720 |
| }, |
| { |
| "epoch": 65.0, |
| "grad_norm": 0.1768341064453125, |
| "learning_rate": 5.4579999999999996e-05, |
| "loss": 0.0083, |
| "step": 2730 |
| }, |
| { |
| "epoch": 65.23809523809524, |
| "grad_norm": 0.15162885189056396, |
| "learning_rate": 5.478e-05, |
| "loss": 0.0081, |
| "step": 2740 |
| }, |
| { |
| "epoch": 65.47619047619048, |
| "grad_norm": 0.270111620426178, |
| "learning_rate": 5.498e-05, |
| "loss": 0.0091, |
| "step": 2750 |
| }, |
| { |
| "epoch": 65.71428571428571, |
| "grad_norm": 0.2208758145570755, |
| "learning_rate": 5.518e-05, |
| "loss": 0.0081, |
| "step": 2760 |
| }, |
| { |
| "epoch": 65.95238095238095, |
| "grad_norm": 0.24379397928714752, |
| "learning_rate": 5.538e-05, |
| "loss": 0.0084, |
| "step": 2770 |
| }, |
| { |
| "epoch": 66.19047619047619, |
| "grad_norm": 0.19786155223846436, |
| "learning_rate": 5.558e-05, |
| "loss": 0.0085, |
| "step": 2780 |
| }, |
| { |
| "epoch": 66.42857142857143, |
| "grad_norm": 0.28605917096138, |
| "learning_rate": 5.578e-05, |
| "loss": 0.0083, |
| "step": 2790 |
| }, |
| { |
| "epoch": 66.66666666666667, |
| "grad_norm": 0.29404252767562866, |
| "learning_rate": 5.5979999999999996e-05, |
| "loss": 0.0092, |
| "step": 2800 |
| }, |
| { |
| "epoch": 66.9047619047619, |
| "grad_norm": 0.2808834910392761, |
| "learning_rate": 5.618e-05, |
| "loss": 0.0094, |
| "step": 2810 |
| }, |
| { |
| "epoch": 67.14285714285714, |
| "grad_norm": 0.18105798959732056, |
| "learning_rate": 5.638e-05, |
| "loss": 0.0078, |
| "step": 2820 |
| }, |
| { |
| "epoch": 67.38095238095238, |
| "grad_norm": 0.2150157243013382, |
| "learning_rate": 5.658e-05, |
| "loss": 0.0077, |
| "step": 2830 |
| }, |
| { |
| "epoch": 67.61904761904762, |
| "grad_norm": 0.1953282207250595, |
| "learning_rate": 5.678e-05, |
| "loss": 0.0089, |
| "step": 2840 |
| }, |
| { |
| "epoch": 67.85714285714286, |
| "grad_norm": 0.2190709114074707, |
| "learning_rate": 5.698e-05, |
| "loss": 0.0088, |
| "step": 2850 |
| }, |
| { |
| "epoch": 68.0952380952381, |
| "grad_norm": 0.22195559740066528, |
| "learning_rate": 5.718e-05, |
| "loss": 0.0079, |
| "step": 2860 |
| }, |
| { |
| "epoch": 68.33333333333333, |
| "grad_norm": 0.15911747515201569, |
| "learning_rate": 5.738e-05, |
| "loss": 0.008, |
| "step": 2870 |
| }, |
| { |
| "epoch": 68.57142857142857, |
| "grad_norm": 0.16862092912197113, |
| "learning_rate": 5.758e-05, |
| "loss": 0.0082, |
| "step": 2880 |
| }, |
| { |
| "epoch": 68.80952380952381, |
| "grad_norm": 0.23898084461688995, |
| "learning_rate": 5.778e-05, |
| "loss": 0.0096, |
| "step": 2890 |
| }, |
| { |
| "epoch": 69.04761904761905, |
| "grad_norm": 0.2653178870677948, |
| "learning_rate": 5.7980000000000004e-05, |
| "loss": 0.0086, |
| "step": 2900 |
| }, |
| { |
| "epoch": 69.28571428571429, |
| "grad_norm": 0.277632474899292, |
| "learning_rate": 5.818e-05, |
| "loss": 0.0103, |
| "step": 2910 |
| }, |
| { |
| "epoch": 69.52380952380952, |
| "grad_norm": 0.1783531755208969, |
| "learning_rate": 5.838e-05, |
| "loss": 0.008, |
| "step": 2920 |
| }, |
| { |
| "epoch": 69.76190476190476, |
| "grad_norm": 0.2558484375476837, |
| "learning_rate": 5.858e-05, |
| "loss": 0.0086, |
| "step": 2930 |
| }, |
| { |
| "epoch": 70.0, |
| "grad_norm": 0.288993239402771, |
| "learning_rate": 5.878e-05, |
| "loss": 0.0089, |
| "step": 2940 |
| }, |
| { |
| "epoch": 70.23809523809524, |
| "grad_norm": 0.12619392573833466, |
| "learning_rate": 5.898e-05, |
| "loss": 0.008, |
| "step": 2950 |
| }, |
| { |
| "epoch": 70.47619047619048, |
| "grad_norm": 0.27174708247184753, |
| "learning_rate": 5.918e-05, |
| "loss": 0.009, |
| "step": 2960 |
| }, |
| { |
| "epoch": 70.71428571428571, |
| "grad_norm": 0.1787755787372589, |
| "learning_rate": 5.9380000000000004e-05, |
| "loss": 0.0087, |
| "step": 2970 |
| }, |
| { |
| "epoch": 70.95238095238095, |
| "grad_norm": 0.1610296368598938, |
| "learning_rate": 5.958e-05, |
| "loss": 0.0083, |
| "step": 2980 |
| }, |
| { |
| "epoch": 71.19047619047619, |
| "grad_norm": 0.1545838713645935, |
| "learning_rate": 5.978e-05, |
| "loss": 0.0097, |
| "step": 2990 |
| }, |
| { |
| "epoch": 71.42857142857143, |
| "grad_norm": 0.22549951076507568, |
| "learning_rate": 5.9980000000000005e-05, |
| "loss": 0.0085, |
| "step": 3000 |
| }, |
| { |
| "epoch": 71.66666666666667, |
| "grad_norm": 0.20115163922309875, |
| "learning_rate": 6.018e-05, |
| "loss": 0.0083, |
| "step": 3010 |
| }, |
| { |
| "epoch": 71.9047619047619, |
| "grad_norm": 0.1446419358253479, |
| "learning_rate": 6.038e-05, |
| "loss": 0.0077, |
| "step": 3020 |
| }, |
| { |
| "epoch": 72.14285714285714, |
| "grad_norm": 0.1421142816543579, |
| "learning_rate": 6.0580000000000006e-05, |
| "loss": 0.0078, |
| "step": 3030 |
| }, |
| { |
| "epoch": 72.38095238095238, |
| "grad_norm": 0.19602350890636444, |
| "learning_rate": 6.0780000000000004e-05, |
| "loss": 0.0079, |
| "step": 3040 |
| }, |
| { |
| "epoch": 72.61904761904762, |
| "grad_norm": 0.12963727116584778, |
| "learning_rate": 6.098e-05, |
| "loss": 0.008, |
| "step": 3050 |
| }, |
| { |
| "epoch": 72.85714285714286, |
| "grad_norm": 0.20508438348770142, |
| "learning_rate": 6.118000000000001e-05, |
| "loss": 0.0085, |
| "step": 3060 |
| }, |
| { |
| "epoch": 73.0952380952381, |
| "grad_norm": 0.19156931340694427, |
| "learning_rate": 6.138e-05, |
| "loss": 0.0078, |
| "step": 3070 |
| }, |
| { |
| "epoch": 73.33333333333333, |
| "grad_norm": 0.18398821353912354, |
| "learning_rate": 6.158e-05, |
| "loss": 0.008, |
| "step": 3080 |
| }, |
| { |
| "epoch": 73.57142857142857, |
| "grad_norm": 0.15092571079730988, |
| "learning_rate": 6.178000000000001e-05, |
| "loss": 0.008, |
| "step": 3090 |
| }, |
| { |
| "epoch": 73.80952380952381, |
| "grad_norm": 0.16772013902664185, |
| "learning_rate": 6.198e-05, |
| "loss": 0.0072, |
| "step": 3100 |
| }, |
| { |
| "epoch": 74.04761904761905, |
| "grad_norm": 0.15763218700885773, |
| "learning_rate": 6.218e-05, |
| "loss": 0.0073, |
| "step": 3110 |
| }, |
| { |
| "epoch": 74.28571428571429, |
| "grad_norm": 0.14704430103302002, |
| "learning_rate": 6.238000000000001e-05, |
| "loss": 0.0072, |
| "step": 3120 |
| }, |
| { |
| "epoch": 74.52380952380952, |
| "grad_norm": 0.1548171490430832, |
| "learning_rate": 6.258e-05, |
| "loss": 0.0079, |
| "step": 3130 |
| }, |
| { |
| "epoch": 74.76190476190476, |
| "grad_norm": 0.20991156995296478, |
| "learning_rate": 6.278e-05, |
| "loss": 0.0078, |
| "step": 3140 |
| }, |
| { |
| "epoch": 75.0, |
| "grad_norm": 0.1863728016614914, |
| "learning_rate": 6.298000000000001e-05, |
| "loss": 0.0076, |
| "step": 3150 |
| }, |
| { |
| "epoch": 75.23809523809524, |
| "grad_norm": 0.19600482285022736, |
| "learning_rate": 6.318e-05, |
| "loss": 0.007, |
| "step": 3160 |
| }, |
| { |
| "epoch": 75.47619047619048, |
| "grad_norm": 0.16242371499538422, |
| "learning_rate": 6.338e-05, |
| "loss": 0.0073, |
| "step": 3170 |
| }, |
| { |
| "epoch": 75.71428571428571, |
| "grad_norm": 0.20527952909469604, |
| "learning_rate": 6.358000000000001e-05, |
| "loss": 0.0085, |
| "step": 3180 |
| }, |
| { |
| "epoch": 75.95238095238095, |
| "grad_norm": 0.2159862071275711, |
| "learning_rate": 6.378e-05, |
| "loss": 0.0081, |
| "step": 3190 |
| }, |
| { |
| "epoch": 76.19047619047619, |
| "grad_norm": 0.18620286881923676, |
| "learning_rate": 6.398000000000001e-05, |
| "loss": 0.0082, |
| "step": 3200 |
| }, |
| { |
| "epoch": 76.42857142857143, |
| "grad_norm": 0.11087871342897415, |
| "learning_rate": 6.418000000000001e-05, |
| "loss": 0.0074, |
| "step": 3210 |
| }, |
| { |
| "epoch": 76.66666666666667, |
| "grad_norm": 0.17623348534107208, |
| "learning_rate": 6.438e-05, |
| "loss": 0.0078, |
| "step": 3220 |
| }, |
| { |
| "epoch": 76.9047619047619, |
| "grad_norm": 0.15830525755882263, |
| "learning_rate": 6.458000000000001e-05, |
| "loss": 0.0071, |
| "step": 3230 |
| }, |
| { |
| "epoch": 77.14285714285714, |
| "grad_norm": 0.2139369249343872, |
| "learning_rate": 6.478000000000001e-05, |
| "loss": 0.0079, |
| "step": 3240 |
| }, |
| { |
| "epoch": 77.38095238095238, |
| "grad_norm": 0.1451287716627121, |
| "learning_rate": 6.498e-05, |
| "loss": 0.0077, |
| "step": 3250 |
| }, |
| { |
| "epoch": 77.61904761904762, |
| "grad_norm": 0.20386913418769836, |
| "learning_rate": 6.518000000000001e-05, |
| "loss": 0.0076, |
| "step": 3260 |
| }, |
| { |
| "epoch": 77.85714285714286, |
| "grad_norm": 0.20200611650943756, |
| "learning_rate": 6.538000000000001e-05, |
| "loss": 0.0078, |
| "step": 3270 |
| }, |
| { |
| "epoch": 78.0952380952381, |
| "grad_norm": 0.17592903971672058, |
| "learning_rate": 6.558e-05, |
| "loss": 0.0077, |
| "step": 3280 |
| }, |
| { |
| "epoch": 78.33333333333333, |
| "grad_norm": 0.1737641990184784, |
| "learning_rate": 6.578000000000001e-05, |
| "loss": 0.0071, |
| "step": 3290 |
| }, |
| { |
| "epoch": 78.57142857142857, |
| "grad_norm": 0.17545191943645477, |
| "learning_rate": 6.598e-05, |
| "loss": 0.0074, |
| "step": 3300 |
| }, |
| { |
| "epoch": 78.80952380952381, |
| "grad_norm": 0.17335550487041473, |
| "learning_rate": 6.618e-05, |
| "loss": 0.0073, |
| "step": 3310 |
| }, |
| { |
| "epoch": 79.04761904761905, |
| "grad_norm": 0.17565906047821045, |
| "learning_rate": 6.638e-05, |
| "loss": 0.0074, |
| "step": 3320 |
| }, |
| { |
| "epoch": 79.28571428571429, |
| "grad_norm": 0.1891113817691803, |
| "learning_rate": 6.658e-05, |
| "loss": 0.0085, |
| "step": 3330 |
| }, |
| { |
| "epoch": 79.52380952380952, |
| "grad_norm": 0.2000664621591568, |
| "learning_rate": 6.678e-05, |
| "loss": 0.0079, |
| "step": 3340 |
| }, |
| { |
| "epoch": 79.76190476190476, |
| "grad_norm": 0.25789329409599304, |
| "learning_rate": 6.698e-05, |
| "loss": 0.0075, |
| "step": 3350 |
| }, |
| { |
| "epoch": 80.0, |
| "grad_norm": 0.21093080937862396, |
| "learning_rate": 6.718e-05, |
| "loss": 0.0085, |
| "step": 3360 |
| }, |
| { |
| "epoch": 80.23809523809524, |
| "grad_norm": 0.21562764048576355, |
| "learning_rate": 6.738e-05, |
| "loss": 0.0078, |
| "step": 3370 |
| }, |
| { |
| "epoch": 80.47619047619048, |
| "grad_norm": 0.12749473750591278, |
| "learning_rate": 6.758e-05, |
| "loss": 0.0073, |
| "step": 3380 |
| }, |
| { |
| "epoch": 80.71428571428571, |
| "grad_norm": 0.16423152387142181, |
| "learning_rate": 6.778e-05, |
| "loss": 0.008, |
| "step": 3390 |
| }, |
| { |
| "epoch": 80.95238095238095, |
| "grad_norm": 0.19509761035442352, |
| "learning_rate": 6.798e-05, |
| "loss": 0.0081, |
| "step": 3400 |
| }, |
| { |
| "epoch": 81.19047619047619, |
| "grad_norm": 0.13191723823547363, |
| "learning_rate": 6.818e-05, |
| "loss": 0.0069, |
| "step": 3410 |
| }, |
| { |
| "epoch": 81.42857142857143, |
| "grad_norm": 0.18503569066524506, |
| "learning_rate": 6.838e-05, |
| "loss": 0.0081, |
| "step": 3420 |
| }, |
| { |
| "epoch": 81.66666666666667, |
| "grad_norm": 0.1624750792980194, |
| "learning_rate": 6.858e-05, |
| "loss": 0.0075, |
| "step": 3430 |
| }, |
| { |
| "epoch": 81.9047619047619, |
| "grad_norm": 0.1917305588722229, |
| "learning_rate": 6.878e-05, |
| "loss": 0.0085, |
| "step": 3440 |
| }, |
| { |
| "epoch": 82.14285714285714, |
| "grad_norm": 0.12193186581134796, |
| "learning_rate": 6.898e-05, |
| "loss": 0.0063, |
| "step": 3450 |
| }, |
| { |
| "epoch": 82.38095238095238, |
| "grad_norm": 0.1703914999961853, |
| "learning_rate": 6.918e-05, |
| "loss": 0.0075, |
| "step": 3460 |
| }, |
| { |
| "epoch": 82.61904761904762, |
| "grad_norm": 0.163728728890419, |
| "learning_rate": 6.938e-05, |
| "loss": 0.0072, |
| "step": 3470 |
| }, |
| { |
| "epoch": 82.85714285714286, |
| "grad_norm": 0.16221626102924347, |
| "learning_rate": 6.958e-05, |
| "loss": 0.0071, |
| "step": 3480 |
| }, |
| { |
| "epoch": 83.0952380952381, |
| "grad_norm": 0.16557961702346802, |
| "learning_rate": 6.978e-05, |
| "loss": 0.0067, |
| "step": 3490 |
| }, |
| { |
| "epoch": 83.33333333333333, |
| "grad_norm": 0.18963250517845154, |
| "learning_rate": 6.998e-05, |
| "loss": 0.0072, |
| "step": 3500 |
| }, |
| { |
| "epoch": 83.57142857142857, |
| "grad_norm": 0.17970404028892517, |
| "learning_rate": 7.018e-05, |
| "loss": 0.0071, |
| "step": 3510 |
| }, |
| { |
| "epoch": 83.80952380952381, |
| "grad_norm": 0.28654032945632935, |
| "learning_rate": 7.038e-05, |
| "loss": 0.0066, |
| "step": 3520 |
| }, |
| { |
| "epoch": 84.04761904761905, |
| "grad_norm": 0.2622557580471039, |
| "learning_rate": 7.058e-05, |
| "loss": 0.0073, |
| "step": 3530 |
| }, |
| { |
| "epoch": 84.28571428571429, |
| "grad_norm": 0.1843796670436859, |
| "learning_rate": 7.078e-05, |
| "loss": 0.0068, |
| "step": 3540 |
| }, |
| { |
| "epoch": 84.52380952380952, |
| "grad_norm": 0.1925242692232132, |
| "learning_rate": 7.098e-05, |
| "loss": 0.0075, |
| "step": 3550 |
| }, |
| { |
| "epoch": 84.76190476190476, |
| "grad_norm": 0.1450975388288498, |
| "learning_rate": 7.118e-05, |
| "loss": 0.0079, |
| "step": 3560 |
| }, |
| { |
| "epoch": 85.0, |
| "grad_norm": 0.15161316096782684, |
| "learning_rate": 7.138e-05, |
| "loss": 0.007, |
| "step": 3570 |
| }, |
| { |
| "epoch": 85.23809523809524, |
| "grad_norm": 0.13440363109111786, |
| "learning_rate": 7.158e-05, |
| "loss": 0.0076, |
| "step": 3580 |
| }, |
| { |
| "epoch": 85.47619047619048, |
| "grad_norm": 0.1517293006181717, |
| "learning_rate": 7.178000000000001e-05, |
| "loss": 0.0065, |
| "step": 3590 |
| }, |
| { |
| "epoch": 85.71428571428571, |
| "grad_norm": 0.17097093164920807, |
| "learning_rate": 7.198e-05, |
| "loss": 0.007, |
| "step": 3600 |
| }, |
| { |
| "epoch": 85.95238095238095, |
| "grad_norm": 0.18135547637939453, |
| "learning_rate": 7.218e-05, |
| "loss": 0.0066, |
| "step": 3610 |
| }, |
| { |
| "epoch": 86.19047619047619, |
| "grad_norm": 0.1623767912387848, |
| "learning_rate": 7.238000000000001e-05, |
| "loss": 0.0068, |
| "step": 3620 |
| }, |
| { |
| "epoch": 86.42857142857143, |
| "grad_norm": 0.17980942130088806, |
| "learning_rate": 7.258e-05, |
| "loss": 0.0072, |
| "step": 3630 |
| }, |
| { |
| "epoch": 86.66666666666667, |
| "grad_norm": 0.1973033845424652, |
| "learning_rate": 7.278e-05, |
| "loss": 0.0075, |
| "step": 3640 |
| }, |
| { |
| "epoch": 86.9047619047619, |
| "grad_norm": 0.14353333413600922, |
| "learning_rate": 7.298000000000001e-05, |
| "loss": 0.0067, |
| "step": 3650 |
| }, |
| { |
| "epoch": 87.14285714285714, |
| "grad_norm": 0.17345847189426422, |
| "learning_rate": 7.318e-05, |
| "loss": 0.0071, |
| "step": 3660 |
| }, |
| { |
| "epoch": 87.38095238095238, |
| "grad_norm": 0.1442338228225708, |
| "learning_rate": 7.338e-05, |
| "loss": 0.0069, |
| "step": 3670 |
| }, |
| { |
| "epoch": 87.61904761904762, |
| "grad_norm": 0.15484997630119324, |
| "learning_rate": 7.358000000000001e-05, |
| "loss": 0.0068, |
| "step": 3680 |
| }, |
| { |
| "epoch": 87.85714285714286, |
| "grad_norm": 0.13868385553359985, |
| "learning_rate": 7.378e-05, |
| "loss": 0.0071, |
| "step": 3690 |
| }, |
| { |
| "epoch": 88.0952380952381, |
| "grad_norm": 0.14823536574840546, |
| "learning_rate": 7.398e-05, |
| "loss": 0.0068, |
| "step": 3700 |
| }, |
| { |
| "epoch": 88.33333333333333, |
| "grad_norm": 0.16034354269504547, |
| "learning_rate": 7.418000000000001e-05, |
| "loss": 0.0067, |
| "step": 3710 |
| }, |
| { |
| "epoch": 88.57142857142857, |
| "grad_norm": 0.15171010792255402, |
| "learning_rate": 7.438e-05, |
| "loss": 0.0068, |
| "step": 3720 |
| }, |
| { |
| "epoch": 88.80952380952381, |
| "grad_norm": 0.15789009630680084, |
| "learning_rate": 7.458000000000001e-05, |
| "loss": 0.0075, |
| "step": 3730 |
| }, |
| { |
| "epoch": 89.04761904761905, |
| "grad_norm": 0.12390757352113724, |
| "learning_rate": 7.478e-05, |
| "loss": 0.007, |
| "step": 3740 |
| }, |
| { |
| "epoch": 89.28571428571429, |
| "grad_norm": 0.12417872250080109, |
| "learning_rate": 7.498e-05, |
| "loss": 0.0068, |
| "step": 3750 |
| }, |
| { |
| "epoch": 89.52380952380952, |
| "grad_norm": 0.15850147604942322, |
| "learning_rate": 7.518000000000001e-05, |
| "loss": 0.0072, |
| "step": 3760 |
| }, |
| { |
| "epoch": 89.76190476190476, |
| "grad_norm": 0.15888957679271698, |
| "learning_rate": 7.538e-05, |
| "loss": 0.0084, |
| "step": 3770 |
| }, |
| { |
| "epoch": 90.0, |
| "grad_norm": 0.16546015441417694, |
| "learning_rate": 7.558e-05, |
| "loss": 0.0081, |
| "step": 3780 |
| }, |
| { |
| "epoch": 90.23809523809524, |
| "grad_norm": 0.1372298002243042, |
| "learning_rate": 7.578000000000001e-05, |
| "loss": 0.0065, |
| "step": 3790 |
| }, |
| { |
| "epoch": 90.47619047619048, |
| "grad_norm": 0.1882469207048416, |
| "learning_rate": 7.598e-05, |
| "loss": 0.0075, |
| "step": 3800 |
| }, |
| { |
| "epoch": 90.71428571428571, |
| "grad_norm": 0.13481615483760834, |
| "learning_rate": 7.618e-05, |
| "loss": 0.007, |
| "step": 3810 |
| }, |
| { |
| "epoch": 90.95238095238095, |
| "grad_norm": 0.21587280929088593, |
| "learning_rate": 7.638000000000001e-05, |
| "loss": 0.0074, |
| "step": 3820 |
| }, |
| { |
| "epoch": 91.19047619047619, |
| "grad_norm": 0.17822697758674622, |
| "learning_rate": 7.658e-05, |
| "loss": 0.0069, |
| "step": 3830 |
| }, |
| { |
| "epoch": 91.42857142857143, |
| "grad_norm": 0.1890319585800171, |
| "learning_rate": 7.678000000000001e-05, |
| "loss": 0.0073, |
| "step": 3840 |
| }, |
| { |
| "epoch": 91.66666666666667, |
| "grad_norm": 0.15310367941856384, |
| "learning_rate": 7.698000000000001e-05, |
| "loss": 0.0079, |
| "step": 3850 |
| }, |
| { |
| "epoch": 91.9047619047619, |
| "grad_norm": 0.24771210551261902, |
| "learning_rate": 7.718e-05, |
| "loss": 0.0076, |
| "step": 3860 |
| }, |
| { |
| "epoch": 92.14285714285714, |
| "grad_norm": 0.17208373546600342, |
| "learning_rate": 7.738000000000001e-05, |
| "loss": 0.0067, |
| "step": 3870 |
| }, |
| { |
| "epoch": 92.38095238095238, |
| "grad_norm": 0.18385735154151917, |
| "learning_rate": 7.758000000000001e-05, |
| "loss": 0.0078, |
| "step": 3880 |
| }, |
| { |
| "epoch": 92.61904761904762, |
| "grad_norm": 0.20605699717998505, |
| "learning_rate": 7.778e-05, |
| "loss": 0.0076, |
| "step": 3890 |
| }, |
| { |
| "epoch": 92.85714285714286, |
| "grad_norm": 0.15454132854938507, |
| "learning_rate": 7.798000000000001e-05, |
| "loss": 0.0073, |
| "step": 3900 |
| }, |
| { |
| "epoch": 93.0952380952381, |
| "grad_norm": 0.15797552466392517, |
| "learning_rate": 7.818000000000001e-05, |
| "loss": 0.0064, |
| "step": 3910 |
| }, |
| { |
| "epoch": 93.33333333333333, |
| "grad_norm": 0.1377422958612442, |
| "learning_rate": 7.838e-05, |
| "loss": 0.0065, |
| "step": 3920 |
| }, |
| { |
| "epoch": 93.57142857142857, |
| "grad_norm": 0.244019016623497, |
| "learning_rate": 7.858000000000001e-05, |
| "loss": 0.0067, |
| "step": 3930 |
| }, |
| { |
| "epoch": 93.80952380952381, |
| "grad_norm": 0.14223751425743103, |
| "learning_rate": 7.878e-05, |
| "loss": 0.0071, |
| "step": 3940 |
| }, |
| { |
| "epoch": 94.04761904761905, |
| "grad_norm": 0.20977427065372467, |
| "learning_rate": 7.897999999999999e-05, |
| "loss": 0.0077, |
| "step": 3950 |
| }, |
| { |
| "epoch": 94.28571428571429, |
| "grad_norm": 0.16180303692817688, |
| "learning_rate": 7.918e-05, |
| "loss": 0.0067, |
| "step": 3960 |
| }, |
| { |
| "epoch": 94.52380952380952, |
| "grad_norm": 0.15075701475143433, |
| "learning_rate": 7.938e-05, |
| "loss": 0.0074, |
| "step": 3970 |
| }, |
| { |
| "epoch": 94.76190476190476, |
| "grad_norm": 0.22439907491207123, |
| "learning_rate": 7.958e-05, |
| "loss": 0.0071, |
| "step": 3980 |
| }, |
| { |
| "epoch": 95.0, |
| "grad_norm": 0.18078812956809998, |
| "learning_rate": 7.978e-05, |
| "loss": 0.0067, |
| "step": 3990 |
| }, |
| { |
| "epoch": 95.23809523809524, |
| "grad_norm": 0.21833130717277527, |
| "learning_rate": 7.998e-05, |
| "loss": 0.0074, |
| "step": 4000 |
| }, |
| { |
| "epoch": 95.47619047619048, |
| "grad_norm": 0.16880813241004944, |
| "learning_rate": 8.018e-05, |
| "loss": 0.0065, |
| "step": 4010 |
| }, |
| { |
| "epoch": 95.71428571428571, |
| "grad_norm": 0.21530959010124207, |
| "learning_rate": 8.038e-05, |
| "loss": 0.0068, |
| "step": 4020 |
| }, |
| { |
| "epoch": 95.95238095238095, |
| "grad_norm": 0.14860278367996216, |
| "learning_rate": 8.058e-05, |
| "loss": 0.0068, |
| "step": 4030 |
| }, |
| { |
| "epoch": 96.19047619047619, |
| "grad_norm": 0.1412385106086731, |
| "learning_rate": 8.078e-05, |
| "loss": 0.0066, |
| "step": 4040 |
| }, |
| { |
| "epoch": 96.42857142857143, |
| "grad_norm": 0.18202956020832062, |
| "learning_rate": 8.098e-05, |
| "loss": 0.0074, |
| "step": 4050 |
| }, |
| { |
| "epoch": 96.66666666666667, |
| "grad_norm": 0.1721826195716858, |
| "learning_rate": 8.118e-05, |
| "loss": 0.0065, |
| "step": 4060 |
| }, |
| { |
| "epoch": 96.9047619047619, |
| "grad_norm": 0.1281909942626953, |
| "learning_rate": 8.138e-05, |
| "loss": 0.0075, |
| "step": 4070 |
| }, |
| { |
| "epoch": 97.14285714285714, |
| "grad_norm": 0.21998120844364166, |
| "learning_rate": 8.158e-05, |
| "loss": 0.0077, |
| "step": 4080 |
| }, |
| { |
| "epoch": 97.38095238095238, |
| "grad_norm": 0.1478026658296585, |
| "learning_rate": 8.178e-05, |
| "loss": 0.0069, |
| "step": 4090 |
| }, |
| { |
| "epoch": 97.61904761904762, |
| "grad_norm": 0.16635026037693024, |
| "learning_rate": 8.198e-05, |
| "loss": 0.007, |
| "step": 4100 |
| }, |
| { |
| "epoch": 97.85714285714286, |
| "grad_norm": 0.15864118933677673, |
| "learning_rate": 8.218e-05, |
| "loss": 0.0066, |
| "step": 4110 |
| }, |
| { |
| "epoch": 98.0952380952381, |
| "grad_norm": 0.18102677166461945, |
| "learning_rate": 8.238000000000001e-05, |
| "loss": 0.007, |
| "step": 4120 |
| }, |
| { |
| "epoch": 98.33333333333333, |
| "grad_norm": 0.15162087976932526, |
| "learning_rate": 8.258e-05, |
| "loss": 0.0065, |
| "step": 4130 |
| }, |
| { |
| "epoch": 98.57142857142857, |
| "grad_norm": 0.1276671290397644, |
| "learning_rate": 8.278e-05, |
| "loss": 0.0064, |
| "step": 4140 |
| }, |
| { |
| "epoch": 98.80952380952381, |
| "grad_norm": 0.16977721452713013, |
| "learning_rate": 8.298000000000001e-05, |
| "loss": 0.0077, |
| "step": 4150 |
| }, |
| { |
| "epoch": 99.04761904761905, |
| "grad_norm": 0.15173737704753876, |
| "learning_rate": 8.318e-05, |
| "loss": 0.0071, |
| "step": 4160 |
| }, |
| { |
| "epoch": 99.28571428571429, |
| "grad_norm": 0.1682591736316681, |
| "learning_rate": 8.338e-05, |
| "loss": 0.0071, |
| "step": 4170 |
| }, |
| { |
| "epoch": 99.52380952380952, |
| "grad_norm": 0.1667081117630005, |
| "learning_rate": 8.358e-05, |
| "loss": 0.0074, |
| "step": 4180 |
| }, |
| { |
| "epoch": 99.76190476190476, |
| "grad_norm": 0.1746528148651123, |
| "learning_rate": 8.378e-05, |
| "loss": 0.007, |
| "step": 4190 |
| }, |
| { |
| "epoch": 100.0, |
| "grad_norm": 0.15634509921073914, |
| "learning_rate": 8.398e-05, |
| "loss": 0.0071, |
| "step": 4200 |
| }, |
| { |
| "epoch": 100.23809523809524, |
| "grad_norm": 0.1528768241405487, |
| "learning_rate": 8.418e-05, |
| "loss": 0.0076, |
| "step": 4210 |
| }, |
| { |
| "epoch": 100.47619047619048, |
| "grad_norm": 0.18466581404209137, |
| "learning_rate": 8.438e-05, |
| "loss": 0.0068, |
| "step": 4220 |
| }, |
| { |
| "epoch": 100.71428571428571, |
| "grad_norm": 0.1265394389629364, |
| "learning_rate": 8.458e-05, |
| "loss": 0.0068, |
| "step": 4230 |
| }, |
| { |
| "epoch": 100.95238095238095, |
| "grad_norm": 0.16528388857841492, |
| "learning_rate": 8.478e-05, |
| "loss": 0.0061, |
| "step": 4240 |
| }, |
| { |
| "epoch": 101.19047619047619, |
| "grad_norm": 0.2074710726737976, |
| "learning_rate": 8.498e-05, |
| "loss": 0.0073, |
| "step": 4250 |
| }, |
| { |
| "epoch": 101.42857142857143, |
| "grad_norm": 0.13792483508586884, |
| "learning_rate": 8.518000000000001e-05, |
| "loss": 0.0071, |
| "step": 4260 |
| }, |
| { |
| "epoch": 101.66666666666667, |
| "grad_norm": 0.14737482368946075, |
| "learning_rate": 8.538e-05, |
| "loss": 0.0068, |
| "step": 4270 |
| }, |
| { |
| "epoch": 101.9047619047619, |
| "grad_norm": 0.15028272569179535, |
| "learning_rate": 8.558e-05, |
| "loss": 0.0068, |
| "step": 4280 |
| }, |
| { |
| "epoch": 102.14285714285714, |
| "grad_norm": 0.11532406508922577, |
| "learning_rate": 8.578000000000001e-05, |
| "loss": 0.0068, |
| "step": 4290 |
| }, |
| { |
| "epoch": 102.38095238095238, |
| "grad_norm": 0.14527995884418488, |
| "learning_rate": 8.598e-05, |
| "loss": 0.0068, |
| "step": 4300 |
| }, |
| { |
| "epoch": 102.61904761904762, |
| "grad_norm": 0.16907252371311188, |
| "learning_rate": 8.618e-05, |
| "loss": 0.007, |
| "step": 4310 |
| }, |
| { |
| "epoch": 102.85714285714286, |
| "grad_norm": 0.15627753734588623, |
| "learning_rate": 8.638000000000001e-05, |
| "loss": 0.0063, |
| "step": 4320 |
| }, |
| { |
| "epoch": 103.0952380952381, |
| "grad_norm": 0.18564121425151825, |
| "learning_rate": 8.658e-05, |
| "loss": 0.0067, |
| "step": 4330 |
| }, |
| { |
| "epoch": 103.33333333333333, |
| "grad_norm": 0.15645846724510193, |
| "learning_rate": 8.678e-05, |
| "loss": 0.0062, |
| "step": 4340 |
| }, |
| { |
| "epoch": 103.57142857142857, |
| "grad_norm": 0.1696149706840515, |
| "learning_rate": 8.698000000000001e-05, |
| "loss": 0.0075, |
| "step": 4350 |
| }, |
| { |
| "epoch": 103.80952380952381, |
| "grad_norm": 0.12378894537687302, |
| "learning_rate": 8.718e-05, |
| "loss": 0.0058, |
| "step": 4360 |
| }, |
| { |
| "epoch": 104.04761904761905, |
| "grad_norm": 0.15677836537361145, |
| "learning_rate": 8.738000000000001e-05, |
| "loss": 0.0063, |
| "step": 4370 |
| }, |
| { |
| "epoch": 104.28571428571429, |
| "grad_norm": 0.14414283633232117, |
| "learning_rate": 8.758000000000001e-05, |
| "loss": 0.0065, |
| "step": 4380 |
| }, |
| { |
| "epoch": 104.52380952380952, |
| "grad_norm": 0.15373027324676514, |
| "learning_rate": 8.778e-05, |
| "loss": 0.007, |
| "step": 4390 |
| }, |
| { |
| "epoch": 104.76190476190476, |
| "grad_norm": 0.14955894649028778, |
| "learning_rate": 8.798000000000001e-05, |
| "loss": 0.0073, |
| "step": 4400 |
| }, |
| { |
| "epoch": 105.0, |
| "grad_norm": 0.17094863951206207, |
| "learning_rate": 8.818000000000001e-05, |
| "loss": 0.0063, |
| "step": 4410 |
| }, |
| { |
| "epoch": 105.23809523809524, |
| "grad_norm": 0.14394135773181915, |
| "learning_rate": 8.838e-05, |
| "loss": 0.0063, |
| "step": 4420 |
| }, |
| { |
| "epoch": 105.47619047619048, |
| "grad_norm": 0.10035566985607147, |
| "learning_rate": 8.858000000000001e-05, |
| "loss": 0.0062, |
| "step": 4430 |
| }, |
| { |
| "epoch": 105.71428571428571, |
| "grad_norm": 0.11350033432245255, |
| "learning_rate": 8.878000000000001e-05, |
| "loss": 0.0057, |
| "step": 4440 |
| }, |
| { |
| "epoch": 105.95238095238095, |
| "grad_norm": 0.1663508117198944, |
| "learning_rate": 8.898e-05, |
| "loss": 0.006, |
| "step": 4450 |
| }, |
| { |
| "epoch": 106.19047619047619, |
| "grad_norm": 0.1375061422586441, |
| "learning_rate": 8.918000000000001e-05, |
| "loss": 0.0065, |
| "step": 4460 |
| }, |
| { |
| "epoch": 106.42857142857143, |
| "grad_norm": 0.11623188108205795, |
| "learning_rate": 8.938e-05, |
| "loss": 0.0064, |
| "step": 4470 |
| }, |
| { |
| "epoch": 106.66666666666667, |
| "grad_norm": 0.1134343296289444, |
| "learning_rate": 8.958e-05, |
| "loss": 0.0063, |
| "step": 4480 |
| }, |
| { |
| "epoch": 106.9047619047619, |
| "grad_norm": 0.1326800137758255, |
| "learning_rate": 8.978000000000001e-05, |
| "loss": 0.0056, |
| "step": 4490 |
| }, |
| { |
| "epoch": 107.14285714285714, |
| "grad_norm": 0.11536196619272232, |
| "learning_rate": 8.998e-05, |
| "loss": 0.0058, |
| "step": 4500 |
| }, |
| { |
| "epoch": 107.38095238095238, |
| "grad_norm": 0.15063893795013428, |
| "learning_rate": 9.018000000000001e-05, |
| "loss": 0.0063, |
| "step": 4510 |
| }, |
| { |
| "epoch": 107.61904761904762, |
| "grad_norm": 0.1233978122472763, |
| "learning_rate": 9.038000000000001e-05, |
| "loss": 0.0063, |
| "step": 4520 |
| }, |
| { |
| "epoch": 107.85714285714286, |
| "grad_norm": 0.17340506613254547, |
| "learning_rate": 9.058e-05, |
| "loss": 0.006, |
| "step": 4530 |
| }, |
| { |
| "epoch": 108.0952380952381, |
| "grad_norm": 0.14346300065517426, |
| "learning_rate": 9.078000000000001e-05, |
| "loss": 0.0062, |
| "step": 4540 |
| }, |
| { |
| "epoch": 108.33333333333333, |
| "grad_norm": 0.12333904206752777, |
| "learning_rate": 9.098000000000001e-05, |
| "loss": 0.0067, |
| "step": 4550 |
| }, |
| { |
| "epoch": 108.57142857142857, |
| "grad_norm": 0.14184190332889557, |
| "learning_rate": 9.118e-05, |
| "loss": 0.0067, |
| "step": 4560 |
| }, |
| { |
| "epoch": 108.80952380952381, |
| "grad_norm": 0.12716515362262726, |
| "learning_rate": 9.138e-05, |
| "loss": 0.0064, |
| "step": 4570 |
| }, |
| { |
| "epoch": 109.04761904761905, |
| "grad_norm": 0.152634397149086, |
| "learning_rate": 9.158e-05, |
| "loss": 0.0064, |
| "step": 4580 |
| }, |
| { |
| "epoch": 109.28571428571429, |
| "grad_norm": 0.13993842899799347, |
| "learning_rate": 9.178e-05, |
| "loss": 0.0063, |
| "step": 4590 |
| }, |
| { |
| "epoch": 109.52380952380952, |
| "grad_norm": 0.1567574292421341, |
| "learning_rate": 9.198e-05, |
| "loss": 0.0068, |
| "step": 4600 |
| }, |
| { |
| "epoch": 109.76190476190476, |
| "grad_norm": 0.14426244795322418, |
| "learning_rate": 9.218e-05, |
| "loss": 0.0068, |
| "step": 4610 |
| }, |
| { |
| "epoch": 110.0, |
| "grad_norm": 0.17362025380134583, |
| "learning_rate": 9.238e-05, |
| "loss": 0.0063, |
| "step": 4620 |
| }, |
| { |
| "epoch": 110.23809523809524, |
| "grad_norm": 0.2250196784734726, |
| "learning_rate": 9.258e-05, |
| "loss": 0.0072, |
| "step": 4630 |
| }, |
| { |
| "epoch": 110.47619047619048, |
| "grad_norm": 0.1585092395544052, |
| "learning_rate": 9.278e-05, |
| "loss": 0.0066, |
| "step": 4640 |
| }, |
| { |
| "epoch": 110.71428571428571, |
| "grad_norm": 0.12116267532110214, |
| "learning_rate": 9.298e-05, |
| "loss": 0.007, |
| "step": 4650 |
| }, |
| { |
| "epoch": 110.95238095238095, |
| "grad_norm": 0.15241120755672455, |
| "learning_rate": 9.318e-05, |
| "loss": 0.0063, |
| "step": 4660 |
| }, |
| { |
| "epoch": 111.19047619047619, |
| "grad_norm": 0.13969366252422333, |
| "learning_rate": 9.338e-05, |
| "loss": 0.0054, |
| "step": 4670 |
| }, |
| { |
| "epoch": 111.42857142857143, |
| "grad_norm": 0.15973520278930664, |
| "learning_rate": 9.358e-05, |
| "loss": 0.0064, |
| "step": 4680 |
| }, |
| { |
| "epoch": 111.66666666666667, |
| "grad_norm": 0.1298786699771881, |
| "learning_rate": 9.378e-05, |
| "loss": 0.0064, |
| "step": 4690 |
| }, |
| { |
| "epoch": 111.9047619047619, |
| "grad_norm": 0.17775169014930725, |
| "learning_rate": 9.398e-05, |
| "loss": 0.0067, |
| "step": 4700 |
| }, |
| { |
| "epoch": 112.14285714285714, |
| "grad_norm": 0.1498604565858841, |
| "learning_rate": 9.418e-05, |
| "loss": 0.0064, |
| "step": 4710 |
| }, |
| { |
| "epoch": 112.38095238095238, |
| "grad_norm": 0.17183223366737366, |
| "learning_rate": 9.438e-05, |
| "loss": 0.0063, |
| "step": 4720 |
| }, |
| { |
| "epoch": 112.61904761904762, |
| "grad_norm": 0.17093922197818756, |
| "learning_rate": 9.458e-05, |
| "loss": 0.0066, |
| "step": 4730 |
| }, |
| { |
| "epoch": 112.85714285714286, |
| "grad_norm": 0.16188636422157288, |
| "learning_rate": 9.478e-05, |
| "loss": 0.0061, |
| "step": 4740 |
| }, |
| { |
| "epoch": 113.0952380952381, |
| "grad_norm": 0.12783600389957428, |
| "learning_rate": 9.498e-05, |
| "loss": 0.0069, |
| "step": 4750 |
| }, |
| { |
| "epoch": 113.33333333333333, |
| "grad_norm": 0.17590761184692383, |
| "learning_rate": 9.518000000000001e-05, |
| "loss": 0.0067, |
| "step": 4760 |
| }, |
| { |
| "epoch": 113.57142857142857, |
| "grad_norm": 0.13549788296222687, |
| "learning_rate": 9.538e-05, |
| "loss": 0.0079, |
| "step": 4770 |
| }, |
| { |
| "epoch": 113.80952380952381, |
| "grad_norm": 0.1560969203710556, |
| "learning_rate": 9.558e-05, |
| "loss": 0.008, |
| "step": 4780 |
| }, |
| { |
| "epoch": 114.04761904761905, |
| "grad_norm": 0.17087014019489288, |
| "learning_rate": 9.578000000000001e-05, |
| "loss": 0.0073, |
| "step": 4790 |
| }, |
| { |
| "epoch": 114.28571428571429, |
| "grad_norm": 0.14696316421031952, |
| "learning_rate": 9.598e-05, |
| "loss": 0.0068, |
| "step": 4800 |
| }, |
| { |
| "epoch": 114.52380952380952, |
| "grad_norm": 0.16174004971981049, |
| "learning_rate": 9.618e-05, |
| "loss": 0.0062, |
| "step": 4810 |
| }, |
| { |
| "epoch": 114.76190476190476, |
| "grad_norm": 0.17660772800445557, |
| "learning_rate": 9.638000000000001e-05, |
| "loss": 0.0072, |
| "step": 4820 |
| }, |
| { |
| "epoch": 115.0, |
| "grad_norm": 0.12380684167146683, |
| "learning_rate": 9.658e-05, |
| "loss": 0.007, |
| "step": 4830 |
| }, |
| { |
| "epoch": 115.23809523809524, |
| "grad_norm": 0.12202756851911545, |
| "learning_rate": 9.678e-05, |
| "loss": 0.0058, |
| "step": 4840 |
| }, |
| { |
| "epoch": 115.47619047619048, |
| "grad_norm": 0.09938967227935791, |
| "learning_rate": 9.698000000000001e-05, |
| "loss": 0.0074, |
| "step": 4850 |
| }, |
| { |
| "epoch": 115.71428571428571, |
| "grad_norm": 0.13580027222633362, |
| "learning_rate": 9.718e-05, |
| "loss": 0.0068, |
| "step": 4860 |
| }, |
| { |
| "epoch": 115.95238095238095, |
| "grad_norm": 0.11069474369287491, |
| "learning_rate": 9.738e-05, |
| "loss": 0.0051, |
| "step": 4870 |
| }, |
| { |
| "epoch": 116.19047619047619, |
| "grad_norm": 0.13056397438049316, |
| "learning_rate": 9.758000000000001e-05, |
| "loss": 0.0058, |
| "step": 4880 |
| }, |
| { |
| "epoch": 116.42857142857143, |
| "grad_norm": 0.08931894600391388, |
| "learning_rate": 9.778e-05, |
| "loss": 0.006, |
| "step": 4890 |
| }, |
| { |
| "epoch": 116.66666666666667, |
| "grad_norm": 0.11002742499113083, |
| "learning_rate": 9.798000000000001e-05, |
| "loss": 0.0057, |
| "step": 4900 |
| }, |
| { |
| "epoch": 116.9047619047619, |
| "grad_norm": 0.0879506915807724, |
| "learning_rate": 9.818000000000001e-05, |
| "loss": 0.0056, |
| "step": 4910 |
| }, |
| { |
| "epoch": 117.14285714285714, |
| "grad_norm": 0.13614323735237122, |
| "learning_rate": 9.838e-05, |
| "loss": 0.0068, |
| "step": 4920 |
| }, |
| { |
| "epoch": 117.38095238095238, |
| "grad_norm": 0.1437147706747055, |
| "learning_rate": 9.858000000000001e-05, |
| "loss": 0.0054, |
| "step": 4930 |
| }, |
| { |
| "epoch": 117.61904761904762, |
| "grad_norm": 0.10744836926460266, |
| "learning_rate": 9.878e-05, |
| "loss": 0.0066, |
| "step": 4940 |
| }, |
| { |
| "epoch": 117.85714285714286, |
| "grad_norm": 0.10814452916383743, |
| "learning_rate": 9.898e-05, |
| "loss": 0.006, |
| "step": 4950 |
| }, |
| { |
| "epoch": 118.0952380952381, |
| "grad_norm": 0.13609112799167633, |
| "learning_rate": 9.918000000000001e-05, |
| "loss": 0.0061, |
| "step": 4960 |
| }, |
| { |
| "epoch": 118.33333333333333, |
| "grad_norm": 0.16339436173439026, |
| "learning_rate": 9.938e-05, |
| "loss": 0.0062, |
| "step": 4970 |
| }, |
| { |
| "epoch": 118.57142857142857, |
| "grad_norm": 0.13619464635849, |
| "learning_rate": 9.958e-05, |
| "loss": 0.0066, |
| "step": 4980 |
| }, |
| { |
| "epoch": 118.80952380952381, |
| "grad_norm": 0.16431888937950134, |
| "learning_rate": 9.978000000000001e-05, |
| "loss": 0.0076, |
| "step": 4990 |
| }, |
| { |
| "epoch": 119.04761904761905, |
| "grad_norm": 0.14003992080688477, |
| "learning_rate": 9.998e-05, |
| "loss": 0.006, |
| "step": 5000 |
| }, |
| { |
| "epoch": 119.28571428571429, |
| "grad_norm": 0.135029599070549, |
| "learning_rate": 9.999999778549045e-05, |
| "loss": 0.0058, |
| "step": 5010 |
| }, |
| { |
| "epoch": 119.52380952380952, |
| "grad_norm": 0.13554944097995758, |
| "learning_rate": 9.999999013039593e-05, |
| "loss": 0.0057, |
| "step": 5020 |
| }, |
| { |
| "epoch": 119.76190476190476, |
| "grad_norm": 0.18037289381027222, |
| "learning_rate": 9.999997700737766e-05, |
| "loss": 0.0062, |
| "step": 5030 |
| }, |
| { |
| "epoch": 120.0, |
| "grad_norm": 0.1779029816389084, |
| "learning_rate": 9.999995841643709e-05, |
| "loss": 0.0067, |
| "step": 5040 |
| }, |
| { |
| "epoch": 120.23809523809524, |
| "grad_norm": 0.1433548778295517, |
| "learning_rate": 9.999993435757623e-05, |
| "loss": 0.0067, |
| "step": 5050 |
| }, |
| { |
| "epoch": 120.47619047619048, |
| "grad_norm": 0.1025407686829567, |
| "learning_rate": 9.999990483079773e-05, |
| "loss": 0.007, |
| "step": 5060 |
| }, |
| { |
| "epoch": 120.71428571428571, |
| "grad_norm": 0.13285775482654572, |
| "learning_rate": 9.999986983610481e-05, |
| "loss": 0.0062, |
| "step": 5070 |
| }, |
| { |
| "epoch": 120.95238095238095, |
| "grad_norm": 0.13312703371047974, |
| "learning_rate": 9.99998293735013e-05, |
| "loss": 0.0062, |
| "step": 5080 |
| }, |
| { |
| "epoch": 121.19047619047619, |
| "grad_norm": 0.16387765109539032, |
| "learning_rate": 9.999978344299161e-05, |
| "loss": 0.0065, |
| "step": 5090 |
| }, |
| { |
| "epoch": 121.42857142857143, |
| "grad_norm": 0.14623594284057617, |
| "learning_rate": 9.99997320445808e-05, |
| "loss": 0.0072, |
| "step": 5100 |
| }, |
| { |
| "epoch": 121.66666666666667, |
| "grad_norm": 0.13878948986530304, |
| "learning_rate": 9.999967517827444e-05, |
| "loss": 0.0063, |
| "step": 5110 |
| }, |
| { |
| "epoch": 121.9047619047619, |
| "grad_norm": 0.13989514112472534, |
| "learning_rate": 9.999961284407879e-05, |
| "loss": 0.0069, |
| "step": 5120 |
| }, |
| { |
| "epoch": 122.14285714285714, |
| "grad_norm": 0.13488399982452393, |
| "learning_rate": 9.999954504200067e-05, |
| "loss": 0.0055, |
| "step": 5130 |
| }, |
| { |
| "epoch": 122.38095238095238, |
| "grad_norm": 0.11975318193435669, |
| "learning_rate": 9.999947177204744e-05, |
| "loss": 0.0065, |
| "step": 5140 |
| }, |
| { |
| "epoch": 122.61904761904762, |
| "grad_norm": 0.12528879940509796, |
| "learning_rate": 9.999939303422718e-05, |
| "loss": 0.0063, |
| "step": 5150 |
| }, |
| { |
| "epoch": 122.85714285714286, |
| "grad_norm": 0.1375202238559723, |
| "learning_rate": 9.999930882854847e-05, |
| "loss": 0.006, |
| "step": 5160 |
| }, |
| { |
| "epoch": 123.0952380952381, |
| "grad_norm": 0.14823216199874878, |
| "learning_rate": 9.999921915502051e-05, |
| "loss": 0.0057, |
| "step": 5170 |
| }, |
| { |
| "epoch": 123.33333333333333, |
| "grad_norm": 0.15693895518779755, |
| "learning_rate": 9.99991240136531e-05, |
| "loss": 0.0073, |
| "step": 5180 |
| }, |
| { |
| "epoch": 123.57142857142857, |
| "grad_norm": 0.15830717980861664, |
| "learning_rate": 9.999902340445668e-05, |
| "loss": 0.0056, |
| "step": 5190 |
| }, |
| { |
| "epoch": 123.80952380952381, |
| "grad_norm": 0.11912095546722412, |
| "learning_rate": 9.999891732744224e-05, |
| "loss": 0.0059, |
| "step": 5200 |
| }, |
| { |
| "epoch": 124.04761904761905, |
| "grad_norm": 0.09550514817237854, |
| "learning_rate": 9.999880578262135e-05, |
| "loss": 0.006, |
| "step": 5210 |
| }, |
| { |
| "epoch": 124.28571428571429, |
| "grad_norm": 0.14895232021808624, |
| "learning_rate": 9.999868877000624e-05, |
| "loss": 0.0059, |
| "step": 5220 |
| }, |
| { |
| "epoch": 124.52380952380952, |
| "grad_norm": 0.14100459218025208, |
| "learning_rate": 9.99985662896097e-05, |
| "loss": 0.0062, |
| "step": 5230 |
| }, |
| { |
| "epoch": 124.76190476190476, |
| "grad_norm": 0.18378277122974396, |
| "learning_rate": 9.999843834144513e-05, |
| "loss": 0.0063, |
| "step": 5240 |
| }, |
| { |
| "epoch": 125.0, |
| "grad_norm": 0.13455770909786224, |
| "learning_rate": 9.99983049255265e-05, |
| "loss": 0.006, |
| "step": 5250 |
| }, |
| { |
| "epoch": 125.23809523809524, |
| "grad_norm": 0.12418422102928162, |
| "learning_rate": 9.999816604186843e-05, |
| "loss": 0.0061, |
| "step": 5260 |
| }, |
| { |
| "epoch": 125.47619047619048, |
| "grad_norm": 0.12098632752895355, |
| "learning_rate": 9.999802169048609e-05, |
| "loss": 0.0053, |
| "step": 5270 |
| }, |
| { |
| "epoch": 125.71428571428571, |
| "grad_norm": 0.11943228542804718, |
| "learning_rate": 9.999787187139527e-05, |
| "loss": 0.0053, |
| "step": 5280 |
| }, |
| { |
| "epoch": 125.95238095238095, |
| "grad_norm": 0.14577777683734894, |
| "learning_rate": 9.999771658461234e-05, |
| "loss": 0.0059, |
| "step": 5290 |
| }, |
| { |
| "epoch": 126.19047619047619, |
| "grad_norm": 0.14352424442768097, |
| "learning_rate": 9.999755583015431e-05, |
| "loss": 0.0055, |
| "step": 5300 |
| }, |
| { |
| "epoch": 126.42857142857143, |
| "grad_norm": 0.14176273345947266, |
| "learning_rate": 9.999738960803874e-05, |
| "loss": 0.0058, |
| "step": 5310 |
| }, |
| { |
| "epoch": 126.66666666666667, |
| "grad_norm": 0.14953647553920746, |
| "learning_rate": 9.99972179182838e-05, |
| "loss": 0.0047, |
| "step": 5320 |
| }, |
| { |
| "epoch": 126.9047619047619, |
| "grad_norm": 0.14677667617797852, |
| "learning_rate": 9.99970407609083e-05, |
| "loss": 0.0059, |
| "step": 5330 |
| }, |
| { |
| "epoch": 127.14285714285714, |
| "grad_norm": 0.16852952539920807, |
| "learning_rate": 9.999685813593159e-05, |
| "loss": 0.006, |
| "step": 5340 |
| }, |
| { |
| "epoch": 127.38095238095238, |
| "grad_norm": 0.1334640234708786, |
| "learning_rate": 9.999667004337362e-05, |
| "loss": 0.0052, |
| "step": 5350 |
| }, |
| { |
| "epoch": 127.61904761904762, |
| "grad_norm": 0.13407129049301147, |
| "learning_rate": 9.9996476483255e-05, |
| "loss": 0.0058, |
| "step": 5360 |
| }, |
| { |
| "epoch": 127.85714285714286, |
| "grad_norm": 0.1551060825586319, |
| "learning_rate": 9.999627745559688e-05, |
| "loss": 0.0055, |
| "step": 5370 |
| }, |
| { |
| "epoch": 128.0952380952381, |
| "grad_norm": 0.14662125706672668, |
| "learning_rate": 9.999607296042101e-05, |
| "loss": 0.0059, |
| "step": 5380 |
| }, |
| { |
| "epoch": 128.33333333333334, |
| "grad_norm": 0.15831278264522552, |
| "learning_rate": 9.99958629977498e-05, |
| "loss": 0.0053, |
| "step": 5390 |
| }, |
| { |
| "epoch": 128.57142857142858, |
| "grad_norm": 0.14049497246742249, |
| "learning_rate": 9.999564756760615e-05, |
| "loss": 0.0055, |
| "step": 5400 |
| }, |
| { |
| "epoch": 128.8095238095238, |
| "grad_norm": 0.20336127281188965, |
| "learning_rate": 9.999542667001366e-05, |
| "loss": 0.0059, |
| "step": 5410 |
| }, |
| { |
| "epoch": 129.04761904761904, |
| "grad_norm": 0.14762188494205475, |
| "learning_rate": 9.999520030499647e-05, |
| "loss": 0.0055, |
| "step": 5420 |
| }, |
| { |
| "epoch": 129.28571428571428, |
| "grad_norm": 0.12953810393810272, |
| "learning_rate": 9.999496847257936e-05, |
| "loss": 0.0058, |
| "step": 5430 |
| }, |
| { |
| "epoch": 129.52380952380952, |
| "grad_norm": 0.12495510280132294, |
| "learning_rate": 9.999473117278764e-05, |
| "loss": 0.0061, |
| "step": 5440 |
| }, |
| { |
| "epoch": 129.76190476190476, |
| "grad_norm": 0.1575600951910019, |
| "learning_rate": 9.999448840564731e-05, |
| "loss": 0.0056, |
| "step": 5450 |
| }, |
| { |
| "epoch": 130.0, |
| "grad_norm": 0.16382208466529846, |
| "learning_rate": 9.999424017118488e-05, |
| "loss": 0.0057, |
| "step": 5460 |
| }, |
| { |
| "epoch": 130.23809523809524, |
| "grad_norm": 0.12822803854942322, |
| "learning_rate": 9.999398646942751e-05, |
| "loss": 0.0062, |
| "step": 5470 |
| }, |
| { |
| "epoch": 130.47619047619048, |
| "grad_norm": 0.14973299205303192, |
| "learning_rate": 9.999372730040296e-05, |
| "loss": 0.0056, |
| "step": 5480 |
| }, |
| { |
| "epoch": 130.71428571428572, |
| "grad_norm": 0.10302351415157318, |
| "learning_rate": 9.999346266413953e-05, |
| "loss": 0.0058, |
| "step": 5490 |
| }, |
| { |
| "epoch": 130.95238095238096, |
| "grad_norm": 0.15218771994113922, |
| "learning_rate": 9.99931925606662e-05, |
| "loss": 0.0059, |
| "step": 5500 |
| }, |
| { |
| "epoch": 131.1904761904762, |
| "grad_norm": 0.11885320395231247, |
| "learning_rate": 9.99929169900125e-05, |
| "loss": 0.0055, |
| "step": 5510 |
| }, |
| { |
| "epoch": 131.42857142857142, |
| "grad_norm": 0.11020412296056747, |
| "learning_rate": 9.999263595220855e-05, |
| "loss": 0.005, |
| "step": 5520 |
| }, |
| { |
| "epoch": 131.66666666666666, |
| "grad_norm": 0.12075421214103699, |
| "learning_rate": 9.99923494472851e-05, |
| "loss": 0.0052, |
| "step": 5530 |
| }, |
| { |
| "epoch": 131.9047619047619, |
| "grad_norm": 0.15431839227676392, |
| "learning_rate": 9.999205747527348e-05, |
| "loss": 0.0062, |
| "step": 5540 |
| }, |
| { |
| "epoch": 132.14285714285714, |
| "grad_norm": 0.14477379620075226, |
| "learning_rate": 9.999176003620561e-05, |
| "loss": 0.0055, |
| "step": 5550 |
| }, |
| { |
| "epoch": 132.38095238095238, |
| "grad_norm": 0.10674669593572617, |
| "learning_rate": 9.999145713011405e-05, |
| "loss": 0.0056, |
| "step": 5560 |
| }, |
| { |
| "epoch": 132.61904761904762, |
| "grad_norm": 0.11590486019849777, |
| "learning_rate": 9.999114875703186e-05, |
| "loss": 0.0053, |
| "step": 5570 |
| }, |
| { |
| "epoch": 132.85714285714286, |
| "grad_norm": 0.15544815361499786, |
| "learning_rate": 9.999083491699281e-05, |
| "loss": 0.006, |
| "step": 5580 |
| }, |
| { |
| "epoch": 133.0952380952381, |
| "grad_norm": 0.1322612166404724, |
| "learning_rate": 9.999051561003123e-05, |
| "loss": 0.006, |
| "step": 5590 |
| }, |
| { |
| "epoch": 133.33333333333334, |
| "grad_norm": 0.11986099183559418, |
| "learning_rate": 9.999019083618202e-05, |
| "loss": 0.006, |
| "step": 5600 |
| }, |
| { |
| "epoch": 133.57142857142858, |
| "grad_norm": 0.08869463950395584, |
| "learning_rate": 9.99898605954807e-05, |
| "loss": 0.0064, |
| "step": 5610 |
| }, |
| { |
| "epoch": 133.8095238095238, |
| "grad_norm": 0.12605644762516022, |
| "learning_rate": 9.998952488796338e-05, |
| "loss": 0.0058, |
| "step": 5620 |
| }, |
| { |
| "epoch": 134.04761904761904, |
| "grad_norm": 0.11599677056074142, |
| "learning_rate": 9.998918371366676e-05, |
| "loss": 0.0056, |
| "step": 5630 |
| }, |
| { |
| "epoch": 134.28571428571428, |
| "grad_norm": 0.12829624116420746, |
| "learning_rate": 9.99888370726282e-05, |
| "loss": 0.0054, |
| "step": 5640 |
| }, |
| { |
| "epoch": 134.52380952380952, |
| "grad_norm": 0.1482323855161667, |
| "learning_rate": 9.998848496488556e-05, |
| "loss": 0.0055, |
| "step": 5650 |
| }, |
| { |
| "epoch": 134.76190476190476, |
| "grad_norm": 0.11115171015262604, |
| "learning_rate": 9.998812739047736e-05, |
| "loss": 0.0062, |
| "step": 5660 |
| }, |
| { |
| "epoch": 135.0, |
| "grad_norm": 0.0969790369272232, |
| "learning_rate": 9.99877643494427e-05, |
| "loss": 0.0062, |
| "step": 5670 |
| }, |
| { |
| "epoch": 135.23809523809524, |
| "grad_norm": 0.14078184962272644, |
| "learning_rate": 9.998739584182128e-05, |
| "loss": 0.0066, |
| "step": 5680 |
| }, |
| { |
| "epoch": 135.47619047619048, |
| "grad_norm": 0.13027994334697723, |
| "learning_rate": 9.998702186765342e-05, |
| "loss": 0.0056, |
| "step": 5690 |
| }, |
| { |
| "epoch": 135.71428571428572, |
| "grad_norm": 0.11331813782453537, |
| "learning_rate": 9.998664242698e-05, |
| "loss": 0.0055, |
| "step": 5700 |
| }, |
| { |
| "epoch": 135.95238095238096, |
| "grad_norm": 0.1183035746216774, |
| "learning_rate": 9.998625751984251e-05, |
| "loss": 0.0047, |
| "step": 5710 |
| }, |
| { |
| "epoch": 136.1904761904762, |
| "grad_norm": 0.12869036197662354, |
| "learning_rate": 9.998586714628307e-05, |
| "loss": 0.0054, |
| "step": 5720 |
| }, |
| { |
| "epoch": 136.42857142857142, |
| "grad_norm": 0.16940119862556458, |
| "learning_rate": 9.998547130634432e-05, |
| "loss": 0.0057, |
| "step": 5730 |
| }, |
| { |
| "epoch": 136.66666666666666, |
| "grad_norm": 0.16292321681976318, |
| "learning_rate": 9.99850700000696e-05, |
| "loss": 0.0056, |
| "step": 5740 |
| }, |
| { |
| "epoch": 136.9047619047619, |
| "grad_norm": 0.19316048920154572, |
| "learning_rate": 9.998466322750278e-05, |
| "loss": 0.0057, |
| "step": 5750 |
| }, |
| { |
| "epoch": 137.14285714285714, |
| "grad_norm": 0.21143263578414917, |
| "learning_rate": 9.998425098868834e-05, |
| "loss": 0.0057, |
| "step": 5760 |
| }, |
| { |
| "epoch": 137.38095238095238, |
| "grad_norm": 0.16651703417301178, |
| "learning_rate": 9.998383328367136e-05, |
| "loss": 0.0058, |
| "step": 5770 |
| }, |
| { |
| "epoch": 137.61904761904762, |
| "grad_norm": 0.11469289660453796, |
| "learning_rate": 9.99834101124975e-05, |
| "loss": 0.006, |
| "step": 5780 |
| }, |
| { |
| "epoch": 137.85714285714286, |
| "grad_norm": 0.15064246952533722, |
| "learning_rate": 9.998298147521309e-05, |
| "loss": 0.0063, |
| "step": 5790 |
| }, |
| { |
| "epoch": 138.0952380952381, |
| "grad_norm": 0.15757083892822266, |
| "learning_rate": 9.998254737186496e-05, |
| "loss": 0.0054, |
| "step": 5800 |
| }, |
| { |
| "epoch": 138.33333333333334, |
| "grad_norm": 0.16035474836826324, |
| "learning_rate": 9.99821078025006e-05, |
| "loss": 0.0055, |
| "step": 5810 |
| }, |
| { |
| "epoch": 138.57142857142858, |
| "grad_norm": 0.17456494271755219, |
| "learning_rate": 9.998166276716807e-05, |
| "loss": 0.006, |
| "step": 5820 |
| }, |
| { |
| "epoch": 138.8095238095238, |
| "grad_norm": 0.12828657031059265, |
| "learning_rate": 9.998121226591606e-05, |
| "loss": 0.0054, |
| "step": 5830 |
| }, |
| { |
| "epoch": 139.04761904761904, |
| "grad_norm": 0.13332265615463257, |
| "learning_rate": 9.998075629879382e-05, |
| "loss": 0.0055, |
| "step": 5840 |
| }, |
| { |
| "epoch": 139.28571428571428, |
| "grad_norm": 0.14584071934223175, |
| "learning_rate": 9.99802948658512e-05, |
| "loss": 0.0055, |
| "step": 5850 |
| }, |
| { |
| "epoch": 139.52380952380952, |
| "grad_norm": 0.1273544728755951, |
| "learning_rate": 9.99798279671387e-05, |
| "loss": 0.0062, |
| "step": 5860 |
| }, |
| { |
| "epoch": 139.76190476190476, |
| "grad_norm": 0.14634773135185242, |
| "learning_rate": 9.997935560270734e-05, |
| "loss": 0.0064, |
| "step": 5870 |
| }, |
| { |
| "epoch": 140.0, |
| "grad_norm": 0.1520228534936905, |
| "learning_rate": 9.997887777260879e-05, |
| "loss": 0.0055, |
| "step": 5880 |
| }, |
| { |
| "epoch": 140.23809523809524, |
| "grad_norm": 0.13599805533885956, |
| "learning_rate": 9.997839447689532e-05, |
| "loss": 0.0063, |
| "step": 5890 |
| }, |
| { |
| "epoch": 140.47619047619048, |
| "grad_norm": 0.1647782027721405, |
| "learning_rate": 9.997790571561978e-05, |
| "loss": 0.0059, |
| "step": 5900 |
| }, |
| { |
| "epoch": 140.71428571428572, |
| "grad_norm": 0.1353592574596405, |
| "learning_rate": 9.99774114888356e-05, |
| "loss": 0.0055, |
| "step": 5910 |
| }, |
| { |
| "epoch": 140.95238095238096, |
| "grad_norm": 0.13123513758182526, |
| "learning_rate": 9.997691179659684e-05, |
| "loss": 0.0058, |
| "step": 5920 |
| }, |
| { |
| "epoch": 141.1904761904762, |
| "grad_norm": 0.11137402802705765, |
| "learning_rate": 9.997640663895815e-05, |
| "loss": 0.0054, |
| "step": 5930 |
| }, |
| { |
| "epoch": 141.42857142857142, |
| "grad_norm": 0.12953342497348785, |
| "learning_rate": 9.997589601597477e-05, |
| "loss": 0.0063, |
| "step": 5940 |
| }, |
| { |
| "epoch": 141.66666666666666, |
| "grad_norm": 0.126222163438797, |
| "learning_rate": 9.997537992770252e-05, |
| "loss": 0.0051, |
| "step": 5950 |
| }, |
| { |
| "epoch": 141.9047619047619, |
| "grad_norm": 0.11339060217142105, |
| "learning_rate": 9.997485837419788e-05, |
| "loss": 0.0056, |
| "step": 5960 |
| }, |
| { |
| "epoch": 142.14285714285714, |
| "grad_norm": 0.11106069386005402, |
| "learning_rate": 9.997433135551786e-05, |
| "loss": 0.0052, |
| "step": 5970 |
| }, |
| { |
| "epoch": 142.38095238095238, |
| "grad_norm": 0.11859191209077835, |
| "learning_rate": 9.997379887172009e-05, |
| "loss": 0.0048, |
| "step": 5980 |
| }, |
| { |
| "epoch": 142.61904761904762, |
| "grad_norm": 0.1552848517894745, |
| "learning_rate": 9.997326092286281e-05, |
| "loss": 0.0056, |
| "step": 5990 |
| }, |
| { |
| "epoch": 142.85714285714286, |
| "grad_norm": 0.12435193359851837, |
| "learning_rate": 9.997271750900486e-05, |
| "loss": 0.0055, |
| "step": 6000 |
| }, |
| { |
| "epoch": 143.0952380952381, |
| "grad_norm": 0.12630172073841095, |
| "learning_rate": 9.997216863020565e-05, |
| "loss": 0.005, |
| "step": 6010 |
| }, |
| { |
| "epoch": 143.33333333333334, |
| "grad_norm": 0.11535774916410446, |
| "learning_rate": 9.99716142865252e-05, |
| "loss": 0.0052, |
| "step": 6020 |
| }, |
| { |
| "epoch": 143.57142857142858, |
| "grad_norm": 0.11773218214511871, |
| "learning_rate": 9.997105447802415e-05, |
| "loss": 0.0054, |
| "step": 6030 |
| }, |
| { |
| "epoch": 143.8095238095238, |
| "grad_norm": 0.1480342000722885, |
| "learning_rate": 9.997048920476373e-05, |
| "loss": 0.0055, |
| "step": 6040 |
| }, |
| { |
| "epoch": 144.04761904761904, |
| "grad_norm": 0.10526175051927567, |
| "learning_rate": 9.996991846680572e-05, |
| "loss": 0.0057, |
| "step": 6050 |
| }, |
| { |
| "epoch": 144.28571428571428, |
| "grad_norm": 0.14318504929542542, |
| "learning_rate": 9.996934226421257e-05, |
| "loss": 0.0051, |
| "step": 6060 |
| }, |
| { |
| "epoch": 144.52380952380952, |
| "grad_norm": 0.1683148443698883, |
| "learning_rate": 9.996876059704726e-05, |
| "loss": 0.0053, |
| "step": 6070 |
| }, |
| { |
| "epoch": 144.76190476190476, |
| "grad_norm": 0.13118334114551544, |
| "learning_rate": 9.996817346537343e-05, |
| "loss": 0.0065, |
| "step": 6080 |
| }, |
| { |
| "epoch": 145.0, |
| "grad_norm": 0.10521922260522842, |
| "learning_rate": 9.996758086925526e-05, |
| "loss": 0.0057, |
| "step": 6090 |
| }, |
| { |
| "epoch": 145.23809523809524, |
| "grad_norm": 0.16581527888774872, |
| "learning_rate": 9.996698280875759e-05, |
| "loss": 0.0055, |
| "step": 6100 |
| }, |
| { |
| "epoch": 145.47619047619048, |
| "grad_norm": 0.14781220257282257, |
| "learning_rate": 9.99663792839458e-05, |
| "loss": 0.0061, |
| "step": 6110 |
| }, |
| { |
| "epoch": 145.71428571428572, |
| "grad_norm": 0.1712285280227661, |
| "learning_rate": 9.99657702948859e-05, |
| "loss": 0.0057, |
| "step": 6120 |
| }, |
| { |
| "epoch": 145.95238095238096, |
| "grad_norm": 0.1566563993692398, |
| "learning_rate": 9.996515584164448e-05, |
| "loss": 0.0055, |
| "step": 6130 |
| }, |
| { |
| "epoch": 146.1904761904762, |
| "grad_norm": 0.1589353084564209, |
| "learning_rate": 9.996453592428873e-05, |
| "loss": 0.0052, |
| "step": 6140 |
| }, |
| { |
| "epoch": 146.42857142857142, |
| "grad_norm": 0.07841801643371582, |
| "learning_rate": 9.996391054288646e-05, |
| "loss": 0.0051, |
| "step": 6150 |
| }, |
| { |
| "epoch": 146.66666666666666, |
| "grad_norm": 0.08558449894189835, |
| "learning_rate": 9.996327969750605e-05, |
| "loss": 0.0053, |
| "step": 6160 |
| }, |
| { |
| "epoch": 146.9047619047619, |
| "grad_norm": 0.11871296167373657, |
| "learning_rate": 9.996264338821649e-05, |
| "loss": 0.0047, |
| "step": 6170 |
| }, |
| { |
| "epoch": 147.14285714285714, |
| "grad_norm": 0.14920009672641754, |
| "learning_rate": 9.996200161508735e-05, |
| "loss": 0.0055, |
| "step": 6180 |
| }, |
| { |
| "epoch": 147.38095238095238, |
| "grad_norm": 0.13973234593868256, |
| "learning_rate": 9.996135437818885e-05, |
| "loss": 0.0052, |
| "step": 6190 |
| }, |
| { |
| "epoch": 147.61904761904762, |
| "grad_norm": 0.1307632476091385, |
| "learning_rate": 9.996070167759175e-05, |
| "loss": 0.0058, |
| "step": 6200 |
| }, |
| { |
| "epoch": 147.85714285714286, |
| "grad_norm": 0.11196910589933395, |
| "learning_rate": 9.996004351336743e-05, |
| "loss": 0.0056, |
| "step": 6210 |
| }, |
| { |
| "epoch": 148.0952380952381, |
| "grad_norm": 0.1052478477358818, |
| "learning_rate": 9.995937988558785e-05, |
| "loss": 0.0057, |
| "step": 6220 |
| }, |
| { |
| "epoch": 148.33333333333334, |
| "grad_norm": 0.10207177698612213, |
| "learning_rate": 9.995871079432561e-05, |
| "loss": 0.0055, |
| "step": 6230 |
| }, |
| { |
| "epoch": 148.57142857142858, |
| "grad_norm": 0.0799967423081398, |
| "learning_rate": 9.995803623965389e-05, |
| "loss": 0.0057, |
| "step": 6240 |
| }, |
| { |
| "epoch": 148.8095238095238, |
| "grad_norm": 0.1409079134464264, |
| "learning_rate": 9.995735622164641e-05, |
| "loss": 0.0056, |
| "step": 6250 |
| }, |
| { |
| "epoch": 149.04761904761904, |
| "grad_norm": 0.12327799201011658, |
| "learning_rate": 9.995667074037758e-05, |
| "loss": 0.0054, |
| "step": 6260 |
| }, |
| { |
| "epoch": 149.28571428571428, |
| "grad_norm": 0.11791429668664932, |
| "learning_rate": 9.995597979592232e-05, |
| "loss": 0.0058, |
| "step": 6270 |
| }, |
| { |
| "epoch": 149.52380952380952, |
| "grad_norm": 0.08962273597717285, |
| "learning_rate": 9.995528338835625e-05, |
| "loss": 0.0052, |
| "step": 6280 |
| }, |
| { |
| "epoch": 149.76190476190476, |
| "grad_norm": 0.11616931110620499, |
| "learning_rate": 9.995458151775547e-05, |
| "loss": 0.0057, |
| "step": 6290 |
| }, |
| { |
| "epoch": 150.0, |
| "grad_norm": 0.18882684409618378, |
| "learning_rate": 9.995387418419677e-05, |
| "loss": 0.0052, |
| "step": 6300 |
| }, |
| { |
| "epoch": 150.23809523809524, |
| "grad_norm": 0.12603889405727386, |
| "learning_rate": 9.99531613877575e-05, |
| "loss": 0.0058, |
| "step": 6310 |
| }, |
| { |
| "epoch": 150.47619047619048, |
| "grad_norm": 0.13159579038619995, |
| "learning_rate": 9.995244312851559e-05, |
| "loss": 0.0056, |
| "step": 6320 |
| }, |
| { |
| "epoch": 150.71428571428572, |
| "grad_norm": 0.08307720720767975, |
| "learning_rate": 9.995171940654961e-05, |
| "loss": 0.0052, |
| "step": 6330 |
| }, |
| { |
| "epoch": 150.95238095238096, |
| "grad_norm": 0.11263678222894669, |
| "learning_rate": 9.995099022193871e-05, |
| "loss": 0.0055, |
| "step": 6340 |
| }, |
| { |
| "epoch": 151.1904761904762, |
| "grad_norm": 0.09234488755464554, |
| "learning_rate": 9.995025557476261e-05, |
| "loss": 0.0048, |
| "step": 6350 |
| }, |
| { |
| "epoch": 151.42857142857142, |
| "grad_norm": 0.11990541964769363, |
| "learning_rate": 9.994951546510165e-05, |
| "loss": 0.0052, |
| "step": 6360 |
| }, |
| { |
| "epoch": 151.66666666666666, |
| "grad_norm": 0.10959186404943466, |
| "learning_rate": 9.994876989303679e-05, |
| "loss": 0.0057, |
| "step": 6370 |
| }, |
| { |
| "epoch": 151.9047619047619, |
| "grad_norm": 0.10707031935453415, |
| "learning_rate": 9.994801885864955e-05, |
| "loss": 0.0052, |
| "step": 6380 |
| }, |
| { |
| "epoch": 152.14285714285714, |
| "grad_norm": 0.09813109040260315, |
| "learning_rate": 9.994726236202205e-05, |
| "loss": 0.0056, |
| "step": 6390 |
| }, |
| { |
| "epoch": 152.38095238095238, |
| "grad_norm": 0.1409860998392105, |
| "learning_rate": 9.994650040323704e-05, |
| "loss": 0.0056, |
| "step": 6400 |
| }, |
| { |
| "epoch": 152.61904761904762, |
| "grad_norm": 0.11843699961900711, |
| "learning_rate": 9.994573298237784e-05, |
| "loss": 0.0047, |
| "step": 6410 |
| }, |
| { |
| "epoch": 152.85714285714286, |
| "grad_norm": 0.12546250224113464, |
| "learning_rate": 9.994496009952837e-05, |
| "loss": 0.0047, |
| "step": 6420 |
| }, |
| { |
| "epoch": 153.0952380952381, |
| "grad_norm": 0.10236939787864685, |
| "learning_rate": 9.994418175477316e-05, |
| "loss": 0.0055, |
| "step": 6430 |
| }, |
| { |
| "epoch": 153.33333333333334, |
| "grad_norm": 0.12065128982067108, |
| "learning_rate": 9.994339794819733e-05, |
| "loss": 0.0048, |
| "step": 6440 |
| }, |
| { |
| "epoch": 153.57142857142858, |
| "grad_norm": 0.1319369226694107, |
| "learning_rate": 9.994260867988658e-05, |
| "loss": 0.0055, |
| "step": 6450 |
| }, |
| { |
| "epoch": 153.8095238095238, |
| "grad_norm": 0.1429581344127655, |
| "learning_rate": 9.994181394992723e-05, |
| "loss": 0.0048, |
| "step": 6460 |
| }, |
| { |
| "epoch": 154.04761904761904, |
| "grad_norm": 0.11288180947303772, |
| "learning_rate": 9.994101375840618e-05, |
| "loss": 0.0048, |
| "step": 6470 |
| }, |
| { |
| "epoch": 154.28571428571428, |
| "grad_norm": 0.10542032867670059, |
| "learning_rate": 9.994020810541098e-05, |
| "loss": 0.0052, |
| "step": 6480 |
| }, |
| { |
| "epoch": 154.52380952380952, |
| "grad_norm": 0.11652541160583496, |
| "learning_rate": 9.99393969910297e-05, |
| "loss": 0.0052, |
| "step": 6490 |
| }, |
| { |
| "epoch": 154.76190476190476, |
| "grad_norm": 0.10989256203174591, |
| "learning_rate": 9.993858041535104e-05, |
| "loss": 0.0058, |
| "step": 6500 |
| }, |
| { |
| "epoch": 155.0, |
| "grad_norm": 0.10669521242380142, |
| "learning_rate": 9.99377583784643e-05, |
| "loss": 0.0053, |
| "step": 6510 |
| }, |
| { |
| "epoch": 155.23809523809524, |
| "grad_norm": 0.09719312191009521, |
| "learning_rate": 9.993693088045939e-05, |
| "loss": 0.0053, |
| "step": 6520 |
| }, |
| { |
| "epoch": 155.47619047619048, |
| "grad_norm": 0.14308500289916992, |
| "learning_rate": 9.99360979214268e-05, |
| "loss": 0.0051, |
| "step": 6530 |
| }, |
| { |
| "epoch": 155.71428571428572, |
| "grad_norm": 0.07576554268598557, |
| "learning_rate": 9.99352595014576e-05, |
| "loss": 0.0049, |
| "step": 6540 |
| }, |
| { |
| "epoch": 155.95238095238096, |
| "grad_norm": 0.10764189064502716, |
| "learning_rate": 9.993441562064354e-05, |
| "loss": 0.0051, |
| "step": 6550 |
| }, |
| { |
| "epoch": 156.1904761904762, |
| "grad_norm": 0.13403144478797913, |
| "learning_rate": 9.993356627907685e-05, |
| "loss": 0.0052, |
| "step": 6560 |
| }, |
| { |
| "epoch": 156.42857142857142, |
| "grad_norm": 0.12355195730924606, |
| "learning_rate": 9.99327114768504e-05, |
| "loss": 0.0059, |
| "step": 6570 |
| }, |
| { |
| "epoch": 156.66666666666666, |
| "grad_norm": 0.14904069900512695, |
| "learning_rate": 9.99318512140577e-05, |
| "loss": 0.0058, |
| "step": 6580 |
| }, |
| { |
| "epoch": 156.9047619047619, |
| "grad_norm": 0.13994714617729187, |
| "learning_rate": 9.993098549079284e-05, |
| "loss": 0.0059, |
| "step": 6590 |
| }, |
| { |
| "epoch": 157.14285714285714, |
| "grad_norm": 0.15971316397190094, |
| "learning_rate": 9.993011430715047e-05, |
| "loss": 0.0064, |
| "step": 6600 |
| }, |
| { |
| "epoch": 157.38095238095238, |
| "grad_norm": 0.11968282610177994, |
| "learning_rate": 9.992923766322586e-05, |
| "loss": 0.0053, |
| "step": 6610 |
| }, |
| { |
| "epoch": 157.61904761904762, |
| "grad_norm": 0.11539441347122192, |
| "learning_rate": 9.99283555591149e-05, |
| "loss": 0.0055, |
| "step": 6620 |
| }, |
| { |
| "epoch": 157.85714285714286, |
| "grad_norm": 0.11662052571773529, |
| "learning_rate": 9.992746799491404e-05, |
| "loss": 0.0061, |
| "step": 6630 |
| }, |
| { |
| "epoch": 158.0952380952381, |
| "grad_norm": 0.11560940742492676, |
| "learning_rate": 9.992657497072033e-05, |
| "loss": 0.0058, |
| "step": 6640 |
| }, |
| { |
| "epoch": 158.33333333333334, |
| "grad_norm": 0.14160063862800598, |
| "learning_rate": 9.992567648663147e-05, |
| "loss": 0.007, |
| "step": 6650 |
| }, |
| { |
| "epoch": 158.57142857142858, |
| "grad_norm": 0.1159655824303627, |
| "learning_rate": 9.992477254274568e-05, |
| "loss": 0.0061, |
| "step": 6660 |
| }, |
| { |
| "epoch": 158.8095238095238, |
| "grad_norm": 0.11605243384838104, |
| "learning_rate": 9.992386313916183e-05, |
| "loss": 0.0054, |
| "step": 6670 |
| }, |
| { |
| "epoch": 159.04761904761904, |
| "grad_norm": 0.13676942884922028, |
| "learning_rate": 9.992294827597934e-05, |
| "loss": 0.0056, |
| "step": 6680 |
| }, |
| { |
| "epoch": 159.28571428571428, |
| "grad_norm": 0.16445256769657135, |
| "learning_rate": 9.992202795329831e-05, |
| "loss": 0.0062, |
| "step": 6690 |
| }, |
| { |
| "epoch": 159.52380952380952, |
| "grad_norm": 0.1508353054523468, |
| "learning_rate": 9.992110217121936e-05, |
| "loss": 0.0067, |
| "step": 6700 |
| }, |
| { |
| "epoch": 159.76190476190476, |
| "grad_norm": 0.11327709257602692, |
| "learning_rate": 9.992017092984372e-05, |
| "loss": 0.0049, |
| "step": 6710 |
| }, |
| { |
| "epoch": 160.0, |
| "grad_norm": 0.1604740172624588, |
| "learning_rate": 9.991923422927326e-05, |
| "loss": 0.0053, |
| "step": 6720 |
| }, |
| { |
| "epoch": 160.23809523809524, |
| "grad_norm": 0.12607985734939575, |
| "learning_rate": 9.991829206961037e-05, |
| "loss": 0.0048, |
| "step": 6730 |
| }, |
| { |
| "epoch": 160.47619047619048, |
| "grad_norm": 0.12995365262031555, |
| "learning_rate": 9.991734445095813e-05, |
| "loss": 0.0048, |
| "step": 6740 |
| }, |
| { |
| "epoch": 160.71428571428572, |
| "grad_norm": 0.134323388338089, |
| "learning_rate": 9.991639137342015e-05, |
| "loss": 0.0053, |
| "step": 6750 |
| }, |
| { |
| "epoch": 160.95238095238096, |
| "grad_norm": 0.11326022446155548, |
| "learning_rate": 9.991543283710064e-05, |
| "loss": 0.0055, |
| "step": 6760 |
| }, |
| { |
| "epoch": 161.1904761904762, |
| "grad_norm": 0.11092625558376312, |
| "learning_rate": 9.991446884210445e-05, |
| "loss": 0.0056, |
| "step": 6770 |
| }, |
| { |
| "epoch": 161.42857142857142, |
| "grad_norm": 0.11235698312520981, |
| "learning_rate": 9.9913499388537e-05, |
| "loss": 0.0045, |
| "step": 6780 |
| }, |
| { |
| "epoch": 161.66666666666666, |
| "grad_norm": 0.11208867281675339, |
| "learning_rate": 9.99125244765043e-05, |
| "loss": 0.0045, |
| "step": 6790 |
| }, |
| { |
| "epoch": 161.9047619047619, |
| "grad_norm": 0.1136651560664177, |
| "learning_rate": 9.991154410611296e-05, |
| "loss": 0.0052, |
| "step": 6800 |
| }, |
| { |
| "epoch": 162.14285714285714, |
| "grad_norm": 0.1240292340517044, |
| "learning_rate": 9.99105582774702e-05, |
| "loss": 0.0048, |
| "step": 6810 |
| }, |
| { |
| "epoch": 162.38095238095238, |
| "grad_norm": 0.13426350057125092, |
| "learning_rate": 9.990956699068384e-05, |
| "loss": 0.0052, |
| "step": 6820 |
| }, |
| { |
| "epoch": 162.61904761904762, |
| "grad_norm": 0.11639861017465591, |
| "learning_rate": 9.990857024586224e-05, |
| "loss": 0.005, |
| "step": 6830 |
| }, |
| { |
| "epoch": 162.85714285714286, |
| "grad_norm": 0.12387306243181229, |
| "learning_rate": 9.990756804311446e-05, |
| "loss": 0.0048, |
| "step": 6840 |
| }, |
| { |
| "epoch": 163.0952380952381, |
| "grad_norm": 0.12166065722703934, |
| "learning_rate": 9.990656038255006e-05, |
| "loss": 0.0042, |
| "step": 6850 |
| }, |
| { |
| "epoch": 163.33333333333334, |
| "grad_norm": 0.10722938179969788, |
| "learning_rate": 9.990554726427926e-05, |
| "loss": 0.0049, |
| "step": 6860 |
| }, |
| { |
| "epoch": 163.57142857142858, |
| "grad_norm": 0.12783361971378326, |
| "learning_rate": 9.990452868841284e-05, |
| "loss": 0.0054, |
| "step": 6870 |
| }, |
| { |
| "epoch": 163.8095238095238, |
| "grad_norm": 0.13198988139629364, |
| "learning_rate": 9.99035046550622e-05, |
| "loss": 0.0052, |
| "step": 6880 |
| }, |
| { |
| "epoch": 164.04761904761904, |
| "grad_norm": 0.11582666635513306, |
| "learning_rate": 9.99024751643393e-05, |
| "loss": 0.0052, |
| "step": 6890 |
| }, |
| { |
| "epoch": 164.28571428571428, |
| "grad_norm": 0.11445613950490952, |
| "learning_rate": 9.990144021635677e-05, |
| "loss": 0.0048, |
| "step": 6900 |
| }, |
| { |
| "epoch": 164.52380952380952, |
| "grad_norm": 0.11646226048469543, |
| "learning_rate": 9.990039981122775e-05, |
| "loss": 0.0055, |
| "step": 6910 |
| }, |
| { |
| "epoch": 164.76190476190476, |
| "grad_norm": 0.11447363346815109, |
| "learning_rate": 9.989935394906602e-05, |
| "loss": 0.0051, |
| "step": 6920 |
| }, |
| { |
| "epoch": 165.0, |
| "grad_norm": 0.13199123740196228, |
| "learning_rate": 9.989830262998598e-05, |
| "loss": 0.0063, |
| "step": 6930 |
| }, |
| { |
| "epoch": 165.23809523809524, |
| "grad_norm": 0.1693180799484253, |
| "learning_rate": 9.989724585410259e-05, |
| "loss": 0.0053, |
| "step": 6940 |
| }, |
| { |
| "epoch": 165.47619047619048, |
| "grad_norm": 0.1327705681324005, |
| "learning_rate": 9.989618362153139e-05, |
| "loss": 0.0056, |
| "step": 6950 |
| }, |
| { |
| "epoch": 165.71428571428572, |
| "grad_norm": 0.1536731719970703, |
| "learning_rate": 9.989511593238859e-05, |
| "loss": 0.0049, |
| "step": 6960 |
| }, |
| { |
| "epoch": 165.95238095238096, |
| "grad_norm": 0.16071444749832153, |
| "learning_rate": 9.98940427867909e-05, |
| "loss": 0.0052, |
| "step": 6970 |
| }, |
| { |
| "epoch": 166.1904761904762, |
| "grad_norm": 0.16174905002117157, |
| "learning_rate": 9.989296418485573e-05, |
| "loss": 0.0065, |
| "step": 6980 |
| }, |
| { |
| "epoch": 166.42857142857142, |
| "grad_norm": 0.1563325971364975, |
| "learning_rate": 9.989188012670101e-05, |
| "loss": 0.0052, |
| "step": 6990 |
| }, |
| { |
| "epoch": 166.66666666666666, |
| "grad_norm": 0.1007891446352005, |
| "learning_rate": 9.989079061244528e-05, |
| "loss": 0.0056, |
| "step": 7000 |
| }, |
| { |
| "epoch": 166.9047619047619, |
| "grad_norm": 0.1034979298710823, |
| "learning_rate": 9.988969564220769e-05, |
| "loss": 0.0053, |
| "step": 7010 |
| }, |
| { |
| "epoch": 167.14285714285714, |
| "grad_norm": 0.1117708757519722, |
| "learning_rate": 9.988859521610801e-05, |
| "loss": 0.0053, |
| "step": 7020 |
| }, |
| { |
| "epoch": 167.38095238095238, |
| "grad_norm": 0.18143479526042938, |
| "learning_rate": 9.988748933426656e-05, |
| "loss": 0.0056, |
| "step": 7030 |
| }, |
| { |
| "epoch": 167.61904761904762, |
| "grad_norm": 0.11078419536352158, |
| "learning_rate": 9.988637799680428e-05, |
| "loss": 0.0058, |
| "step": 7040 |
| }, |
| { |
| "epoch": 167.85714285714286, |
| "grad_norm": 0.13102209568023682, |
| "learning_rate": 9.98852612038427e-05, |
| "loss": 0.0048, |
| "step": 7050 |
| }, |
| { |
| "epoch": 168.0952380952381, |
| "grad_norm": 0.09365234524011612, |
| "learning_rate": 9.988413895550397e-05, |
| "loss": 0.0045, |
| "step": 7060 |
| }, |
| { |
| "epoch": 168.33333333333334, |
| "grad_norm": 0.11247110366821289, |
| "learning_rate": 9.98830112519108e-05, |
| "loss": 0.0059, |
| "step": 7070 |
| }, |
| { |
| "epoch": 168.57142857142858, |
| "grad_norm": 0.11538353562355042, |
| "learning_rate": 9.98818780931865e-05, |
| "loss": 0.0048, |
| "step": 7080 |
| }, |
| { |
| "epoch": 168.8095238095238, |
| "grad_norm": 0.15419597923755646, |
| "learning_rate": 9.988073947945502e-05, |
| "loss": 0.005, |
| "step": 7090 |
| }, |
| { |
| "epoch": 169.04761904761904, |
| "grad_norm": 0.08312085270881653, |
| "learning_rate": 9.987959541084087e-05, |
| "loss": 0.0046, |
| "step": 7100 |
| }, |
| { |
| "epoch": 169.28571428571428, |
| "grad_norm": 0.09424073994159698, |
| "learning_rate": 9.987844588746915e-05, |
| "loss": 0.004, |
| "step": 7110 |
| }, |
| { |
| "epoch": 169.52380952380952, |
| "grad_norm": 0.13110065460205078, |
| "learning_rate": 9.987729090946558e-05, |
| "loss": 0.0047, |
| "step": 7120 |
| }, |
| { |
| "epoch": 169.76190476190476, |
| "grad_norm": 0.1349477469921112, |
| "learning_rate": 9.987613047695647e-05, |
| "loss": 0.0049, |
| "step": 7130 |
| }, |
| { |
| "epoch": 170.0, |
| "grad_norm": 0.10708650201559067, |
| "learning_rate": 9.987496459006871e-05, |
| "loss": 0.0046, |
| "step": 7140 |
| }, |
| { |
| "epoch": 170.23809523809524, |
| "grad_norm": 0.12619680166244507, |
| "learning_rate": 9.987379324892982e-05, |
| "loss": 0.0058, |
| "step": 7150 |
| }, |
| { |
| "epoch": 170.47619047619048, |
| "grad_norm": 0.13569281995296478, |
| "learning_rate": 9.987261645366788e-05, |
| "loss": 0.0049, |
| "step": 7160 |
| }, |
| { |
| "epoch": 170.71428571428572, |
| "grad_norm": 0.10620571672916412, |
| "learning_rate": 9.987143420441158e-05, |
| "loss": 0.0048, |
| "step": 7170 |
| }, |
| { |
| "epoch": 170.95238095238096, |
| "grad_norm": 0.15193606913089752, |
| "learning_rate": 9.987024650129022e-05, |
| "loss": 0.0049, |
| "step": 7180 |
| }, |
| { |
| "epoch": 171.1904761904762, |
| "grad_norm": 0.1510898768901825, |
| "learning_rate": 9.986905334443368e-05, |
| "loss": 0.0058, |
| "step": 7190 |
| }, |
| { |
| "epoch": 171.42857142857142, |
| "grad_norm": 0.10097415745258331, |
| "learning_rate": 9.986785473397245e-05, |
| "loss": 0.0049, |
| "step": 7200 |
| }, |
| { |
| "epoch": 171.66666666666666, |
| "grad_norm": 0.11911772936582565, |
| "learning_rate": 9.98666506700376e-05, |
| "loss": 0.0054, |
| "step": 7210 |
| }, |
| { |
| "epoch": 171.9047619047619, |
| "grad_norm": 0.185246542096138, |
| "learning_rate": 9.986544115276081e-05, |
| "loss": 0.0063, |
| "step": 7220 |
| }, |
| { |
| "epoch": 172.14285714285714, |
| "grad_norm": 0.13914185762405396, |
| "learning_rate": 9.986422618227433e-05, |
| "loss": 0.0051, |
| "step": 7230 |
| }, |
| { |
| "epoch": 172.38095238095238, |
| "grad_norm": 0.13030554354190826, |
| "learning_rate": 9.986300575871106e-05, |
| "loss": 0.0056, |
| "step": 7240 |
| }, |
| { |
| "epoch": 172.61904761904762, |
| "grad_norm": 0.14553847908973694, |
| "learning_rate": 9.986177988220444e-05, |
| "loss": 0.0049, |
| "step": 7250 |
| }, |
| { |
| "epoch": 172.85714285714286, |
| "grad_norm": 0.15613652765750885, |
| "learning_rate": 9.986054855288856e-05, |
| "loss": 0.0052, |
| "step": 7260 |
| }, |
| { |
| "epoch": 173.0952380952381, |
| "grad_norm": 0.13465726375579834, |
| "learning_rate": 9.985931177089802e-05, |
| "loss": 0.0057, |
| "step": 7270 |
| }, |
| { |
| "epoch": 173.33333333333334, |
| "grad_norm": 0.13512857258319855, |
| "learning_rate": 9.985806953636814e-05, |
| "loss": 0.0051, |
| "step": 7280 |
| }, |
| { |
| "epoch": 173.57142857142858, |
| "grad_norm": 0.13171744346618652, |
| "learning_rate": 9.985682184943471e-05, |
| "loss": 0.0058, |
| "step": 7290 |
| }, |
| { |
| "epoch": 173.8095238095238, |
| "grad_norm": 0.14279396831989288, |
| "learning_rate": 9.98555687102342e-05, |
| "loss": 0.005, |
| "step": 7300 |
| }, |
| { |
| "epoch": 174.04761904761904, |
| "grad_norm": 0.14104826748371124, |
| "learning_rate": 9.985431011890367e-05, |
| "loss": 0.0055, |
| "step": 7310 |
| }, |
| { |
| "epoch": 174.28571428571428, |
| "grad_norm": 0.1295803189277649, |
| "learning_rate": 9.985304607558075e-05, |
| "loss": 0.0052, |
| "step": 7320 |
| }, |
| { |
| "epoch": 174.52380952380952, |
| "grad_norm": 0.11689195781946182, |
| "learning_rate": 9.985177658040364e-05, |
| "loss": 0.0051, |
| "step": 7330 |
| }, |
| { |
| "epoch": 174.76190476190476, |
| "grad_norm": 0.13540656864643097, |
| "learning_rate": 9.985050163351119e-05, |
| "loss": 0.0051, |
| "step": 7340 |
| }, |
| { |
| "epoch": 175.0, |
| "grad_norm": 0.14846618473529816, |
| "learning_rate": 9.984922123504286e-05, |
| "loss": 0.0044, |
| "step": 7350 |
| }, |
| { |
| "epoch": 175.23809523809524, |
| "grad_norm": 0.0922514796257019, |
| "learning_rate": 9.984793538513862e-05, |
| "loss": 0.0049, |
| "step": 7360 |
| }, |
| { |
| "epoch": 175.47619047619048, |
| "grad_norm": 0.10331092029809952, |
| "learning_rate": 9.984664408393912e-05, |
| "loss": 0.0051, |
| "step": 7370 |
| }, |
| { |
| "epoch": 175.71428571428572, |
| "grad_norm": 0.14094491302967072, |
| "learning_rate": 9.984534733158556e-05, |
| "loss": 0.0052, |
| "step": 7380 |
| }, |
| { |
| "epoch": 175.95238095238096, |
| "grad_norm": 0.12188178300857544, |
| "learning_rate": 9.984404512821977e-05, |
| "loss": 0.0042, |
| "step": 7390 |
| }, |
| { |
| "epoch": 176.1904761904762, |
| "grad_norm": 0.110826775431633, |
| "learning_rate": 9.984273747398411e-05, |
| "loss": 0.0049, |
| "step": 7400 |
| }, |
| { |
| "epoch": 176.42857142857142, |
| "grad_norm": 0.10731463879346848, |
| "learning_rate": 9.984142436902165e-05, |
| "loss": 0.0052, |
| "step": 7410 |
| }, |
| { |
| "epoch": 176.66666666666666, |
| "grad_norm": 0.11551852524280548, |
| "learning_rate": 9.984010581347596e-05, |
| "loss": 0.0042, |
| "step": 7420 |
| }, |
| { |
| "epoch": 176.9047619047619, |
| "grad_norm": 0.10435628890991211, |
| "learning_rate": 9.983878180749121e-05, |
| "loss": 0.0048, |
| "step": 7430 |
| }, |
| { |
| "epoch": 177.14285714285714, |
| "grad_norm": 0.10606574267148972, |
| "learning_rate": 9.983745235121222e-05, |
| "loss": 0.0044, |
| "step": 7440 |
| }, |
| { |
| "epoch": 177.38095238095238, |
| "grad_norm": 0.1655212789773941, |
| "learning_rate": 9.983611744478438e-05, |
| "loss": 0.005, |
| "step": 7450 |
| }, |
| { |
| "epoch": 177.61904761904762, |
| "grad_norm": 0.11433423310518265, |
| "learning_rate": 9.983477708835365e-05, |
| "loss": 0.0051, |
| "step": 7460 |
| }, |
| { |
| "epoch": 177.85714285714286, |
| "grad_norm": 0.11277841031551361, |
| "learning_rate": 9.983343128206664e-05, |
| "loss": 0.0052, |
| "step": 7470 |
| }, |
| { |
| "epoch": 178.0952380952381, |
| "grad_norm": 0.10843323171138763, |
| "learning_rate": 9.983208002607049e-05, |
| "loss": 0.0052, |
| "step": 7480 |
| }, |
| { |
| "epoch": 178.33333333333334, |
| "grad_norm": 0.1280018836259842, |
| "learning_rate": 9.9830723320513e-05, |
| "loss": 0.0052, |
| "step": 7490 |
| }, |
| { |
| "epoch": 178.57142857142858, |
| "grad_norm": 0.09629008919000626, |
| "learning_rate": 9.982936116554254e-05, |
| "loss": 0.0043, |
| "step": 7500 |
| }, |
| { |
| "epoch": 178.8095238095238, |
| "grad_norm": 0.13388240337371826, |
| "learning_rate": 9.982799356130803e-05, |
| "loss": 0.0053, |
| "step": 7510 |
| }, |
| { |
| "epoch": 179.04761904761904, |
| "grad_norm": 0.11860666424036026, |
| "learning_rate": 9.982662050795908e-05, |
| "loss": 0.0052, |
| "step": 7520 |
| }, |
| { |
| "epoch": 179.28571428571428, |
| "grad_norm": 0.1311688870191574, |
| "learning_rate": 9.982524200564583e-05, |
| "loss": 0.0058, |
| "step": 7530 |
| }, |
| { |
| "epoch": 179.52380952380952, |
| "grad_norm": 0.11964283883571625, |
| "learning_rate": 9.982385805451901e-05, |
| "loss": 0.0051, |
| "step": 7540 |
| }, |
| { |
| "epoch": 179.76190476190476, |
| "grad_norm": 0.11575549095869064, |
| "learning_rate": 9.982246865472998e-05, |
| "loss": 0.0046, |
| "step": 7550 |
| }, |
| { |
| "epoch": 180.0, |
| "grad_norm": 0.10654815286397934, |
| "learning_rate": 9.982107380643069e-05, |
| "loss": 0.005, |
| "step": 7560 |
| }, |
| { |
| "epoch": 180.23809523809524, |
| "grad_norm": 0.08941005915403366, |
| "learning_rate": 9.981967350977368e-05, |
| "loss": 0.0054, |
| "step": 7570 |
| }, |
| { |
| "epoch": 180.47619047619048, |
| "grad_norm": 0.13677789270877838, |
| "learning_rate": 9.981826776491208e-05, |
| "loss": 0.0046, |
| "step": 7580 |
| }, |
| { |
| "epoch": 180.71428571428572, |
| "grad_norm": 0.13048696517944336, |
| "learning_rate": 9.98168565719996e-05, |
| "loss": 0.0056, |
| "step": 7590 |
| }, |
| { |
| "epoch": 180.95238095238096, |
| "grad_norm": 0.14826896786689758, |
| "learning_rate": 9.98154399311906e-05, |
| "loss": 0.0052, |
| "step": 7600 |
| }, |
| { |
| "epoch": 181.1904761904762, |
| "grad_norm": 0.13451898097991943, |
| "learning_rate": 9.981401784263997e-05, |
| "loss": 0.0053, |
| "step": 7610 |
| }, |
| { |
| "epoch": 181.42857142857142, |
| "grad_norm": 0.13226094841957092, |
| "learning_rate": 9.981259030650326e-05, |
| "loss": 0.0045, |
| "step": 7620 |
| }, |
| { |
| "epoch": 181.66666666666666, |
| "grad_norm": 0.1210789829492569, |
| "learning_rate": 9.981115732293655e-05, |
| "loss": 0.0046, |
| "step": 7630 |
| }, |
| { |
| "epoch": 181.9047619047619, |
| "grad_norm": 0.16755466163158417, |
| "learning_rate": 9.980971889209659e-05, |
| "loss": 0.0051, |
| "step": 7640 |
| }, |
| { |
| "epoch": 182.14285714285714, |
| "grad_norm": 0.16387924551963806, |
| "learning_rate": 9.980827501414064e-05, |
| "loss": 0.0052, |
| "step": 7650 |
| }, |
| { |
| "epoch": 182.38095238095238, |
| "grad_norm": 0.15185366570949554, |
| "learning_rate": 9.980682568922663e-05, |
| "loss": 0.0048, |
| "step": 7660 |
| }, |
| { |
| "epoch": 182.61904761904762, |
| "grad_norm": 0.18847574293613434, |
| "learning_rate": 9.980537091751304e-05, |
| "loss": 0.005, |
| "step": 7670 |
| }, |
| { |
| "epoch": 182.85714285714286, |
| "grad_norm": 0.16882918775081635, |
| "learning_rate": 9.980391069915897e-05, |
| "loss": 0.0052, |
| "step": 7680 |
| }, |
| { |
| "epoch": 183.0952380952381, |
| "grad_norm": 0.16836272180080414, |
| "learning_rate": 9.98024450343241e-05, |
| "loss": 0.0058, |
| "step": 7690 |
| }, |
| { |
| "epoch": 183.33333333333334, |
| "grad_norm": 0.17322920262813568, |
| "learning_rate": 9.980097392316872e-05, |
| "loss": 0.0046, |
| "step": 7700 |
| }, |
| { |
| "epoch": 183.57142857142858, |
| "grad_norm": 0.14725998044013977, |
| "learning_rate": 9.97994973658537e-05, |
| "loss": 0.0047, |
| "step": 7710 |
| }, |
| { |
| "epoch": 183.8095238095238, |
| "grad_norm": 0.13644789159297943, |
| "learning_rate": 9.979801536254054e-05, |
| "loss": 0.0043, |
| "step": 7720 |
| }, |
| { |
| "epoch": 184.04761904761904, |
| "grad_norm": 0.12822939455509186, |
| "learning_rate": 9.979652791339127e-05, |
| "loss": 0.0055, |
| "step": 7730 |
| }, |
| { |
| "epoch": 184.28571428571428, |
| "grad_norm": 0.16900651156902313, |
| "learning_rate": 9.97950350185686e-05, |
| "loss": 0.0059, |
| "step": 7740 |
| }, |
| { |
| "epoch": 184.52380952380952, |
| "grad_norm": 0.15225398540496826, |
| "learning_rate": 9.979353667823574e-05, |
| "loss": 0.0046, |
| "step": 7750 |
| }, |
| { |
| "epoch": 184.76190476190476, |
| "grad_norm": 0.15072883665561676, |
| "learning_rate": 9.979203289255658e-05, |
| "loss": 0.005, |
| "step": 7760 |
| }, |
| { |
| "epoch": 185.0, |
| "grad_norm": 0.14301857352256775, |
| "learning_rate": 9.979052366169557e-05, |
| "loss": 0.0054, |
| "step": 7770 |
| }, |
| { |
| "epoch": 185.23809523809524, |
| "grad_norm": 0.1323252022266388, |
| "learning_rate": 9.978900898581775e-05, |
| "loss": 0.0053, |
| "step": 7780 |
| }, |
| { |
| "epoch": 185.47619047619048, |
| "grad_norm": 0.11926697939634323, |
| "learning_rate": 9.978748886508875e-05, |
| "loss": 0.0046, |
| "step": 7790 |
| }, |
| { |
| "epoch": 185.71428571428572, |
| "grad_norm": 0.12691761553287506, |
| "learning_rate": 9.978596329967484e-05, |
| "loss": 0.0053, |
| "step": 7800 |
| }, |
| { |
| "epoch": 185.95238095238096, |
| "grad_norm": 0.12127958983182907, |
| "learning_rate": 9.978443228974284e-05, |
| "loss": 0.0044, |
| "step": 7810 |
| }, |
| { |
| "epoch": 186.1904761904762, |
| "grad_norm": 0.10053806751966476, |
| "learning_rate": 9.978289583546015e-05, |
| "loss": 0.0048, |
| "step": 7820 |
| }, |
| { |
| "epoch": 186.42857142857142, |
| "grad_norm": 0.11178812384605408, |
| "learning_rate": 9.978135393699484e-05, |
| "loss": 0.0046, |
| "step": 7830 |
| }, |
| { |
| "epoch": 186.66666666666666, |
| "grad_norm": 0.13410204648971558, |
| "learning_rate": 9.977980659451548e-05, |
| "loss": 0.0051, |
| "step": 7840 |
| }, |
| { |
| "epoch": 186.9047619047619, |
| "grad_norm": 0.13263118267059326, |
| "learning_rate": 9.977825380819135e-05, |
| "loss": 0.0051, |
| "step": 7850 |
| }, |
| { |
| "epoch": 187.14285714285714, |
| "grad_norm": 0.11616531759500504, |
| "learning_rate": 9.97766955781922e-05, |
| "loss": 0.0052, |
| "step": 7860 |
| }, |
| { |
| "epoch": 187.38095238095238, |
| "grad_norm": 0.13269078731536865, |
| "learning_rate": 9.977513190468848e-05, |
| "loss": 0.0046, |
| "step": 7870 |
| }, |
| { |
| "epoch": 187.61904761904762, |
| "grad_norm": 0.10913559049367905, |
| "learning_rate": 9.977356278785116e-05, |
| "loss": 0.0048, |
| "step": 7880 |
| }, |
| { |
| "epoch": 187.85714285714286, |
| "grad_norm": 0.1052507758140564, |
| "learning_rate": 9.977198822785184e-05, |
| "loss": 0.0048, |
| "step": 7890 |
| }, |
| { |
| "epoch": 188.0952380952381, |
| "grad_norm": 0.08623684197664261, |
| "learning_rate": 9.977040822486273e-05, |
| "loss": 0.0044, |
| "step": 7900 |
| }, |
| { |
| "epoch": 188.33333333333334, |
| "grad_norm": 0.1427624374628067, |
| "learning_rate": 9.97688227790566e-05, |
| "loss": 0.0049, |
| "step": 7910 |
| }, |
| { |
| "epoch": 188.57142857142858, |
| "grad_norm": 0.1597541868686676, |
| "learning_rate": 9.976723189060684e-05, |
| "loss": 0.0047, |
| "step": 7920 |
| }, |
| { |
| "epoch": 188.8095238095238, |
| "grad_norm": 0.10810401290655136, |
| "learning_rate": 9.976563555968742e-05, |
| "loss": 0.0046, |
| "step": 7930 |
| }, |
| { |
| "epoch": 189.04761904761904, |
| "grad_norm": 0.09952914714813232, |
| "learning_rate": 9.976403378647292e-05, |
| "loss": 0.0051, |
| "step": 7940 |
| }, |
| { |
| "epoch": 189.28571428571428, |
| "grad_norm": 0.09158805757761002, |
| "learning_rate": 9.97624265711385e-05, |
| "loss": 0.0054, |
| "step": 7950 |
| }, |
| { |
| "epoch": 189.52380952380952, |
| "grad_norm": 0.09442733228206635, |
| "learning_rate": 9.976081391385993e-05, |
| "loss": 0.0054, |
| "step": 7960 |
| }, |
| { |
| "epoch": 189.76190476190476, |
| "grad_norm": 0.12446358799934387, |
| "learning_rate": 9.975919581481356e-05, |
| "loss": 0.0053, |
| "step": 7970 |
| }, |
| { |
| "epoch": 190.0, |
| "grad_norm": 0.11789149791002274, |
| "learning_rate": 9.975757227417634e-05, |
| "loss": 0.0048, |
| "step": 7980 |
| }, |
| { |
| "epoch": 190.23809523809524, |
| "grad_norm": 0.11199343949556351, |
| "learning_rate": 9.975594329212586e-05, |
| "loss": 0.0053, |
| "step": 7990 |
| }, |
| { |
| "epoch": 190.47619047619048, |
| "grad_norm": 0.09516771882772446, |
| "learning_rate": 9.97543088688402e-05, |
| "loss": 0.005, |
| "step": 8000 |
| }, |
| { |
| "epoch": 190.71428571428572, |
| "grad_norm": 0.09163705259561539, |
| "learning_rate": 9.975266900449814e-05, |
| "loss": 0.0057, |
| "step": 8010 |
| }, |
| { |
| "epoch": 190.95238095238096, |
| "grad_norm": 0.09514744579792023, |
| "learning_rate": 9.975102369927898e-05, |
| "loss": 0.0046, |
| "step": 8020 |
| }, |
| { |
| "epoch": 191.1904761904762, |
| "grad_norm": 0.12602128088474274, |
| "learning_rate": 9.974937295336269e-05, |
| "loss": 0.005, |
| "step": 8030 |
| }, |
| { |
| "epoch": 191.42857142857142, |
| "grad_norm": 0.14039188623428345, |
| "learning_rate": 9.974771676692975e-05, |
| "loss": 0.005, |
| "step": 8040 |
| }, |
| { |
| "epoch": 191.66666666666666, |
| "grad_norm": 0.09004565328359604, |
| "learning_rate": 9.974605514016131e-05, |
| "loss": 0.0045, |
| "step": 8050 |
| }, |
| { |
| "epoch": 191.9047619047619, |
| "grad_norm": 0.07240217924118042, |
| "learning_rate": 9.974438807323907e-05, |
| "loss": 0.0044, |
| "step": 8060 |
| }, |
| { |
| "epoch": 192.14285714285714, |
| "grad_norm": 0.0770246759057045, |
| "learning_rate": 9.974271556634535e-05, |
| "loss": 0.0041, |
| "step": 8070 |
| }, |
| { |
| "epoch": 192.38095238095238, |
| "grad_norm": 0.13077087700366974, |
| "learning_rate": 9.974103761966302e-05, |
| "loss": 0.005, |
| "step": 8080 |
| }, |
| { |
| "epoch": 192.61904761904762, |
| "grad_norm": 0.10337863862514496, |
| "learning_rate": 9.973935423337563e-05, |
| "loss": 0.0047, |
| "step": 8090 |
| }, |
| { |
| "epoch": 192.85714285714286, |
| "grad_norm": 0.1273806095123291, |
| "learning_rate": 9.973766540766722e-05, |
| "loss": 0.0047, |
| "step": 8100 |
| }, |
| { |
| "epoch": 193.0952380952381, |
| "grad_norm": 0.09903520345687866, |
| "learning_rate": 9.97359711427225e-05, |
| "loss": 0.0057, |
| "step": 8110 |
| }, |
| { |
| "epoch": 193.33333333333334, |
| "grad_norm": 0.07901766896247864, |
| "learning_rate": 9.973427143872677e-05, |
| "loss": 0.0043, |
| "step": 8120 |
| }, |
| { |
| "epoch": 193.57142857142858, |
| "grad_norm": 0.1086260974407196, |
| "learning_rate": 9.973256629586589e-05, |
| "loss": 0.0051, |
| "step": 8130 |
| }, |
| { |
| "epoch": 193.8095238095238, |
| "grad_norm": 0.14257000386714935, |
| "learning_rate": 9.973085571432632e-05, |
| "loss": 0.0053, |
| "step": 8140 |
| }, |
| { |
| "epoch": 194.04761904761904, |
| "grad_norm": 0.11573798954486847, |
| "learning_rate": 9.972913969429513e-05, |
| "loss": 0.0052, |
| "step": 8150 |
| }, |
| { |
| "epoch": 194.28571428571428, |
| "grad_norm": 0.15264742076396942, |
| "learning_rate": 9.972741823596e-05, |
| "loss": 0.0048, |
| "step": 8160 |
| }, |
| { |
| "epoch": 194.52380952380952, |
| "grad_norm": 0.12986569106578827, |
| "learning_rate": 9.972569133950917e-05, |
| "loss": 0.0047, |
| "step": 8170 |
| }, |
| { |
| "epoch": 194.76190476190476, |
| "grad_norm": 0.13304449617862701, |
| "learning_rate": 9.972395900513151e-05, |
| "loss": 0.0047, |
| "step": 8180 |
| }, |
| { |
| "epoch": 195.0, |
| "grad_norm": 0.11462575197219849, |
| "learning_rate": 9.972222123301645e-05, |
| "loss": 0.005, |
| "step": 8190 |
| }, |
| { |
| "epoch": 195.23809523809524, |
| "grad_norm": 0.09287209808826447, |
| "learning_rate": 9.972047802335403e-05, |
| "loss": 0.0051, |
| "step": 8200 |
| }, |
| { |
| "epoch": 195.47619047619048, |
| "grad_norm": 0.1014489009976387, |
| "learning_rate": 9.971872937633488e-05, |
| "loss": 0.0046, |
| "step": 8210 |
| }, |
| { |
| "epoch": 195.71428571428572, |
| "grad_norm": 0.13084720075130463, |
| "learning_rate": 9.971697529215024e-05, |
| "loss": 0.005, |
| "step": 8220 |
| }, |
| { |
| "epoch": 195.95238095238096, |
| "grad_norm": 0.11124468594789505, |
| "learning_rate": 9.971521577099192e-05, |
| "loss": 0.0049, |
| "step": 8230 |
| }, |
| { |
| "epoch": 196.1904761904762, |
| "grad_norm": 0.14627546072006226, |
| "learning_rate": 9.971345081305236e-05, |
| "loss": 0.0049, |
| "step": 8240 |
| }, |
| { |
| "epoch": 196.42857142857142, |
| "grad_norm": 0.1390276402235031, |
| "learning_rate": 9.971168041852456e-05, |
| "loss": 0.0048, |
| "step": 8250 |
| }, |
| { |
| "epoch": 196.66666666666666, |
| "grad_norm": 0.1452191025018692, |
| "learning_rate": 9.970990458760215e-05, |
| "loss": 0.0042, |
| "step": 8260 |
| }, |
| { |
| "epoch": 196.9047619047619, |
| "grad_norm": 0.18676665425300598, |
| "learning_rate": 9.970812332047929e-05, |
| "loss": 0.0043, |
| "step": 8270 |
| }, |
| { |
| "epoch": 197.14285714285714, |
| "grad_norm": 0.1455295979976654, |
| "learning_rate": 9.97063366173508e-05, |
| "loss": 0.0042, |
| "step": 8280 |
| }, |
| { |
| "epoch": 197.38095238095238, |
| "grad_norm": 0.09625612944364548, |
| "learning_rate": 9.970454447841207e-05, |
| "loss": 0.0045, |
| "step": 8290 |
| }, |
| { |
| "epoch": 197.61904761904762, |
| "grad_norm": 0.15387007594108582, |
| "learning_rate": 9.970274690385909e-05, |
| "loss": 0.0048, |
| "step": 8300 |
| }, |
| { |
| "epoch": 197.85714285714286, |
| "grad_norm": 0.20444431900978088, |
| "learning_rate": 9.970094389388844e-05, |
| "loss": 0.0053, |
| "step": 8310 |
| }, |
| { |
| "epoch": 198.0952380952381, |
| "grad_norm": 0.16849547624588013, |
| "learning_rate": 9.969913544869728e-05, |
| "loss": 0.005, |
| "step": 8320 |
| }, |
| { |
| "epoch": 198.33333333333334, |
| "grad_norm": 0.12188586592674255, |
| "learning_rate": 9.96973215684834e-05, |
| "loss": 0.0052, |
| "step": 8330 |
| }, |
| { |
| "epoch": 198.57142857142858, |
| "grad_norm": 0.15573500096797943, |
| "learning_rate": 9.969550225344513e-05, |
| "loss": 0.0056, |
| "step": 8340 |
| }, |
| { |
| "epoch": 198.8095238095238, |
| "grad_norm": 0.18879929184913635, |
| "learning_rate": 9.969367750378147e-05, |
| "loss": 0.0041, |
| "step": 8350 |
| }, |
| { |
| "epoch": 199.04761904761904, |
| "grad_norm": 0.12765611708164215, |
| "learning_rate": 9.969184731969194e-05, |
| "loss": 0.0057, |
| "step": 8360 |
| }, |
| { |
| "epoch": 199.28571428571428, |
| "grad_norm": 0.15674729645252228, |
| "learning_rate": 9.96900117013767e-05, |
| "loss": 0.0052, |
| "step": 8370 |
| }, |
| { |
| "epoch": 199.52380952380952, |
| "grad_norm": 0.14586085081100464, |
| "learning_rate": 9.96881706490365e-05, |
| "loss": 0.005, |
| "step": 8380 |
| }, |
| { |
| "epoch": 199.76190476190476, |
| "grad_norm": 0.15121574699878693, |
| "learning_rate": 9.968632416287265e-05, |
| "loss": 0.0049, |
| "step": 8390 |
| }, |
| { |
| "epoch": 200.0, |
| "grad_norm": 0.12582926452159882, |
| "learning_rate": 9.96844722430871e-05, |
| "loss": 0.0043, |
| "step": 8400 |
| }, |
| { |
| "epoch": 200.23809523809524, |
| "grad_norm": 0.07240427285432816, |
| "learning_rate": 9.968261488988235e-05, |
| "loss": 0.0054, |
| "step": 8410 |
| }, |
| { |
| "epoch": 200.47619047619048, |
| "grad_norm": 0.09521763026714325, |
| "learning_rate": 9.968075210346155e-05, |
| "loss": 0.0045, |
| "step": 8420 |
| }, |
| { |
| "epoch": 200.71428571428572, |
| "grad_norm": 0.09327897429466248, |
| "learning_rate": 9.967888388402839e-05, |
| "loss": 0.0051, |
| "step": 8430 |
| }, |
| { |
| "epoch": 200.95238095238096, |
| "grad_norm": 0.08536362648010254, |
| "learning_rate": 9.967701023178717e-05, |
| "loss": 0.0046, |
| "step": 8440 |
| }, |
| { |
| "epoch": 201.1904761904762, |
| "grad_norm": 0.09649348258972168, |
| "learning_rate": 9.967513114694282e-05, |
| "loss": 0.0047, |
| "step": 8450 |
| }, |
| { |
| "epoch": 201.42857142857142, |
| "grad_norm": 0.09735965728759766, |
| "learning_rate": 9.967324662970079e-05, |
| "loss": 0.0044, |
| "step": 8460 |
| }, |
| { |
| "epoch": 201.66666666666666, |
| "grad_norm": 0.08427132666110992, |
| "learning_rate": 9.96713566802672e-05, |
| "loss": 0.0046, |
| "step": 8470 |
| }, |
| { |
| "epoch": 201.9047619047619, |
| "grad_norm": 0.07609061151742935, |
| "learning_rate": 9.966946129884873e-05, |
| "loss": 0.0046, |
| "step": 8480 |
| }, |
| { |
| "epoch": 202.14285714285714, |
| "grad_norm": 0.09958251565694809, |
| "learning_rate": 9.966756048565265e-05, |
| "loss": 0.0049, |
| "step": 8490 |
| }, |
| { |
| "epoch": 202.38095238095238, |
| "grad_norm": 0.08163908123970032, |
| "learning_rate": 9.966565424088681e-05, |
| "loss": 0.0041, |
| "step": 8500 |
| }, |
| { |
| "epoch": 202.61904761904762, |
| "grad_norm": 0.10526051372289658, |
| "learning_rate": 9.96637425647597e-05, |
| "loss": 0.0046, |
| "step": 8510 |
| }, |
| { |
| "epoch": 202.85714285714286, |
| "grad_norm": 0.10554583370685577, |
| "learning_rate": 9.966182545748038e-05, |
| "loss": 0.0045, |
| "step": 8520 |
| }, |
| { |
| "epoch": 203.0952380952381, |
| "grad_norm": 0.12574857473373413, |
| "learning_rate": 9.96599029192585e-05, |
| "loss": 0.0046, |
| "step": 8530 |
| }, |
| { |
| "epoch": 203.33333333333334, |
| "grad_norm": 0.13355091214179993, |
| "learning_rate": 9.965797495030428e-05, |
| "loss": 0.0045, |
| "step": 8540 |
| }, |
| { |
| "epoch": 203.57142857142858, |
| "grad_norm": 0.1170630231499672, |
| "learning_rate": 9.96560415508286e-05, |
| "loss": 0.0051, |
| "step": 8550 |
| }, |
| { |
| "epoch": 203.8095238095238, |
| "grad_norm": 0.1267741471529007, |
| "learning_rate": 9.965410272104286e-05, |
| "loss": 0.0044, |
| "step": 8560 |
| }, |
| { |
| "epoch": 204.04761904761904, |
| "grad_norm": 0.1520925909280777, |
| "learning_rate": 9.96521584611591e-05, |
| "loss": 0.0048, |
| "step": 8570 |
| }, |
| { |
| "epoch": 204.28571428571428, |
| "grad_norm": 0.12087897211313248, |
| "learning_rate": 9.965020877138994e-05, |
| "loss": 0.0055, |
| "step": 8580 |
| }, |
| { |
| "epoch": 204.52380952380952, |
| "grad_norm": 0.12927661836147308, |
| "learning_rate": 9.964825365194861e-05, |
| "loss": 0.006, |
| "step": 8590 |
| }, |
| { |
| "epoch": 204.76190476190476, |
| "grad_norm": 0.16474094986915588, |
| "learning_rate": 9.96462931030489e-05, |
| "loss": 0.005, |
| "step": 8600 |
| }, |
| { |
| "epoch": 205.0, |
| "grad_norm": 0.14384043216705322, |
| "learning_rate": 9.96443271249052e-05, |
| "loss": 0.0052, |
| "step": 8610 |
| }, |
| { |
| "epoch": 205.23809523809524, |
| "grad_norm": 0.1830437183380127, |
| "learning_rate": 9.964235571773255e-05, |
| "loss": 0.0046, |
| "step": 8620 |
| }, |
| { |
| "epoch": 205.47619047619048, |
| "grad_norm": 0.11652304977178574, |
| "learning_rate": 9.96403788817465e-05, |
| "loss": 0.005, |
| "step": 8630 |
| }, |
| { |
| "epoch": 205.71428571428572, |
| "grad_norm": 0.14895133674144745, |
| "learning_rate": 9.963839661716325e-05, |
| "loss": 0.0058, |
| "step": 8640 |
| }, |
| { |
| "epoch": 205.95238095238096, |
| "grad_norm": 0.17469000816345215, |
| "learning_rate": 9.963640892419958e-05, |
| "loss": 0.0054, |
| "step": 8650 |
| }, |
| { |
| "epoch": 206.1904761904762, |
| "grad_norm": 0.1631147414445877, |
| "learning_rate": 9.963441580307286e-05, |
| "loss": 0.0043, |
| "step": 8660 |
| }, |
| { |
| "epoch": 206.42857142857142, |
| "grad_norm": 0.11974041163921356, |
| "learning_rate": 9.963241725400104e-05, |
| "loss": 0.0043, |
| "step": 8670 |
| }, |
| { |
| "epoch": 206.66666666666666, |
| "grad_norm": 0.11687395721673965, |
| "learning_rate": 9.963041327720271e-05, |
| "loss": 0.0039, |
| "step": 8680 |
| }, |
| { |
| "epoch": 206.9047619047619, |
| "grad_norm": 0.0925244614481926, |
| "learning_rate": 9.962840387289697e-05, |
| "loss": 0.0054, |
| "step": 8690 |
| }, |
| { |
| "epoch": 207.14285714285714, |
| "grad_norm": 0.12921832501888275, |
| "learning_rate": 9.962638904130363e-05, |
| "loss": 0.0044, |
| "step": 8700 |
| }, |
| { |
| "epoch": 207.38095238095238, |
| "grad_norm": 0.11292935162782669, |
| "learning_rate": 9.962436878264298e-05, |
| "loss": 0.0047, |
| "step": 8710 |
| }, |
| { |
| "epoch": 207.61904761904762, |
| "grad_norm": 0.09950345754623413, |
| "learning_rate": 9.962234309713598e-05, |
| "loss": 0.0046, |
| "step": 8720 |
| }, |
| { |
| "epoch": 207.85714285714286, |
| "grad_norm": 0.10734084993600845, |
| "learning_rate": 9.962031198500414e-05, |
| "loss": 0.0044, |
| "step": 8730 |
| }, |
| { |
| "epoch": 208.0952380952381, |
| "grad_norm": 0.12951508164405823, |
| "learning_rate": 9.961827544646958e-05, |
| "loss": 0.0043, |
| "step": 8740 |
| }, |
| { |
| "epoch": 208.33333333333334, |
| "grad_norm": 0.13186244666576385, |
| "learning_rate": 9.961623348175501e-05, |
| "loss": 0.0047, |
| "step": 8750 |
| }, |
| { |
| "epoch": 208.57142857142858, |
| "grad_norm": 0.08756158500909805, |
| "learning_rate": 9.961418609108377e-05, |
| "loss": 0.0047, |
| "step": 8760 |
| }, |
| { |
| "epoch": 208.8095238095238, |
| "grad_norm": 0.07815192639827728, |
| "learning_rate": 9.961213327467971e-05, |
| "loss": 0.0046, |
| "step": 8770 |
| }, |
| { |
| "epoch": 209.04761904761904, |
| "grad_norm": 0.13698315620422363, |
| "learning_rate": 9.961007503276736e-05, |
| "loss": 0.0044, |
| "step": 8780 |
| }, |
| { |
| "epoch": 209.28571428571428, |
| "grad_norm": 0.11950427293777466, |
| "learning_rate": 9.960801136557179e-05, |
| "loss": 0.0039, |
| "step": 8790 |
| }, |
| { |
| "epoch": 209.52380952380952, |
| "grad_norm": 0.0999923050403595, |
| "learning_rate": 9.960594227331866e-05, |
| "loss": 0.0037, |
| "step": 8800 |
| }, |
| { |
| "epoch": 209.76190476190476, |
| "grad_norm": 0.09262796491384506, |
| "learning_rate": 9.960386775623429e-05, |
| "loss": 0.0047, |
| "step": 8810 |
| }, |
| { |
| "epoch": 210.0, |
| "grad_norm": 0.11925182491540909, |
| "learning_rate": 9.96017878145455e-05, |
| "loss": 0.0041, |
| "step": 8820 |
| }, |
| { |
| "epoch": 210.23809523809524, |
| "grad_norm": 0.10378900170326233, |
| "learning_rate": 9.959970244847977e-05, |
| "loss": 0.0042, |
| "step": 8830 |
| }, |
| { |
| "epoch": 210.47619047619048, |
| "grad_norm": 0.1266898810863495, |
| "learning_rate": 9.959761165826518e-05, |
| "loss": 0.0048, |
| "step": 8840 |
| }, |
| { |
| "epoch": 210.71428571428572, |
| "grad_norm": 0.1144866868853569, |
| "learning_rate": 9.959551544413033e-05, |
| "loss": 0.0049, |
| "step": 8850 |
| }, |
| { |
| "epoch": 210.95238095238096, |
| "grad_norm": 0.13823789358139038, |
| "learning_rate": 9.959341380630448e-05, |
| "loss": 0.005, |
| "step": 8860 |
| }, |
| { |
| "epoch": 211.1904761904762, |
| "grad_norm": 0.09835786372423172, |
| "learning_rate": 9.959130674501746e-05, |
| "loss": 0.0044, |
| "step": 8870 |
| }, |
| { |
| "epoch": 211.42857142857142, |
| "grad_norm": 0.11006879806518555, |
| "learning_rate": 9.958919426049968e-05, |
| "loss": 0.0047, |
| "step": 8880 |
| }, |
| { |
| "epoch": 211.66666666666666, |
| "grad_norm": 0.12629656493663788, |
| "learning_rate": 9.958707635298219e-05, |
| "loss": 0.0047, |
| "step": 8890 |
| }, |
| { |
| "epoch": 211.9047619047619, |
| "grad_norm": 0.11541087925434113, |
| "learning_rate": 9.958495302269657e-05, |
| "loss": 0.0044, |
| "step": 8900 |
| }, |
| { |
| "epoch": 212.14285714285714, |
| "grad_norm": 0.09854788333177567, |
| "learning_rate": 9.958282426987503e-05, |
| "loss": 0.0042, |
| "step": 8910 |
| }, |
| { |
| "epoch": 212.38095238095238, |
| "grad_norm": 0.10035550594329834, |
| "learning_rate": 9.95806900947504e-05, |
| "loss": 0.0044, |
| "step": 8920 |
| }, |
| { |
| "epoch": 212.61904761904762, |
| "grad_norm": 0.09333580732345581, |
| "learning_rate": 9.957855049755604e-05, |
| "loss": 0.0045, |
| "step": 8930 |
| }, |
| { |
| "epoch": 212.85714285714286, |
| "grad_norm": 0.1039256900548935, |
| "learning_rate": 9.957640547852593e-05, |
| "loss": 0.0041, |
| "step": 8940 |
| }, |
| { |
| "epoch": 213.0952380952381, |
| "grad_norm": 0.1161140501499176, |
| "learning_rate": 9.957425503789466e-05, |
| "loss": 0.0039, |
| "step": 8950 |
| }, |
| { |
| "epoch": 213.33333333333334, |
| "grad_norm": 0.09973783791065216, |
| "learning_rate": 9.957209917589738e-05, |
| "loss": 0.0045, |
| "step": 8960 |
| }, |
| { |
| "epoch": 213.57142857142858, |
| "grad_norm": 0.0889749601483345, |
| "learning_rate": 9.956993789276987e-05, |
| "loss": 0.0043, |
| "step": 8970 |
| }, |
| { |
| "epoch": 213.8095238095238, |
| "grad_norm": 0.11952216178178787, |
| "learning_rate": 9.956777118874847e-05, |
| "loss": 0.0048, |
| "step": 8980 |
| }, |
| { |
| "epoch": 214.04761904761904, |
| "grad_norm": 0.10549954324960709, |
| "learning_rate": 9.956559906407016e-05, |
| "loss": 0.0042, |
| "step": 8990 |
| }, |
| { |
| "epoch": 214.28571428571428, |
| "grad_norm": 0.12812094390392303, |
| "learning_rate": 9.956342151897245e-05, |
| "loss": 0.0054, |
| "step": 9000 |
| }, |
| { |
| "epoch": 214.52380952380952, |
| "grad_norm": 0.08854644000530243, |
| "learning_rate": 9.956123855369346e-05, |
| "loss": 0.0039, |
| "step": 9010 |
| }, |
| { |
| "epoch": 214.76190476190476, |
| "grad_norm": 0.09350213408470154, |
| "learning_rate": 9.955905016847196e-05, |
| "loss": 0.0045, |
| "step": 9020 |
| }, |
| { |
| "epoch": 215.0, |
| "grad_norm": 0.10262349247932434, |
| "learning_rate": 9.955685636354723e-05, |
| "loss": 0.0041, |
| "step": 9030 |
| }, |
| { |
| "epoch": 215.23809523809524, |
| "grad_norm": 0.13133123517036438, |
| "learning_rate": 9.95546571391592e-05, |
| "loss": 0.0039, |
| "step": 9040 |
| }, |
| { |
| "epoch": 215.47619047619048, |
| "grad_norm": 0.15965388715267181, |
| "learning_rate": 9.955245249554837e-05, |
| "loss": 0.0045, |
| "step": 9050 |
| }, |
| { |
| "epoch": 215.71428571428572, |
| "grad_norm": 0.11264082789421082, |
| "learning_rate": 9.955024243295582e-05, |
| "loss": 0.0051, |
| "step": 9060 |
| }, |
| { |
| "epoch": 215.95238095238096, |
| "grad_norm": 0.14090965688228607, |
| "learning_rate": 9.954802695162328e-05, |
| "loss": 0.0048, |
| "step": 9070 |
| }, |
| { |
| "epoch": 216.1904761904762, |
| "grad_norm": 0.11634740978479385, |
| "learning_rate": 9.954580605179302e-05, |
| "loss": 0.0043, |
| "step": 9080 |
| }, |
| { |
| "epoch": 216.42857142857142, |
| "grad_norm": 0.09784486889839172, |
| "learning_rate": 9.954357973370788e-05, |
| "loss": 0.0043, |
| "step": 9090 |
| }, |
| { |
| "epoch": 216.66666666666666, |
| "grad_norm": 0.1520427167415619, |
| "learning_rate": 9.954134799761135e-05, |
| "loss": 0.006, |
| "step": 9100 |
| }, |
| { |
| "epoch": 216.9047619047619, |
| "grad_norm": 0.1524430215358734, |
| "learning_rate": 9.953911084374748e-05, |
| "loss": 0.0045, |
| "step": 9110 |
| }, |
| { |
| "epoch": 217.14285714285714, |
| "grad_norm": 0.14002160727977753, |
| "learning_rate": 9.953686827236093e-05, |
| "loss": 0.0044, |
| "step": 9120 |
| }, |
| { |
| "epoch": 217.38095238095238, |
| "grad_norm": 0.10343852639198303, |
| "learning_rate": 9.953462028369695e-05, |
| "loss": 0.0047, |
| "step": 9130 |
| }, |
| { |
| "epoch": 217.61904761904762, |
| "grad_norm": 0.11479546129703522, |
| "learning_rate": 9.953236687800136e-05, |
| "loss": 0.0045, |
| "step": 9140 |
| }, |
| { |
| "epoch": 217.85714285714286, |
| "grad_norm": 0.10094081610441208, |
| "learning_rate": 9.95301080555206e-05, |
| "loss": 0.0043, |
| "step": 9150 |
| }, |
| { |
| "epoch": 218.0952380952381, |
| "grad_norm": 0.07680132240056992, |
| "learning_rate": 9.952784381650171e-05, |
| "loss": 0.005, |
| "step": 9160 |
| }, |
| { |
| "epoch": 218.33333333333334, |
| "grad_norm": 0.10795367509126663, |
| "learning_rate": 9.952557416119226e-05, |
| "loss": 0.0041, |
| "step": 9170 |
| }, |
| { |
| "epoch": 218.57142857142858, |
| "grad_norm": 0.0853162407875061, |
| "learning_rate": 9.95232990898405e-05, |
| "loss": 0.0042, |
| "step": 9180 |
| }, |
| { |
| "epoch": 218.8095238095238, |
| "grad_norm": 0.09752203524112701, |
| "learning_rate": 9.95210186026952e-05, |
| "loss": 0.0042, |
| "step": 9190 |
| }, |
| { |
| "epoch": 219.04761904761904, |
| "grad_norm": 0.10427052527666092, |
| "learning_rate": 9.951873270000576e-05, |
| "loss": 0.0043, |
| "step": 9200 |
| }, |
| { |
| "epoch": 219.28571428571428, |
| "grad_norm": 0.07858555018901825, |
| "learning_rate": 9.951644138202216e-05, |
| "loss": 0.0047, |
| "step": 9210 |
| }, |
| { |
| "epoch": 219.52380952380952, |
| "grad_norm": 0.10338760167360306, |
| "learning_rate": 9.951414464899498e-05, |
| "loss": 0.0043, |
| "step": 9220 |
| }, |
| { |
| "epoch": 219.76190476190476, |
| "grad_norm": 0.1011209487915039, |
| "learning_rate": 9.951184250117538e-05, |
| "loss": 0.0046, |
| "step": 9230 |
| }, |
| { |
| "epoch": 220.0, |
| "grad_norm": 0.13130028545856476, |
| "learning_rate": 9.950953493881513e-05, |
| "loss": 0.0045, |
| "step": 9240 |
| }, |
| { |
| "epoch": 220.23809523809524, |
| "grad_norm": 0.09064214676618576, |
| "learning_rate": 9.950722196216658e-05, |
| "loss": 0.0043, |
| "step": 9250 |
| }, |
| { |
| "epoch": 220.47619047619048, |
| "grad_norm": 0.13138924539089203, |
| "learning_rate": 9.950490357148265e-05, |
| "loss": 0.0048, |
| "step": 9260 |
| }, |
| { |
| "epoch": 220.71428571428572, |
| "grad_norm": 0.13622823357582092, |
| "learning_rate": 9.950257976701692e-05, |
| "loss": 0.0048, |
| "step": 9270 |
| }, |
| { |
| "epoch": 220.95238095238096, |
| "grad_norm": 0.12317976355552673, |
| "learning_rate": 9.950025054902348e-05, |
| "loss": 0.0042, |
| "step": 9280 |
| }, |
| { |
| "epoch": 221.1904761904762, |
| "grad_norm": 0.1051967442035675, |
| "learning_rate": 9.949791591775706e-05, |
| "loss": 0.0042, |
| "step": 9290 |
| }, |
| { |
| "epoch": 221.42857142857142, |
| "grad_norm": 0.11493319272994995, |
| "learning_rate": 9.949557587347298e-05, |
| "loss": 0.0052, |
| "step": 9300 |
| }, |
| { |
| "epoch": 221.66666666666666, |
| "grad_norm": 0.11063271760940552, |
| "learning_rate": 9.949323041642713e-05, |
| "loss": 0.0042, |
| "step": 9310 |
| }, |
| { |
| "epoch": 221.9047619047619, |
| "grad_norm": 0.09882011264562607, |
| "learning_rate": 9.949087954687602e-05, |
| "loss": 0.0047, |
| "step": 9320 |
| }, |
| { |
| "epoch": 222.14285714285714, |
| "grad_norm": 0.11974307894706726, |
| "learning_rate": 9.948852326507672e-05, |
| "loss": 0.005, |
| "step": 9330 |
| }, |
| { |
| "epoch": 222.38095238095238, |
| "grad_norm": 0.10640609264373779, |
| "learning_rate": 9.948616157128694e-05, |
| "loss": 0.0041, |
| "step": 9340 |
| }, |
| { |
| "epoch": 222.61904761904762, |
| "grad_norm": 0.11453927308320999, |
| "learning_rate": 9.948379446576493e-05, |
| "loss": 0.0042, |
| "step": 9350 |
| }, |
| { |
| "epoch": 222.85714285714286, |
| "grad_norm": 0.13195329904556274, |
| "learning_rate": 9.948142194876952e-05, |
| "loss": 0.0043, |
| "step": 9360 |
| }, |
| { |
| "epoch": 223.0952380952381, |
| "grad_norm": 0.12089701741933823, |
| "learning_rate": 9.947904402056024e-05, |
| "loss": 0.0046, |
| "step": 9370 |
| }, |
| { |
| "epoch": 223.33333333333334, |
| "grad_norm": 0.12070542573928833, |
| "learning_rate": 9.947666068139708e-05, |
| "loss": 0.0057, |
| "step": 9380 |
| }, |
| { |
| "epoch": 223.57142857142858, |
| "grad_norm": 0.1582058221101761, |
| "learning_rate": 9.947427193154071e-05, |
| "loss": 0.0045, |
| "step": 9390 |
| }, |
| { |
| "epoch": 223.8095238095238, |
| "grad_norm": 0.10639505833387375, |
| "learning_rate": 9.947187777125233e-05, |
| "loss": 0.0056, |
| "step": 9400 |
| }, |
| { |
| "epoch": 224.04761904761904, |
| "grad_norm": 0.15308648347854614, |
| "learning_rate": 9.946947820079377e-05, |
| "loss": 0.0043, |
| "step": 9410 |
| }, |
| { |
| "epoch": 224.28571428571428, |
| "grad_norm": 0.10764076560735703, |
| "learning_rate": 9.946707322042747e-05, |
| "loss": 0.0041, |
| "step": 9420 |
| }, |
| { |
| "epoch": 224.52380952380952, |
| "grad_norm": 0.12859249114990234, |
| "learning_rate": 9.94646628304164e-05, |
| "loss": 0.0046, |
| "step": 9430 |
| }, |
| { |
| "epoch": 224.76190476190476, |
| "grad_norm": 0.12307706475257874, |
| "learning_rate": 9.946224703102418e-05, |
| "loss": 0.0047, |
| "step": 9440 |
| }, |
| { |
| "epoch": 225.0, |
| "grad_norm": 0.12424777448177338, |
| "learning_rate": 9.945982582251498e-05, |
| "loss": 0.0045, |
| "step": 9450 |
| }, |
| { |
| "epoch": 225.23809523809524, |
| "grad_norm": 0.14093747735023499, |
| "learning_rate": 9.94573992051536e-05, |
| "loss": 0.0048, |
| "step": 9460 |
| }, |
| { |
| "epoch": 225.47619047619048, |
| "grad_norm": 0.16558833420276642, |
| "learning_rate": 9.94549671792054e-05, |
| "loss": 0.0053, |
| "step": 9470 |
| }, |
| { |
| "epoch": 225.71428571428572, |
| "grad_norm": 0.12586626410484314, |
| "learning_rate": 9.945252974493635e-05, |
| "loss": 0.0044, |
| "step": 9480 |
| }, |
| { |
| "epoch": 225.95238095238096, |
| "grad_norm": 0.15383775532245636, |
| "learning_rate": 9.9450086902613e-05, |
| "loss": 0.0048, |
| "step": 9490 |
| }, |
| { |
| "epoch": 226.1904761904762, |
| "grad_norm": 0.11904188245534897, |
| "learning_rate": 9.944763865250248e-05, |
| "loss": 0.0048, |
| "step": 9500 |
| }, |
| { |
| "epoch": 226.42857142857142, |
| "grad_norm": 0.14110532402992249, |
| "learning_rate": 9.944518499487254e-05, |
| "loss": 0.0044, |
| "step": 9510 |
| }, |
| { |
| "epoch": 226.66666666666666, |
| "grad_norm": 0.17113442718982697, |
| "learning_rate": 9.944272592999151e-05, |
| "loss": 0.0051, |
| "step": 9520 |
| }, |
| { |
| "epoch": 226.9047619047619, |
| "grad_norm": 0.12410765886306763, |
| "learning_rate": 9.94402614581283e-05, |
| "loss": 0.0043, |
| "step": 9530 |
| }, |
| { |
| "epoch": 227.14285714285714, |
| "grad_norm": 0.09936399012804031, |
| "learning_rate": 9.943779157955244e-05, |
| "loss": 0.0054, |
| "step": 9540 |
| }, |
| { |
| "epoch": 227.38095238095238, |
| "grad_norm": 0.12387146800756454, |
| "learning_rate": 9.943531629453403e-05, |
| "loss": 0.0051, |
| "step": 9550 |
| }, |
| { |
| "epoch": 227.61904761904762, |
| "grad_norm": 0.1257483810186386, |
| "learning_rate": 9.943283560334375e-05, |
| "loss": 0.0042, |
| "step": 9560 |
| }, |
| { |
| "epoch": 227.85714285714286, |
| "grad_norm": 0.0885445699095726, |
| "learning_rate": 9.943034950625288e-05, |
| "loss": 0.0042, |
| "step": 9570 |
| }, |
| { |
| "epoch": 228.0952380952381, |
| "grad_norm": 0.11685638874769211, |
| "learning_rate": 9.942785800353332e-05, |
| "loss": 0.005, |
| "step": 9580 |
| }, |
| { |
| "epoch": 228.33333333333334, |
| "grad_norm": 0.09223392605781555, |
| "learning_rate": 9.942536109545751e-05, |
| "loss": 0.0044, |
| "step": 9590 |
| }, |
| { |
| "epoch": 228.57142857142858, |
| "grad_norm": 0.10098104178905487, |
| "learning_rate": 9.942285878229853e-05, |
| "loss": 0.0042, |
| "step": 9600 |
| }, |
| { |
| "epoch": 228.8095238095238, |
| "grad_norm": 0.1555575728416443, |
| "learning_rate": 9.942035106433001e-05, |
| "loss": 0.0045, |
| "step": 9610 |
| }, |
| { |
| "epoch": 229.04761904761904, |
| "grad_norm": 0.12151113152503967, |
| "learning_rate": 9.94178379418262e-05, |
| "loss": 0.005, |
| "step": 9620 |
| }, |
| { |
| "epoch": 229.28571428571428, |
| "grad_norm": 0.09024478495121002, |
| "learning_rate": 9.941531941506194e-05, |
| "loss": 0.0044, |
| "step": 9630 |
| }, |
| { |
| "epoch": 229.52380952380952, |
| "grad_norm": 0.10784675180912018, |
| "learning_rate": 9.941279548431263e-05, |
| "loss": 0.0046, |
| "step": 9640 |
| }, |
| { |
| "epoch": 229.76190476190476, |
| "grad_norm": 0.10189420729875565, |
| "learning_rate": 9.941026614985431e-05, |
| "loss": 0.0046, |
| "step": 9650 |
| }, |
| { |
| "epoch": 230.0, |
| "grad_norm": 0.08258704841136932, |
| "learning_rate": 9.940773141196357e-05, |
| "loss": 0.0041, |
| "step": 9660 |
| }, |
| { |
| "epoch": 230.23809523809524, |
| "grad_norm": 0.10900501161813736, |
| "learning_rate": 9.94051912709176e-05, |
| "loss": 0.0044, |
| "step": 9670 |
| }, |
| { |
| "epoch": 230.47619047619048, |
| "grad_norm": 0.10425899922847748, |
| "learning_rate": 9.940264572699421e-05, |
| "loss": 0.0038, |
| "step": 9680 |
| }, |
| { |
| "epoch": 230.71428571428572, |
| "grad_norm": 0.1166265681385994, |
| "learning_rate": 9.940009478047174e-05, |
| "loss": 0.0049, |
| "step": 9690 |
| }, |
| { |
| "epoch": 230.95238095238096, |
| "grad_norm": 0.09572005271911621, |
| "learning_rate": 9.939753843162918e-05, |
| "loss": 0.0036, |
| "step": 9700 |
| }, |
| { |
| "epoch": 231.1904761904762, |
| "grad_norm": 0.0830077975988388, |
| "learning_rate": 9.939497668074609e-05, |
| "loss": 0.0046, |
| "step": 9710 |
| }, |
| { |
| "epoch": 231.42857142857142, |
| "grad_norm": 0.10532094538211823, |
| "learning_rate": 9.93924095281026e-05, |
| "loss": 0.0047, |
| "step": 9720 |
| }, |
| { |
| "epoch": 231.66666666666666, |
| "grad_norm": 0.10421958565711975, |
| "learning_rate": 9.938983697397948e-05, |
| "loss": 0.0041, |
| "step": 9730 |
| }, |
| { |
| "epoch": 231.9047619047619, |
| "grad_norm": 0.1056913435459137, |
| "learning_rate": 9.938725901865805e-05, |
| "loss": 0.0043, |
| "step": 9740 |
| }, |
| { |
| "epoch": 232.14285714285714, |
| "grad_norm": 0.09242488443851471, |
| "learning_rate": 9.93846756624202e-05, |
| "loss": 0.0042, |
| "step": 9750 |
| }, |
| { |
| "epoch": 232.38095238095238, |
| "grad_norm": 0.10261107981204987, |
| "learning_rate": 9.938208690554849e-05, |
| "loss": 0.004, |
| "step": 9760 |
| }, |
| { |
| "epoch": 232.61904761904762, |
| "grad_norm": 0.09793470054864883, |
| "learning_rate": 9.9379492748326e-05, |
| "loss": 0.0039, |
| "step": 9770 |
| }, |
| { |
| "epoch": 232.85714285714286, |
| "grad_norm": 0.09269285947084427, |
| "learning_rate": 9.937689319103641e-05, |
| "loss": 0.0037, |
| "step": 9780 |
| }, |
| { |
| "epoch": 233.0952380952381, |
| "grad_norm": 0.10364263504743576, |
| "learning_rate": 9.937428823396404e-05, |
| "loss": 0.0041, |
| "step": 9790 |
| }, |
| { |
| "epoch": 233.33333333333334, |
| "grad_norm": 0.11572577059268951, |
| "learning_rate": 9.937167787739372e-05, |
| "loss": 0.0042, |
| "step": 9800 |
| }, |
| { |
| "epoch": 233.57142857142858, |
| "grad_norm": 0.07814784348011017, |
| "learning_rate": 9.936906212161095e-05, |
| "loss": 0.0045, |
| "step": 9810 |
| }, |
| { |
| "epoch": 233.8095238095238, |
| "grad_norm": 0.11831694841384888, |
| "learning_rate": 9.936644096690176e-05, |
| "loss": 0.0045, |
| "step": 9820 |
| }, |
| { |
| "epoch": 234.04761904761904, |
| "grad_norm": 0.10333330929279327, |
| "learning_rate": 9.936381441355282e-05, |
| "loss": 0.005, |
| "step": 9830 |
| }, |
| { |
| "epoch": 234.28571428571428, |
| "grad_norm": 0.08039465546607971, |
| "learning_rate": 9.936118246185136e-05, |
| "loss": 0.005, |
| "step": 9840 |
| }, |
| { |
| "epoch": 234.52380952380952, |
| "grad_norm": 0.08821109682321548, |
| "learning_rate": 9.935854511208518e-05, |
| "loss": 0.0048, |
| "step": 9850 |
| }, |
| { |
| "epoch": 234.76190476190476, |
| "grad_norm": 0.11313857138156891, |
| "learning_rate": 9.935590236454272e-05, |
| "loss": 0.0044, |
| "step": 9860 |
| }, |
| { |
| "epoch": 235.0, |
| "grad_norm": 0.10181090235710144, |
| "learning_rate": 9.935325421951298e-05, |
| "loss": 0.0041, |
| "step": 9870 |
| }, |
| { |
| "epoch": 235.23809523809524, |
| "grad_norm": 0.10484672337770462, |
| "learning_rate": 9.935060067728557e-05, |
| "loss": 0.0035, |
| "step": 9880 |
| }, |
| { |
| "epoch": 235.47619047619048, |
| "grad_norm": 0.11528967320919037, |
| "learning_rate": 9.934794173815067e-05, |
| "loss": 0.0045, |
| "step": 9890 |
| }, |
| { |
| "epoch": 235.71428571428572, |
| "grad_norm": 0.10276000201702118, |
| "learning_rate": 9.934527740239906e-05, |
| "loss": 0.0048, |
| "step": 9900 |
| }, |
| { |
| "epoch": 235.95238095238096, |
| "grad_norm": 0.15651634335517883, |
| "learning_rate": 9.934260767032209e-05, |
| "loss": 0.0047, |
| "step": 9910 |
| }, |
| { |
| "epoch": 236.1904761904762, |
| "grad_norm": 0.10621149092912674, |
| "learning_rate": 9.933993254221172e-05, |
| "loss": 0.0047, |
| "step": 9920 |
| }, |
| { |
| "epoch": 236.42857142857142, |
| "grad_norm": 0.10138719528913498, |
| "learning_rate": 9.933725201836053e-05, |
| "loss": 0.0051, |
| "step": 9930 |
| }, |
| { |
| "epoch": 236.66666666666666, |
| "grad_norm": 0.09064063429832458, |
| "learning_rate": 9.933456609906162e-05, |
| "loss": 0.0041, |
| "step": 9940 |
| }, |
| { |
| "epoch": 236.9047619047619, |
| "grad_norm": 0.08357132226228714, |
| "learning_rate": 9.933187478460875e-05, |
| "loss": 0.0047, |
| "step": 9950 |
| }, |
| { |
| "epoch": 237.14285714285714, |
| "grad_norm": 0.08740902692079544, |
| "learning_rate": 9.93291780752962e-05, |
| "loss": 0.0047, |
| "step": 9960 |
| }, |
| { |
| "epoch": 237.38095238095238, |
| "grad_norm": 0.12672919034957886, |
| "learning_rate": 9.932647597141893e-05, |
| "loss": 0.0041, |
| "step": 9970 |
| }, |
| { |
| "epoch": 237.61904761904762, |
| "grad_norm": 0.1262740045785904, |
| "learning_rate": 9.932376847327239e-05, |
| "loss": 0.0041, |
| "step": 9980 |
| }, |
| { |
| "epoch": 237.85714285714286, |
| "grad_norm": 0.06819355487823486, |
| "learning_rate": 9.932105558115268e-05, |
| "loss": 0.0036, |
| "step": 9990 |
| }, |
| { |
| "epoch": 238.0952380952381, |
| "grad_norm": 0.09985050559043884, |
| "learning_rate": 9.931833729535651e-05, |
| "loss": 0.005, |
| "step": 10000 |
| }, |
| { |
| "epoch": 238.33333333333334, |
| "grad_norm": 0.09196142107248306, |
| "learning_rate": 9.931561361618111e-05, |
| "loss": 0.0046, |
| "step": 10010 |
| }, |
| { |
| "epoch": 238.57142857142858, |
| "grad_norm": 0.0857197716832161, |
| "learning_rate": 9.931288454392435e-05, |
| "loss": 0.0051, |
| "step": 10020 |
| }, |
| { |
| "epoch": 238.8095238095238, |
| "grad_norm": 0.11631584912538528, |
| "learning_rate": 9.931015007888467e-05, |
| "loss": 0.0044, |
| "step": 10030 |
| }, |
| { |
| "epoch": 239.04761904761904, |
| "grad_norm": 0.09963522106409073, |
| "learning_rate": 9.930741022136112e-05, |
| "loss": 0.0047, |
| "step": 10040 |
| }, |
| { |
| "epoch": 239.28571428571428, |
| "grad_norm": 0.13286332786083221, |
| "learning_rate": 9.930466497165333e-05, |
| "loss": 0.0044, |
| "step": 10050 |
| }, |
| { |
| "epoch": 239.52380952380952, |
| "grad_norm": 0.10518523305654526, |
| "learning_rate": 9.93019143300615e-05, |
| "loss": 0.0045, |
| "step": 10060 |
| }, |
| { |
| "epoch": 239.76190476190476, |
| "grad_norm": 0.07977799326181412, |
| "learning_rate": 9.929915829688644e-05, |
| "loss": 0.0045, |
| "step": 10070 |
| }, |
| { |
| "epoch": 240.0, |
| "grad_norm": 0.10723453760147095, |
| "learning_rate": 9.929639687242955e-05, |
| "loss": 0.0051, |
| "step": 10080 |
| }, |
| { |
| "epoch": 240.23809523809524, |
| "grad_norm": 0.13088347017765045, |
| "learning_rate": 9.929363005699281e-05, |
| "loss": 0.0045, |
| "step": 10090 |
| }, |
| { |
| "epoch": 240.47619047619048, |
| "grad_norm": 0.10406295210123062, |
| "learning_rate": 9.92908578508788e-05, |
| "loss": 0.0045, |
| "step": 10100 |
| }, |
| { |
| "epoch": 240.71428571428572, |
| "grad_norm": 0.1223369762301445, |
| "learning_rate": 9.928808025439069e-05, |
| "loss": 0.0049, |
| "step": 10110 |
| }, |
| { |
| "epoch": 240.95238095238096, |
| "grad_norm": 0.13603714108467102, |
| "learning_rate": 9.928529726783223e-05, |
| "loss": 0.004, |
| "step": 10120 |
| }, |
| { |
| "epoch": 241.1904761904762, |
| "grad_norm": 0.12976428866386414, |
| "learning_rate": 9.928250889150774e-05, |
| "loss": 0.0043, |
| "step": 10130 |
| }, |
| { |
| "epoch": 241.42857142857142, |
| "grad_norm": 0.07905508577823639, |
| "learning_rate": 9.92797151257222e-05, |
| "loss": 0.0043, |
| "step": 10140 |
| }, |
| { |
| "epoch": 241.66666666666666, |
| "grad_norm": 0.1145826056599617, |
| "learning_rate": 9.927691597078108e-05, |
| "loss": 0.0043, |
| "step": 10150 |
| }, |
| { |
| "epoch": 241.9047619047619, |
| "grad_norm": 0.09356294572353363, |
| "learning_rate": 9.927411142699053e-05, |
| "loss": 0.0045, |
| "step": 10160 |
| }, |
| { |
| "epoch": 242.14285714285714, |
| "grad_norm": 0.1661330610513687, |
| "learning_rate": 9.927130149465725e-05, |
| "loss": 0.0041, |
| "step": 10170 |
| }, |
| { |
| "epoch": 242.38095238095238, |
| "grad_norm": 0.14731742441654205, |
| "learning_rate": 9.92684861740885e-05, |
| "loss": 0.0047, |
| "step": 10180 |
| }, |
| { |
| "epoch": 242.61904761904762, |
| "grad_norm": 0.12493482977151871, |
| "learning_rate": 9.926566546559217e-05, |
| "loss": 0.0054, |
| "step": 10190 |
| }, |
| { |
| "epoch": 242.85714285714286, |
| "grad_norm": 0.1424446702003479, |
| "learning_rate": 9.926283936947673e-05, |
| "loss": 0.0044, |
| "step": 10200 |
| }, |
| { |
| "epoch": 243.0952380952381, |
| "grad_norm": 0.12853041291236877, |
| "learning_rate": 9.926000788605126e-05, |
| "loss": 0.0046, |
| "step": 10210 |
| }, |
| { |
| "epoch": 243.33333333333334, |
| "grad_norm": 0.09398342669010162, |
| "learning_rate": 9.92571710156254e-05, |
| "loss": 0.0046, |
| "step": 10220 |
| }, |
| { |
| "epoch": 243.57142857142858, |
| "grad_norm": 0.11472086608409882, |
| "learning_rate": 9.925432875850936e-05, |
| "loss": 0.0043, |
| "step": 10230 |
| }, |
| { |
| "epoch": 243.8095238095238, |
| "grad_norm": 0.1416465789079666, |
| "learning_rate": 9.925148111501396e-05, |
| "loss": 0.0047, |
| "step": 10240 |
| }, |
| { |
| "epoch": 244.04761904761904, |
| "grad_norm": 0.15870623290538788, |
| "learning_rate": 9.924862808545066e-05, |
| "loss": 0.0046, |
| "step": 10250 |
| }, |
| { |
| "epoch": 244.28571428571428, |
| "grad_norm": 0.07772918790578842, |
| "learning_rate": 9.924576967013141e-05, |
| "loss": 0.0046, |
| "step": 10260 |
| }, |
| { |
| "epoch": 244.52380952380952, |
| "grad_norm": 0.11858698725700378, |
| "learning_rate": 9.924290586936887e-05, |
| "loss": 0.0044, |
| "step": 10270 |
| }, |
| { |
| "epoch": 244.76190476190476, |
| "grad_norm": 0.1498367339372635, |
| "learning_rate": 9.924003668347614e-05, |
| "loss": 0.0054, |
| "step": 10280 |
| }, |
| { |
| "epoch": 245.0, |
| "grad_norm": 0.14315880835056305, |
| "learning_rate": 9.923716211276704e-05, |
| "loss": 0.0043, |
| "step": 10290 |
| }, |
| { |
| "epoch": 245.23809523809524, |
| "grad_norm": 0.11861827224493027, |
| "learning_rate": 9.923428215755594e-05, |
| "loss": 0.0051, |
| "step": 10300 |
| }, |
| { |
| "epoch": 245.47619047619048, |
| "grad_norm": 0.12184726446866989, |
| "learning_rate": 9.923139681815775e-05, |
| "loss": 0.0043, |
| "step": 10310 |
| }, |
| { |
| "epoch": 245.71428571428572, |
| "grad_norm": 0.17061425745487213, |
| "learning_rate": 9.922850609488801e-05, |
| "loss": 0.0045, |
| "step": 10320 |
| }, |
| { |
| "epoch": 245.95238095238096, |
| "grad_norm": 0.11498675495386124, |
| "learning_rate": 9.922560998806287e-05, |
| "loss": 0.0047, |
| "step": 10330 |
| }, |
| { |
| "epoch": 246.1904761904762, |
| "grad_norm": 0.11102292686700821, |
| "learning_rate": 9.922270849799905e-05, |
| "loss": 0.0046, |
| "step": 10340 |
| }, |
| { |
| "epoch": 246.42857142857142, |
| "grad_norm": 0.15384326875209808, |
| "learning_rate": 9.92198016250138e-05, |
| "loss": 0.0056, |
| "step": 10350 |
| }, |
| { |
| "epoch": 246.66666666666666, |
| "grad_norm": 0.17679357528686523, |
| "learning_rate": 9.921688936942506e-05, |
| "loss": 0.0043, |
| "step": 10360 |
| }, |
| { |
| "epoch": 246.9047619047619, |
| "grad_norm": 0.17048697173595428, |
| "learning_rate": 9.921397173155129e-05, |
| "loss": 0.0047, |
| "step": 10370 |
| }, |
| { |
| "epoch": 247.14285714285714, |
| "grad_norm": 0.10143327713012695, |
| "learning_rate": 9.921104871171157e-05, |
| "loss": 0.0045, |
| "step": 10380 |
| }, |
| { |
| "epoch": 247.38095238095238, |
| "grad_norm": 0.10457612574100494, |
| "learning_rate": 9.920812031022554e-05, |
| "loss": 0.0046, |
| "step": 10390 |
| }, |
| { |
| "epoch": 247.61904761904762, |
| "grad_norm": 0.13025136291980743, |
| "learning_rate": 9.920518652741348e-05, |
| "loss": 0.0043, |
| "step": 10400 |
| }, |
| { |
| "epoch": 247.85714285714286, |
| "grad_norm": 0.1439047008752823, |
| "learning_rate": 9.920224736359618e-05, |
| "loss": 0.0045, |
| "step": 10410 |
| }, |
| { |
| "epoch": 248.0952380952381, |
| "grad_norm": 0.10041221231222153, |
| "learning_rate": 9.91993028190951e-05, |
| "loss": 0.004, |
| "step": 10420 |
| }, |
| { |
| "epoch": 248.33333333333334, |
| "grad_norm": 0.11778811365365982, |
| "learning_rate": 9.919635289423222e-05, |
| "loss": 0.0044, |
| "step": 10430 |
| }, |
| { |
| "epoch": 248.57142857142858, |
| "grad_norm": 0.10126891732215881, |
| "learning_rate": 9.919339758933015e-05, |
| "loss": 0.0044, |
| "step": 10440 |
| }, |
| { |
| "epoch": 248.8095238095238, |
| "grad_norm": 0.10975037515163422, |
| "learning_rate": 9.919043690471209e-05, |
| "loss": 0.0047, |
| "step": 10450 |
| }, |
| { |
| "epoch": 249.04761904761904, |
| "grad_norm": 0.14522452652454376, |
| "learning_rate": 9.91874708407018e-05, |
| "loss": 0.0043, |
| "step": 10460 |
| }, |
| { |
| "epoch": 249.28571428571428, |
| "grad_norm": 0.11540976911783218, |
| "learning_rate": 9.918449939762367e-05, |
| "loss": 0.0047, |
| "step": 10470 |
| }, |
| { |
| "epoch": 249.52380952380952, |
| "grad_norm": 0.08293992280960083, |
| "learning_rate": 9.91815225758026e-05, |
| "loss": 0.0042, |
| "step": 10480 |
| }, |
| { |
| "epoch": 249.76190476190476, |
| "grad_norm": 0.10331578552722931, |
| "learning_rate": 9.917854037556419e-05, |
| "loss": 0.0044, |
| "step": 10490 |
| }, |
| { |
| "epoch": 250.0, |
| "grad_norm": 0.05326927453279495, |
| "learning_rate": 9.917555279723454e-05, |
| "loss": 0.0039, |
| "step": 10500 |
| }, |
| { |
| "epoch": 250.23809523809524, |
| "grad_norm": 0.09290356189012527, |
| "learning_rate": 9.917255984114036e-05, |
| "loss": 0.0039, |
| "step": 10510 |
| }, |
| { |
| "epoch": 250.47619047619048, |
| "grad_norm": 0.08729972690343857, |
| "learning_rate": 9.916956150760896e-05, |
| "loss": 0.0053, |
| "step": 10520 |
| }, |
| { |
| "epoch": 250.71428571428572, |
| "grad_norm": 0.06848377734422684, |
| "learning_rate": 9.916655779696826e-05, |
| "loss": 0.0036, |
| "step": 10530 |
| }, |
| { |
| "epoch": 250.95238095238096, |
| "grad_norm": 0.1187238097190857, |
| "learning_rate": 9.916354870954671e-05, |
| "loss": 0.004, |
| "step": 10540 |
| }, |
| { |
| "epoch": 251.1904761904762, |
| "grad_norm": 0.0963253453373909, |
| "learning_rate": 9.91605342456734e-05, |
| "loss": 0.0041, |
| "step": 10550 |
| }, |
| { |
| "epoch": 251.42857142857142, |
| "grad_norm": 0.10367690771818161, |
| "learning_rate": 9.915751440567795e-05, |
| "loss": 0.004, |
| "step": 10560 |
| }, |
| { |
| "epoch": 251.66666666666666, |
| "grad_norm": 0.10077622532844543, |
| "learning_rate": 9.915448918989066e-05, |
| "loss": 0.0041, |
| "step": 10570 |
| }, |
| { |
| "epoch": 251.9047619047619, |
| "grad_norm": 0.10594525188207626, |
| "learning_rate": 9.915145859864232e-05, |
| "loss": 0.0039, |
| "step": 10580 |
| }, |
| { |
| "epoch": 252.14285714285714, |
| "grad_norm": 0.10971103608608246, |
| "learning_rate": 9.914842263226437e-05, |
| "loss": 0.0039, |
| "step": 10590 |
| }, |
| { |
| "epoch": 252.38095238095238, |
| "grad_norm": 0.10805618762969971, |
| "learning_rate": 9.914538129108882e-05, |
| "loss": 0.0047, |
| "step": 10600 |
| }, |
| { |
| "epoch": 252.61904761904762, |
| "grad_norm": 0.09153848886489868, |
| "learning_rate": 9.914233457544825e-05, |
| "loss": 0.0051, |
| "step": 10610 |
| }, |
| { |
| "epoch": 252.85714285714286, |
| "grad_norm": 0.0847465768456459, |
| "learning_rate": 9.913928248567586e-05, |
| "loss": 0.0043, |
| "step": 10620 |
| }, |
| { |
| "epoch": 253.0952380952381, |
| "grad_norm": 0.1000661626458168, |
| "learning_rate": 9.913622502210542e-05, |
| "loss": 0.0042, |
| "step": 10630 |
| }, |
| { |
| "epoch": 253.33333333333334, |
| "grad_norm": 0.08922554552555084, |
| "learning_rate": 9.913316218507128e-05, |
| "loss": 0.0045, |
| "step": 10640 |
| }, |
| { |
| "epoch": 253.57142857142858, |
| "grad_norm": 0.13087423145771027, |
| "learning_rate": 9.91300939749084e-05, |
| "loss": 0.0041, |
| "step": 10650 |
| }, |
| { |
| "epoch": 253.8095238095238, |
| "grad_norm": 0.11553340405225754, |
| "learning_rate": 9.91270203919523e-05, |
| "loss": 0.0041, |
| "step": 10660 |
| }, |
| { |
| "epoch": 254.04761904761904, |
| "grad_norm": 0.07585998624563217, |
| "learning_rate": 9.912394143653912e-05, |
| "loss": 0.0042, |
| "step": 10670 |
| }, |
| { |
| "epoch": 254.28571428571428, |
| "grad_norm": 0.09735798835754395, |
| "learning_rate": 9.912085710900555e-05, |
| "loss": 0.0044, |
| "step": 10680 |
| }, |
| { |
| "epoch": 254.52380952380952, |
| "grad_norm": 0.10190661251544952, |
| "learning_rate": 9.911776740968892e-05, |
| "loss": 0.004, |
| "step": 10690 |
| }, |
| { |
| "epoch": 254.76190476190476, |
| "grad_norm": 0.08707146346569061, |
| "learning_rate": 9.911467233892709e-05, |
| "loss": 0.0042, |
| "step": 10700 |
| }, |
| { |
| "epoch": 255.0, |
| "grad_norm": 0.10799738764762878, |
| "learning_rate": 9.911157189705853e-05, |
| "loss": 0.0037, |
| "step": 10710 |
| }, |
| { |
| "epoch": 255.23809523809524, |
| "grad_norm": 0.11791957169771194, |
| "learning_rate": 9.910846608442229e-05, |
| "loss": 0.0039, |
| "step": 10720 |
| }, |
| { |
| "epoch": 255.47619047619048, |
| "grad_norm": 0.0833316445350647, |
| "learning_rate": 9.910535490135805e-05, |
| "loss": 0.0047, |
| "step": 10730 |
| }, |
| { |
| "epoch": 255.71428571428572, |
| "grad_norm": 0.11213032156229019, |
| "learning_rate": 9.910223834820603e-05, |
| "loss": 0.0036, |
| "step": 10740 |
| }, |
| { |
| "epoch": 255.95238095238096, |
| "grad_norm": 0.10593509674072266, |
| "learning_rate": 9.909911642530703e-05, |
| "loss": 0.004, |
| "step": 10750 |
| }, |
| { |
| "epoch": 256.1904761904762, |
| "grad_norm": 0.11248364299535751, |
| "learning_rate": 9.909598913300249e-05, |
| "loss": 0.0045, |
| "step": 10760 |
| }, |
| { |
| "epoch": 256.42857142857144, |
| "grad_norm": 0.10015545785427094, |
| "learning_rate": 9.909285647163438e-05, |
| "loss": 0.0044, |
| "step": 10770 |
| }, |
| { |
| "epoch": 256.6666666666667, |
| "grad_norm": 0.11217257380485535, |
| "learning_rate": 9.908971844154531e-05, |
| "loss": 0.0041, |
| "step": 10780 |
| }, |
| { |
| "epoch": 256.9047619047619, |
| "grad_norm": 0.11641879379749298, |
| "learning_rate": 9.908657504307843e-05, |
| "loss": 0.0044, |
| "step": 10790 |
| }, |
| { |
| "epoch": 257.14285714285717, |
| "grad_norm": 0.14481626451015472, |
| "learning_rate": 9.908342627657751e-05, |
| "loss": 0.0049, |
| "step": 10800 |
| }, |
| { |
| "epoch": 257.3809523809524, |
| "grad_norm": 0.12930141389369965, |
| "learning_rate": 9.908027214238689e-05, |
| "loss": 0.005, |
| "step": 10810 |
| }, |
| { |
| "epoch": 257.6190476190476, |
| "grad_norm": 0.12717360258102417, |
| "learning_rate": 9.90771126408515e-05, |
| "loss": 0.0047, |
| "step": 10820 |
| }, |
| { |
| "epoch": 257.85714285714283, |
| "grad_norm": 0.12493440508842468, |
| "learning_rate": 9.907394777231685e-05, |
| "loss": 0.0043, |
| "step": 10830 |
| }, |
| { |
| "epoch": 258.0952380952381, |
| "grad_norm": 0.10448380559682846, |
| "learning_rate": 9.907077753712905e-05, |
| "loss": 0.0042, |
| "step": 10840 |
| }, |
| { |
| "epoch": 258.3333333333333, |
| "grad_norm": 0.10403750091791153, |
| "learning_rate": 9.906760193563482e-05, |
| "loss": 0.0043, |
| "step": 10850 |
| }, |
| { |
| "epoch": 258.57142857142856, |
| "grad_norm": 0.12101778388023376, |
| "learning_rate": 9.906442096818139e-05, |
| "loss": 0.0038, |
| "step": 10860 |
| }, |
| { |
| "epoch": 258.8095238095238, |
| "grad_norm": 0.11024534702301025, |
| "learning_rate": 9.906123463511665e-05, |
| "loss": 0.0039, |
| "step": 10870 |
| }, |
| { |
| "epoch": 259.04761904761904, |
| "grad_norm": 0.11846877634525299, |
| "learning_rate": 9.905804293678907e-05, |
| "loss": 0.0041, |
| "step": 10880 |
| }, |
| { |
| "epoch": 259.2857142857143, |
| "grad_norm": 0.12364278733730316, |
| "learning_rate": 9.905484587354766e-05, |
| "loss": 0.0049, |
| "step": 10890 |
| }, |
| { |
| "epoch": 259.5238095238095, |
| "grad_norm": 0.15018124878406525, |
| "learning_rate": 9.905164344574205e-05, |
| "loss": 0.0046, |
| "step": 10900 |
| }, |
| { |
| "epoch": 259.76190476190476, |
| "grad_norm": 0.13940197229385376, |
| "learning_rate": 9.904843565372248e-05, |
| "loss": 0.0045, |
| "step": 10910 |
| }, |
| { |
| "epoch": 260.0, |
| "grad_norm": 0.11086831986904144, |
| "learning_rate": 9.904522249783972e-05, |
| "loss": 0.004, |
| "step": 10920 |
| }, |
| { |
| "epoch": 260.23809523809524, |
| "grad_norm": 0.1152733713388443, |
| "learning_rate": 9.904200397844517e-05, |
| "loss": 0.0043, |
| "step": 10930 |
| }, |
| { |
| "epoch": 260.4761904761905, |
| "grad_norm": 0.09819278120994568, |
| "learning_rate": 9.903878009589078e-05, |
| "loss": 0.0041, |
| "step": 10940 |
| }, |
| { |
| "epoch": 260.7142857142857, |
| "grad_norm": 0.0979447141289711, |
| "learning_rate": 9.903555085052915e-05, |
| "loss": 0.0043, |
| "step": 10950 |
| }, |
| { |
| "epoch": 260.95238095238096, |
| "grad_norm": 0.08403604477643967, |
| "learning_rate": 9.903231624271338e-05, |
| "loss": 0.0039, |
| "step": 10960 |
| }, |
| { |
| "epoch": 261.1904761904762, |
| "grad_norm": 0.0956418439745903, |
| "learning_rate": 9.902907627279724e-05, |
| "loss": 0.0041, |
| "step": 10970 |
| }, |
| { |
| "epoch": 261.42857142857144, |
| "grad_norm": 0.07281234860420227, |
| "learning_rate": 9.902583094113504e-05, |
| "loss": 0.0044, |
| "step": 10980 |
| }, |
| { |
| "epoch": 261.6666666666667, |
| "grad_norm": 0.08977030217647552, |
| "learning_rate": 9.902258024808168e-05, |
| "loss": 0.004, |
| "step": 10990 |
| }, |
| { |
| "epoch": 261.9047619047619, |
| "grad_norm": 0.08575175702571869, |
| "learning_rate": 9.901932419399264e-05, |
| "loss": 0.0041, |
| "step": 11000 |
| }, |
| { |
| "epoch": 262.14285714285717, |
| "grad_norm": 0.06994577497243881, |
| "learning_rate": 9.9016062779224e-05, |
| "loss": 0.0034, |
| "step": 11010 |
| }, |
| { |
| "epoch": 262.3809523809524, |
| "grad_norm": 0.05822889134287834, |
| "learning_rate": 9.901279600413242e-05, |
| "loss": 0.0041, |
| "step": 11020 |
| }, |
| { |
| "epoch": 262.6190476190476, |
| "grad_norm": 0.06249088793992996, |
| "learning_rate": 9.900952386907518e-05, |
| "loss": 0.0048, |
| "step": 11030 |
| }, |
| { |
| "epoch": 262.85714285714283, |
| "grad_norm": 0.0755944773554802, |
| "learning_rate": 9.90062463744101e-05, |
| "loss": 0.0048, |
| "step": 11040 |
| }, |
| { |
| "epoch": 263.0952380952381, |
| "grad_norm": 0.09492585808038712, |
| "learning_rate": 9.900296352049558e-05, |
| "loss": 0.0038, |
| "step": 11050 |
| }, |
| { |
| "epoch": 263.3333333333333, |
| "grad_norm": 0.11966243386268616, |
| "learning_rate": 9.899967530769065e-05, |
| "loss": 0.005, |
| "step": 11060 |
| }, |
| { |
| "epoch": 263.57142857142856, |
| "grad_norm": 0.1069047749042511, |
| "learning_rate": 9.899638173635489e-05, |
| "loss": 0.0048, |
| "step": 11070 |
| }, |
| { |
| "epoch": 263.8095238095238, |
| "grad_norm": 0.0853164792060852, |
| "learning_rate": 9.899308280684849e-05, |
| "loss": 0.0036, |
| "step": 11080 |
| }, |
| { |
| "epoch": 264.04761904761904, |
| "grad_norm": 0.09381288290023804, |
| "learning_rate": 9.898977851953222e-05, |
| "loss": 0.0041, |
| "step": 11090 |
| }, |
| { |
| "epoch": 264.2857142857143, |
| "grad_norm": 0.1071372777223587, |
| "learning_rate": 9.898646887476741e-05, |
| "loss": 0.0038, |
| "step": 11100 |
| }, |
| { |
| "epoch": 264.5238095238095, |
| "grad_norm": 0.10993285477161407, |
| "learning_rate": 9.898315387291603e-05, |
| "loss": 0.0041, |
| "step": 11110 |
| }, |
| { |
| "epoch": 264.76190476190476, |
| "grad_norm": 0.08751342445611954, |
| "learning_rate": 9.89798335143406e-05, |
| "loss": 0.0042, |
| "step": 11120 |
| }, |
| { |
| "epoch": 265.0, |
| "grad_norm": 0.09537802636623383, |
| "learning_rate": 9.897650779940419e-05, |
| "loss": 0.0037, |
| "step": 11130 |
| }, |
| { |
| "epoch": 265.23809523809524, |
| "grad_norm": 0.10724399983882904, |
| "learning_rate": 9.897317672847054e-05, |
| "loss": 0.0048, |
| "step": 11140 |
| }, |
| { |
| "epoch": 265.4761904761905, |
| "grad_norm": 0.11723458766937256, |
| "learning_rate": 9.89698403019039e-05, |
| "loss": 0.004, |
| "step": 11150 |
| }, |
| { |
| "epoch": 265.7142857142857, |
| "grad_norm": 0.12238142639398575, |
| "learning_rate": 9.896649852006917e-05, |
| "loss": 0.0037, |
| "step": 11160 |
| }, |
| { |
| "epoch": 265.95238095238096, |
| "grad_norm": 0.13138741254806519, |
| "learning_rate": 9.896315138333177e-05, |
| "loss": 0.0047, |
| "step": 11170 |
| }, |
| { |
| "epoch": 266.1904761904762, |
| "grad_norm": 0.16651716828346252, |
| "learning_rate": 9.895979889205774e-05, |
| "loss": 0.005, |
| "step": 11180 |
| }, |
| { |
| "epoch": 266.42857142857144, |
| "grad_norm": 0.08300276100635529, |
| "learning_rate": 9.895644104661372e-05, |
| "loss": 0.0039, |
| "step": 11190 |
| }, |
| { |
| "epoch": 266.6666666666667, |
| "grad_norm": 0.07867535203695297, |
| "learning_rate": 9.895307784736691e-05, |
| "loss": 0.0036, |
| "step": 11200 |
| }, |
| { |
| "epoch": 266.9047619047619, |
| "grad_norm": 0.12637798488140106, |
| "learning_rate": 9.894970929468512e-05, |
| "loss": 0.0044, |
| "step": 11210 |
| }, |
| { |
| "epoch": 267.14285714285717, |
| "grad_norm": 0.0952150970697403, |
| "learning_rate": 9.89463353889367e-05, |
| "loss": 0.0035, |
| "step": 11220 |
| }, |
| { |
| "epoch": 267.3809523809524, |
| "grad_norm": 0.07330246269702911, |
| "learning_rate": 9.894295613049065e-05, |
| "loss": 0.0049, |
| "step": 11230 |
| }, |
| { |
| "epoch": 267.6190476190476, |
| "grad_norm": 0.08680123835802078, |
| "learning_rate": 9.893957151971649e-05, |
| "loss": 0.0039, |
| "step": 11240 |
| }, |
| { |
| "epoch": 267.85714285714283, |
| "grad_norm": 0.11474114656448364, |
| "learning_rate": 9.893618155698436e-05, |
| "loss": 0.004, |
| "step": 11250 |
| }, |
| { |
| "epoch": 268.0952380952381, |
| "grad_norm": 0.16576187312602997, |
| "learning_rate": 9.8932786242665e-05, |
| "loss": 0.0053, |
| "step": 11260 |
| }, |
| { |
| "epoch": 268.3333333333333, |
| "grad_norm": 0.13272294402122498, |
| "learning_rate": 9.89293855771297e-05, |
| "loss": 0.0045, |
| "step": 11270 |
| }, |
| { |
| "epoch": 268.57142857142856, |
| "grad_norm": 0.18816474080085754, |
| "learning_rate": 9.892597956075036e-05, |
| "loss": 0.0047, |
| "step": 11280 |
| }, |
| { |
| "epoch": 268.8095238095238, |
| "grad_norm": 0.12227193266153336, |
| "learning_rate": 9.892256819389947e-05, |
| "loss": 0.0044, |
| "step": 11290 |
| }, |
| { |
| "epoch": 269.04761904761904, |
| "grad_norm": 0.07768643647432327, |
| "learning_rate": 9.891915147695006e-05, |
| "loss": 0.004, |
| "step": 11300 |
| }, |
| { |
| "epoch": 269.2857142857143, |
| "grad_norm": 0.11112253367900848, |
| "learning_rate": 9.891572941027577e-05, |
| "loss": 0.0041, |
| "step": 11310 |
| }, |
| { |
| "epoch": 269.5238095238095, |
| "grad_norm": 0.10507705807685852, |
| "learning_rate": 9.89123019942509e-05, |
| "loss": 0.0048, |
| "step": 11320 |
| }, |
| { |
| "epoch": 269.76190476190476, |
| "grad_norm": 0.1404249668121338, |
| "learning_rate": 9.89088692292502e-05, |
| "loss": 0.0048, |
| "step": 11330 |
| }, |
| { |
| "epoch": 270.0, |
| "grad_norm": 0.11719421297311783, |
| "learning_rate": 9.89054311156491e-05, |
| "loss": 0.004, |
| "step": 11340 |
| }, |
| { |
| "epoch": 270.23809523809524, |
| "grad_norm": 0.0927252322435379, |
| "learning_rate": 9.890198765382357e-05, |
| "loss": 0.0038, |
| "step": 11350 |
| }, |
| { |
| "epoch": 270.4761904761905, |
| "grad_norm": 0.17926521599292755, |
| "learning_rate": 9.889853884415021e-05, |
| "loss": 0.0041, |
| "step": 11360 |
| }, |
| { |
| "epoch": 270.7142857142857, |
| "grad_norm": 0.1448417752981186, |
| "learning_rate": 9.889508468700614e-05, |
| "loss": 0.005, |
| "step": 11370 |
| }, |
| { |
| "epoch": 270.95238095238096, |
| "grad_norm": 0.11915785074234009, |
| "learning_rate": 9.889162518276915e-05, |
| "loss": 0.0046, |
| "step": 11380 |
| }, |
| { |
| "epoch": 271.1904761904762, |
| "grad_norm": 0.11126571893692017, |
| "learning_rate": 9.888816033181752e-05, |
| "loss": 0.0044, |
| "step": 11390 |
| }, |
| { |
| "epoch": 271.42857142857144, |
| "grad_norm": 0.09432601183652878, |
| "learning_rate": 9.888469013453018e-05, |
| "loss": 0.0037, |
| "step": 11400 |
| }, |
| { |
| "epoch": 271.6666666666667, |
| "grad_norm": 0.13234515488147736, |
| "learning_rate": 9.888121459128663e-05, |
| "loss": 0.0043, |
| "step": 11410 |
| }, |
| { |
| "epoch": 271.9047619047619, |
| "grad_norm": 0.11605100333690643, |
| "learning_rate": 9.887773370246693e-05, |
| "loss": 0.004, |
| "step": 11420 |
| }, |
| { |
| "epoch": 272.14285714285717, |
| "grad_norm": 0.12789122760295868, |
| "learning_rate": 9.887424746845177e-05, |
| "loss": 0.0037, |
| "step": 11430 |
| }, |
| { |
| "epoch": 272.3809523809524, |
| "grad_norm": 0.12439467757940292, |
| "learning_rate": 9.887075588962239e-05, |
| "loss": 0.0043, |
| "step": 11440 |
| }, |
| { |
| "epoch": 272.6190476190476, |
| "grad_norm": 0.1216873899102211, |
| "learning_rate": 9.88672589663606e-05, |
| "loss": 0.0041, |
| "step": 11450 |
| }, |
| { |
| "epoch": 272.85714285714283, |
| "grad_norm": 0.12306111305952072, |
| "learning_rate": 9.886375669904886e-05, |
| "loss": 0.0038, |
| "step": 11460 |
| }, |
| { |
| "epoch": 273.0952380952381, |
| "grad_norm": 0.09238433837890625, |
| "learning_rate": 9.886024908807014e-05, |
| "loss": 0.0037, |
| "step": 11470 |
| }, |
| { |
| "epoch": 273.3333333333333, |
| "grad_norm": 0.10716689378023148, |
| "learning_rate": 9.885673613380806e-05, |
| "loss": 0.0041, |
| "step": 11480 |
| }, |
| { |
| "epoch": 273.57142857142856, |
| "grad_norm": 0.09920501708984375, |
| "learning_rate": 9.885321783664676e-05, |
| "loss": 0.0037, |
| "step": 11490 |
| }, |
| { |
| "epoch": 273.8095238095238, |
| "grad_norm": 0.13613645732402802, |
| "learning_rate": 9.884969419697101e-05, |
| "loss": 0.0048, |
| "step": 11500 |
| }, |
| { |
| "epoch": 274.04761904761904, |
| "grad_norm": 0.1357085257768631, |
| "learning_rate": 9.884616521516614e-05, |
| "loss": 0.0046, |
| "step": 11510 |
| }, |
| { |
| "epoch": 274.2857142857143, |
| "grad_norm": 0.07006344199180603, |
| "learning_rate": 9.88426308916181e-05, |
| "loss": 0.0041, |
| "step": 11520 |
| }, |
| { |
| "epoch": 274.5238095238095, |
| "grad_norm": 0.09465203434228897, |
| "learning_rate": 9.883909122671335e-05, |
| "loss": 0.0037, |
| "step": 11530 |
| }, |
| { |
| "epoch": 274.76190476190476, |
| "grad_norm": 0.14068326354026794, |
| "learning_rate": 9.883554622083904e-05, |
| "loss": 0.004, |
| "step": 11540 |
| }, |
| { |
| "epoch": 275.0, |
| "grad_norm": 0.13355973362922668, |
| "learning_rate": 9.88319958743828e-05, |
| "loss": 0.004, |
| "step": 11550 |
| }, |
| { |
| "epoch": 275.23809523809524, |
| "grad_norm": 0.07187960296869278, |
| "learning_rate": 9.882844018773291e-05, |
| "loss": 0.004, |
| "step": 11560 |
| }, |
| { |
| "epoch": 275.4761904761905, |
| "grad_norm": 0.10519888252019882, |
| "learning_rate": 9.882487916127823e-05, |
| "loss": 0.0037, |
| "step": 11570 |
| }, |
| { |
| "epoch": 275.7142857142857, |
| "grad_norm": 0.12247087806463242, |
| "learning_rate": 9.882131279540815e-05, |
| "loss": 0.0042, |
| "step": 11580 |
| }, |
| { |
| "epoch": 275.95238095238096, |
| "grad_norm": 0.11365683376789093, |
| "learning_rate": 9.881774109051271e-05, |
| "loss": 0.0036, |
| "step": 11590 |
| }, |
| { |
| "epoch": 276.1904761904762, |
| "grad_norm": 0.1097637489438057, |
| "learning_rate": 9.881416404698252e-05, |
| "loss": 0.0039, |
| "step": 11600 |
| }, |
| { |
| "epoch": 276.42857142857144, |
| "grad_norm": 0.11944371461868286, |
| "learning_rate": 9.881058166520873e-05, |
| "loss": 0.0042, |
| "step": 11610 |
| }, |
| { |
| "epoch": 276.6666666666667, |
| "grad_norm": 0.10190605372190475, |
| "learning_rate": 9.880699394558311e-05, |
| "loss": 0.005, |
| "step": 11620 |
| }, |
| { |
| "epoch": 276.9047619047619, |
| "grad_norm": 0.10024614632129669, |
| "learning_rate": 9.880340088849801e-05, |
| "loss": 0.0034, |
| "step": 11630 |
| }, |
| { |
| "epoch": 277.14285714285717, |
| "grad_norm": 0.09103026241064072, |
| "learning_rate": 9.879980249434637e-05, |
| "loss": 0.0045, |
| "step": 11640 |
| }, |
| { |
| "epoch": 277.3809523809524, |
| "grad_norm": 0.12165787070989609, |
| "learning_rate": 9.879619876352168e-05, |
| "loss": 0.004, |
| "step": 11650 |
| }, |
| { |
| "epoch": 277.6190476190476, |
| "grad_norm": 0.11813631653785706, |
| "learning_rate": 9.879258969641809e-05, |
| "loss": 0.0039, |
| "step": 11660 |
| }, |
| { |
| "epoch": 277.85714285714283, |
| "grad_norm": 0.13087496161460876, |
| "learning_rate": 9.878897529343023e-05, |
| "loss": 0.0043, |
| "step": 11670 |
| }, |
| { |
| "epoch": 278.0952380952381, |
| "grad_norm": 0.10560193657875061, |
| "learning_rate": 9.878535555495338e-05, |
| "loss": 0.0036, |
| "step": 11680 |
| }, |
| { |
| "epoch": 278.3333333333333, |
| "grad_norm": 0.10994599759578705, |
| "learning_rate": 9.87817304813834e-05, |
| "loss": 0.0044, |
| "step": 11690 |
| }, |
| { |
| "epoch": 278.57142857142856, |
| "grad_norm": 0.1181885227560997, |
| "learning_rate": 9.877810007311671e-05, |
| "loss": 0.0044, |
| "step": 11700 |
| }, |
| { |
| "epoch": 278.8095238095238, |
| "grad_norm": 0.09444352239370346, |
| "learning_rate": 9.877446433055035e-05, |
| "loss": 0.004, |
| "step": 11710 |
| }, |
| { |
| "epoch": 279.04761904761904, |
| "grad_norm": 0.0838547796010971, |
| "learning_rate": 9.877082325408191e-05, |
| "loss": 0.0042, |
| "step": 11720 |
| }, |
| { |
| "epoch": 279.2857142857143, |
| "grad_norm": 0.11787308007478714, |
| "learning_rate": 9.876717684410954e-05, |
| "loss": 0.0047, |
| "step": 11730 |
| }, |
| { |
| "epoch": 279.5238095238095, |
| "grad_norm": 0.11910413950681686, |
| "learning_rate": 9.876352510103204e-05, |
| "loss": 0.0041, |
| "step": 11740 |
| }, |
| { |
| "epoch": 279.76190476190476, |
| "grad_norm": 0.10170426219701767, |
| "learning_rate": 9.875986802524875e-05, |
| "loss": 0.004, |
| "step": 11750 |
| }, |
| { |
| "epoch": 280.0, |
| "grad_norm": 0.0790335163474083, |
| "learning_rate": 9.87562056171596e-05, |
| "loss": 0.0036, |
| "step": 11760 |
| }, |
| { |
| "epoch": 280.23809523809524, |
| "grad_norm": 0.10070935636758804, |
| "learning_rate": 9.875253787716511e-05, |
| "loss": 0.0037, |
| "step": 11770 |
| }, |
| { |
| "epoch": 280.4761904761905, |
| "grad_norm": 0.07996337860822678, |
| "learning_rate": 9.874886480566637e-05, |
| "loss": 0.0035, |
| "step": 11780 |
| }, |
| { |
| "epoch": 280.7142857142857, |
| "grad_norm": 0.10315964370965958, |
| "learning_rate": 9.874518640306507e-05, |
| "loss": 0.004, |
| "step": 11790 |
| }, |
| { |
| "epoch": 280.95238095238096, |
| "grad_norm": 0.08542545884847641, |
| "learning_rate": 9.874150266976347e-05, |
| "loss": 0.0043, |
| "step": 11800 |
| }, |
| { |
| "epoch": 281.1904761904762, |
| "grad_norm": 0.08652926236391068, |
| "learning_rate": 9.873781360616443e-05, |
| "loss": 0.004, |
| "step": 11810 |
| }, |
| { |
| "epoch": 281.42857142857144, |
| "grad_norm": 0.07653894275426865, |
| "learning_rate": 9.873411921267137e-05, |
| "loss": 0.0038, |
| "step": 11820 |
| }, |
| { |
| "epoch": 281.6666666666667, |
| "grad_norm": 0.09147300571203232, |
| "learning_rate": 9.873041948968829e-05, |
| "loss": 0.0033, |
| "step": 11830 |
| }, |
| { |
| "epoch": 281.9047619047619, |
| "grad_norm": 0.10387254506349564, |
| "learning_rate": 9.872671443761981e-05, |
| "loss": 0.0036, |
| "step": 11840 |
| }, |
| { |
| "epoch": 282.14285714285717, |
| "grad_norm": 0.08796127885580063, |
| "learning_rate": 9.872300405687109e-05, |
| "loss": 0.0044, |
| "step": 11850 |
| }, |
| { |
| "epoch": 282.3809523809524, |
| "grad_norm": 0.06728366017341614, |
| "learning_rate": 9.871928834784792e-05, |
| "loss": 0.0044, |
| "step": 11860 |
| }, |
| { |
| "epoch": 282.6190476190476, |
| "grad_norm": 0.09203702211380005, |
| "learning_rate": 9.871556731095661e-05, |
| "loss": 0.0036, |
| "step": 11870 |
| }, |
| { |
| "epoch": 282.85714285714283, |
| "grad_norm": 0.07837321609258652, |
| "learning_rate": 9.871184094660411e-05, |
| "loss": 0.0045, |
| "step": 11880 |
| }, |
| { |
| "epoch": 283.0952380952381, |
| "grad_norm": 0.11514673382043839, |
| "learning_rate": 9.870810925519791e-05, |
| "loss": 0.0042, |
| "step": 11890 |
| }, |
| { |
| "epoch": 283.3333333333333, |
| "grad_norm": 0.10458526015281677, |
| "learning_rate": 9.870437223714612e-05, |
| "loss": 0.0044, |
| "step": 11900 |
| }, |
| { |
| "epoch": 283.57142857142856, |
| "grad_norm": 0.08962014317512512, |
| "learning_rate": 9.87006298928574e-05, |
| "loss": 0.0045, |
| "step": 11910 |
| }, |
| { |
| "epoch": 283.8095238095238, |
| "grad_norm": 0.07565009593963623, |
| "learning_rate": 9.869688222274103e-05, |
| "loss": 0.004, |
| "step": 11920 |
| }, |
| { |
| "epoch": 284.04761904761904, |
| "grad_norm": 0.07436763495206833, |
| "learning_rate": 9.869312922720681e-05, |
| "loss": 0.0038, |
| "step": 11930 |
| }, |
| { |
| "epoch": 284.2857142857143, |
| "grad_norm": 0.07898060977458954, |
| "learning_rate": 9.868937090666521e-05, |
| "loss": 0.0038, |
| "step": 11940 |
| }, |
| { |
| "epoch": 284.5238095238095, |
| "grad_norm": 0.08099555969238281, |
| "learning_rate": 9.86856072615272e-05, |
| "loss": 0.0038, |
| "step": 11950 |
| }, |
| { |
| "epoch": 284.76190476190476, |
| "grad_norm": 0.06122984737157822, |
| "learning_rate": 9.868183829220438e-05, |
| "loss": 0.0039, |
| "step": 11960 |
| }, |
| { |
| "epoch": 285.0, |
| "grad_norm": 0.0688677504658699, |
| "learning_rate": 9.867806399910893e-05, |
| "loss": 0.0036, |
| "step": 11970 |
| }, |
| { |
| "epoch": 285.23809523809524, |
| "grad_norm": 0.1081698089838028, |
| "learning_rate": 9.867428438265356e-05, |
| "loss": 0.0045, |
| "step": 11980 |
| }, |
| { |
| "epoch": 285.4761904761905, |
| "grad_norm": 0.1028362512588501, |
| "learning_rate": 9.867049944325165e-05, |
| "loss": 0.0041, |
| "step": 11990 |
| }, |
| { |
| "epoch": 285.7142857142857, |
| "grad_norm": 0.10315411537885666, |
| "learning_rate": 9.86667091813171e-05, |
| "loss": 0.0039, |
| "step": 12000 |
| }, |
| { |
| "epoch": 285.95238095238096, |
| "grad_norm": 0.09599002450704575, |
| "learning_rate": 9.866291359726438e-05, |
| "loss": 0.0037, |
| "step": 12010 |
| }, |
| { |
| "epoch": 286.1904761904762, |
| "grad_norm": 0.07697649300098419, |
| "learning_rate": 9.865911269150861e-05, |
| "loss": 0.0037, |
| "step": 12020 |
| }, |
| { |
| "epoch": 286.42857142857144, |
| "grad_norm": 0.07925060391426086, |
| "learning_rate": 9.865530646446544e-05, |
| "loss": 0.0039, |
| "step": 12030 |
| }, |
| { |
| "epoch": 286.6666666666667, |
| "grad_norm": 0.07081872224807739, |
| "learning_rate": 9.86514949165511e-05, |
| "loss": 0.0038, |
| "step": 12040 |
| }, |
| { |
| "epoch": 286.9047619047619, |
| "grad_norm": 0.09956687688827515, |
| "learning_rate": 9.864767804818243e-05, |
| "loss": 0.0038, |
| "step": 12050 |
| }, |
| { |
| "epoch": 287.14285714285717, |
| "grad_norm": 0.11957503855228424, |
| "learning_rate": 9.86438558597768e-05, |
| "loss": 0.0039, |
| "step": 12060 |
| }, |
| { |
| "epoch": 287.3809523809524, |
| "grad_norm": 0.11079724133014679, |
| "learning_rate": 9.864002835175225e-05, |
| "loss": 0.0038, |
| "step": 12070 |
| }, |
| { |
| "epoch": 287.6190476190476, |
| "grad_norm": 0.10100255161523819, |
| "learning_rate": 9.863619552452734e-05, |
| "loss": 0.0046, |
| "step": 12080 |
| }, |
| { |
| "epoch": 287.85714285714283, |
| "grad_norm": 0.09080150723457336, |
| "learning_rate": 9.863235737852119e-05, |
| "loss": 0.0033, |
| "step": 12090 |
| }, |
| { |
| "epoch": 288.0952380952381, |
| "grad_norm": 0.08868679404258728, |
| "learning_rate": 9.862851391415356e-05, |
| "loss": 0.0034, |
| "step": 12100 |
| }, |
| { |
| "epoch": 288.3333333333333, |
| "grad_norm": 0.09848558902740479, |
| "learning_rate": 9.862466513184477e-05, |
| "loss": 0.0038, |
| "step": 12110 |
| }, |
| { |
| "epoch": 288.57142857142856, |
| "grad_norm": 0.08394346386194229, |
| "learning_rate": 9.86208110320157e-05, |
| "loss": 0.0039, |
| "step": 12120 |
| }, |
| { |
| "epoch": 288.8095238095238, |
| "grad_norm": 0.11095961928367615, |
| "learning_rate": 9.861695161508784e-05, |
| "loss": 0.0041, |
| "step": 12130 |
| }, |
| { |
| "epoch": 289.04761904761904, |
| "grad_norm": 0.12209942936897278, |
| "learning_rate": 9.861308688148324e-05, |
| "loss": 0.0042, |
| "step": 12140 |
| }, |
| { |
| "epoch": 289.2857142857143, |
| "grad_norm": 0.09307516366243362, |
| "learning_rate": 9.860921683162455e-05, |
| "loss": 0.0042, |
| "step": 12150 |
| }, |
| { |
| "epoch": 289.5238095238095, |
| "grad_norm": 0.08344811201095581, |
| "learning_rate": 9.860534146593499e-05, |
| "loss": 0.0044, |
| "step": 12160 |
| }, |
| { |
| "epoch": 289.76190476190476, |
| "grad_norm": 0.0826755091547966, |
| "learning_rate": 9.860146078483836e-05, |
| "loss": 0.004, |
| "step": 12170 |
| }, |
| { |
| "epoch": 290.0, |
| "grad_norm": 0.08045458048582077, |
| "learning_rate": 9.859757478875905e-05, |
| "loss": 0.004, |
| "step": 12180 |
| }, |
| { |
| "epoch": 290.23809523809524, |
| "grad_norm": 0.10925319790840149, |
| "learning_rate": 9.859368347812204e-05, |
| "loss": 0.0042, |
| "step": 12190 |
| }, |
| { |
| "epoch": 290.4761904761905, |
| "grad_norm": 0.09253482520580292, |
| "learning_rate": 9.858978685335285e-05, |
| "loss": 0.0051, |
| "step": 12200 |
| }, |
| { |
| "epoch": 290.7142857142857, |
| "grad_norm": 0.11133506149053574, |
| "learning_rate": 9.858588491487763e-05, |
| "loss": 0.0041, |
| "step": 12210 |
| }, |
| { |
| "epoch": 290.95238095238096, |
| "grad_norm": 0.11015759408473969, |
| "learning_rate": 9.858197766312308e-05, |
| "loss": 0.0039, |
| "step": 12220 |
| }, |
| { |
| "epoch": 291.1904761904762, |
| "grad_norm": 0.14213238656520844, |
| "learning_rate": 9.857806509851649e-05, |
| "loss": 0.0034, |
| "step": 12230 |
| }, |
| { |
| "epoch": 291.42857142857144, |
| "grad_norm": 0.13276730477809906, |
| "learning_rate": 9.857414722148574e-05, |
| "loss": 0.004, |
| "step": 12240 |
| }, |
| { |
| "epoch": 291.6666666666667, |
| "grad_norm": 0.07546578347682953, |
| "learning_rate": 9.857022403245928e-05, |
| "loss": 0.0038, |
| "step": 12250 |
| }, |
| { |
| "epoch": 291.9047619047619, |
| "grad_norm": 0.10682432353496552, |
| "learning_rate": 9.856629553186615e-05, |
| "loss": 0.005, |
| "step": 12260 |
| }, |
| { |
| "epoch": 292.14285714285717, |
| "grad_norm": 0.13046984374523163, |
| "learning_rate": 9.856236172013595e-05, |
| "loss": 0.0044, |
| "step": 12270 |
| }, |
| { |
| "epoch": 292.3809523809524, |
| "grad_norm": 0.14381255209445953, |
| "learning_rate": 9.85584225976989e-05, |
| "loss": 0.004, |
| "step": 12280 |
| }, |
| { |
| "epoch": 292.6190476190476, |
| "grad_norm": 0.10259222984313965, |
| "learning_rate": 9.855447816498575e-05, |
| "loss": 0.004, |
| "step": 12290 |
| }, |
| { |
| "epoch": 292.85714285714283, |
| "grad_norm": 0.14369122684001923, |
| "learning_rate": 9.855052842242787e-05, |
| "loss": 0.0047, |
| "step": 12300 |
| }, |
| { |
| "epoch": 293.0952380952381, |
| "grad_norm": 0.10692545771598816, |
| "learning_rate": 9.85465733704572e-05, |
| "loss": 0.0043, |
| "step": 12310 |
| }, |
| { |
| "epoch": 293.3333333333333, |
| "grad_norm": 0.1140999123454094, |
| "learning_rate": 9.854261300950624e-05, |
| "loss": 0.0042, |
| "step": 12320 |
| }, |
| { |
| "epoch": 293.57142857142856, |
| "grad_norm": 0.10540694743394852, |
| "learning_rate": 9.853864734000813e-05, |
| "loss": 0.0041, |
| "step": 12330 |
| }, |
| { |
| "epoch": 293.8095238095238, |
| "grad_norm": 0.13039545714855194, |
| "learning_rate": 9.85346763623965e-05, |
| "loss": 0.0041, |
| "step": 12340 |
| }, |
| { |
| "epoch": 294.04761904761904, |
| "grad_norm": 0.1073756143450737, |
| "learning_rate": 9.853070007710564e-05, |
| "loss": 0.0034, |
| "step": 12350 |
| }, |
| { |
| "epoch": 294.2857142857143, |
| "grad_norm": 0.11053822189569473, |
| "learning_rate": 9.85267184845704e-05, |
| "loss": 0.0041, |
| "step": 12360 |
| }, |
| { |
| "epoch": 294.5238095238095, |
| "grad_norm": 0.11554089933633804, |
| "learning_rate": 9.852273158522616e-05, |
| "loss": 0.0038, |
| "step": 12370 |
| }, |
| { |
| "epoch": 294.76190476190476, |
| "grad_norm": 0.09359762817621231, |
| "learning_rate": 9.851873937950896e-05, |
| "loss": 0.0036, |
| "step": 12380 |
| }, |
| { |
| "epoch": 295.0, |
| "grad_norm": 0.11268861591815948, |
| "learning_rate": 9.851474186785537e-05, |
| "loss": 0.004, |
| "step": 12390 |
| }, |
| { |
| "epoch": 295.23809523809524, |
| "grad_norm": 0.10756248235702515, |
| "learning_rate": 9.851073905070254e-05, |
| "loss": 0.0042, |
| "step": 12400 |
| }, |
| { |
| "epoch": 295.4761904761905, |
| "grad_norm": 0.11122822761535645, |
| "learning_rate": 9.850673092848824e-05, |
| "loss": 0.0042, |
| "step": 12410 |
| }, |
| { |
| "epoch": 295.7142857142857, |
| "grad_norm": 0.1311500370502472, |
| "learning_rate": 9.850271750165077e-05, |
| "loss": 0.0038, |
| "step": 12420 |
| }, |
| { |
| "epoch": 295.95238095238096, |
| "grad_norm": 0.11282636225223541, |
| "learning_rate": 9.849869877062902e-05, |
| "loss": 0.0042, |
| "step": 12430 |
| }, |
| { |
| "epoch": 296.1904761904762, |
| "grad_norm": 0.15318289399147034, |
| "learning_rate": 9.849467473586252e-05, |
| "loss": 0.0042, |
| "step": 12440 |
| }, |
| { |
| "epoch": 296.42857142857144, |
| "grad_norm": 0.1262063980102539, |
| "learning_rate": 9.849064539779127e-05, |
| "loss": 0.0039, |
| "step": 12450 |
| }, |
| { |
| "epoch": 296.6666666666667, |
| "grad_norm": 0.11098899692296982, |
| "learning_rate": 9.848661075685594e-05, |
| "loss": 0.0045, |
| "step": 12460 |
| }, |
| { |
| "epoch": 296.9047619047619, |
| "grad_norm": 0.09523414075374603, |
| "learning_rate": 9.848257081349778e-05, |
| "loss": 0.0042, |
| "step": 12470 |
| }, |
| { |
| "epoch": 297.14285714285717, |
| "grad_norm": 0.07703749090433121, |
| "learning_rate": 9.847852556815856e-05, |
| "loss": 0.0037, |
| "step": 12480 |
| }, |
| { |
| "epoch": 297.3809523809524, |
| "grad_norm": 0.13233567774295807, |
| "learning_rate": 9.847447502128067e-05, |
| "loss": 0.004, |
| "step": 12490 |
| }, |
| { |
| "epoch": 297.6190476190476, |
| "grad_norm": 0.17217761278152466, |
| "learning_rate": 9.847041917330708e-05, |
| "loss": 0.0039, |
| "step": 12500 |
| }, |
| { |
| "epoch": 297.85714285714283, |
| "grad_norm": 0.12014293670654297, |
| "learning_rate": 9.846635802468132e-05, |
| "loss": 0.0039, |
| "step": 12510 |
| }, |
| { |
| "epoch": 298.0952380952381, |
| "grad_norm": 0.1257663518190384, |
| "learning_rate": 9.84622915758475e-05, |
| "loss": 0.0047, |
| "step": 12520 |
| }, |
| { |
| "epoch": 298.3333333333333, |
| "grad_norm": 0.12794090807437897, |
| "learning_rate": 9.845821982725034e-05, |
| "loss": 0.0038, |
| "step": 12530 |
| }, |
| { |
| "epoch": 298.57142857142856, |
| "grad_norm": 0.09410233795642853, |
| "learning_rate": 9.845414277933514e-05, |
| "loss": 0.004, |
| "step": 12540 |
| }, |
| { |
| "epoch": 298.8095238095238, |
| "grad_norm": 0.11727259308099747, |
| "learning_rate": 9.845006043254771e-05, |
| "loss": 0.0049, |
| "step": 12550 |
| }, |
| { |
| "epoch": 299.04761904761904, |
| "grad_norm": 0.0857493206858635, |
| "learning_rate": 9.844597278733451e-05, |
| "loss": 0.0035, |
| "step": 12560 |
| }, |
| { |
| "epoch": 299.2857142857143, |
| "grad_norm": 0.10817568749189377, |
| "learning_rate": 9.844187984414259e-05, |
| "loss": 0.0037, |
| "step": 12570 |
| }, |
| { |
| "epoch": 299.5238095238095, |
| "grad_norm": 0.12444339692592621, |
| "learning_rate": 9.84377816034195e-05, |
| "loss": 0.0038, |
| "step": 12580 |
| }, |
| { |
| "epoch": 299.76190476190476, |
| "grad_norm": 0.12441998720169067, |
| "learning_rate": 9.843367806561345e-05, |
| "loss": 0.0036, |
| "step": 12590 |
| }, |
| { |
| "epoch": 300.0, |
| "grad_norm": 0.07153680920600891, |
| "learning_rate": 9.842956923117317e-05, |
| "loss": 0.004, |
| "step": 12600 |
| }, |
| { |
| "epoch": 300.23809523809524, |
| "grad_norm": 0.13482527434825897, |
| "learning_rate": 9.842545510054802e-05, |
| "loss": 0.0038, |
| "step": 12610 |
| }, |
| { |
| "epoch": 300.4761904761905, |
| "grad_norm": 0.13475850224494934, |
| "learning_rate": 9.842133567418792e-05, |
| "loss": 0.005, |
| "step": 12620 |
| }, |
| { |
| "epoch": 300.7142857142857, |
| "grad_norm": 0.11899552494287491, |
| "learning_rate": 9.841721095254333e-05, |
| "loss": 0.0038, |
| "step": 12630 |
| }, |
| { |
| "epoch": 300.95238095238096, |
| "grad_norm": 0.07279888540506363, |
| "learning_rate": 9.841308093606537e-05, |
| "loss": 0.0039, |
| "step": 12640 |
| }, |
| { |
| "epoch": 301.1904761904762, |
| "grad_norm": 0.1672104299068451, |
| "learning_rate": 9.840894562520565e-05, |
| "loss": 0.0042, |
| "step": 12650 |
| }, |
| { |
| "epoch": 301.42857142857144, |
| "grad_norm": 0.17426931858062744, |
| "learning_rate": 9.840480502041642e-05, |
| "loss": 0.0046, |
| "step": 12660 |
| }, |
| { |
| "epoch": 301.6666666666667, |
| "grad_norm": 0.09579557925462723, |
| "learning_rate": 9.840065912215049e-05, |
| "loss": 0.0039, |
| "step": 12670 |
| }, |
| { |
| "epoch": 301.9047619047619, |
| "grad_norm": 0.09822675585746765, |
| "learning_rate": 9.839650793086124e-05, |
| "loss": 0.0041, |
| "step": 12680 |
| }, |
| { |
| "epoch": 302.14285714285717, |
| "grad_norm": 0.12241455167531967, |
| "learning_rate": 9.839235144700265e-05, |
| "loss": 0.0042, |
| "step": 12690 |
| }, |
| { |
| "epoch": 302.3809523809524, |
| "grad_norm": 0.14615283906459808, |
| "learning_rate": 9.838818967102926e-05, |
| "loss": 0.0049, |
| "step": 12700 |
| }, |
| { |
| "epoch": 302.6190476190476, |
| "grad_norm": 0.08859282732009888, |
| "learning_rate": 9.83840226033962e-05, |
| "loss": 0.0038, |
| "step": 12710 |
| }, |
| { |
| "epoch": 302.85714285714283, |
| "grad_norm": 0.11785812675952911, |
| "learning_rate": 9.837985024455918e-05, |
| "loss": 0.0039, |
| "step": 12720 |
| }, |
| { |
| "epoch": 303.0952380952381, |
| "grad_norm": 0.11543699353933334, |
| "learning_rate": 9.837567259497447e-05, |
| "loss": 0.0038, |
| "step": 12730 |
| }, |
| { |
| "epoch": 303.3333333333333, |
| "grad_norm": 0.1060631051659584, |
| "learning_rate": 9.837148965509894e-05, |
| "loss": 0.0042, |
| "step": 12740 |
| }, |
| { |
| "epoch": 303.57142857142856, |
| "grad_norm": 0.11200378090143204, |
| "learning_rate": 9.836730142539001e-05, |
| "loss": 0.0042, |
| "step": 12750 |
| }, |
| { |
| "epoch": 303.8095238095238, |
| "grad_norm": 0.09545325487852097, |
| "learning_rate": 9.836310790630574e-05, |
| "loss": 0.0034, |
| "step": 12760 |
| }, |
| { |
| "epoch": 304.04761904761904, |
| "grad_norm": 0.1236363872885704, |
| "learning_rate": 9.83589090983047e-05, |
| "loss": 0.0042, |
| "step": 12770 |
| }, |
| { |
| "epoch": 304.2857142857143, |
| "grad_norm": 0.09418749809265137, |
| "learning_rate": 9.835470500184605e-05, |
| "loss": 0.0035, |
| "step": 12780 |
| }, |
| { |
| "epoch": 304.5238095238095, |
| "grad_norm": 0.11235824972391129, |
| "learning_rate": 9.835049561738957e-05, |
| "loss": 0.0037, |
| "step": 12790 |
| }, |
| { |
| "epoch": 304.76190476190476, |
| "grad_norm": 0.13689745962619781, |
| "learning_rate": 9.834628094539558e-05, |
| "loss": 0.0042, |
| "step": 12800 |
| }, |
| { |
| "epoch": 305.0, |
| "grad_norm": 0.09412897378206253, |
| "learning_rate": 9.834206098632499e-05, |
| "loss": 0.0042, |
| "step": 12810 |
| }, |
| { |
| "epoch": 305.23809523809524, |
| "grad_norm": 0.0985070988535881, |
| "learning_rate": 9.833783574063931e-05, |
| "loss": 0.0037, |
| "step": 12820 |
| }, |
| { |
| "epoch": 305.4761904761905, |
| "grad_norm": 0.08770821988582611, |
| "learning_rate": 9.833360520880058e-05, |
| "loss": 0.0039, |
| "step": 12830 |
| }, |
| { |
| "epoch": 305.7142857142857, |
| "grad_norm": 0.10579407960176468, |
| "learning_rate": 9.832936939127144e-05, |
| "loss": 0.0037, |
| "step": 12840 |
| }, |
| { |
| "epoch": 305.95238095238096, |
| "grad_norm": 0.10513468086719513, |
| "learning_rate": 9.832512828851515e-05, |
| "loss": 0.0041, |
| "step": 12850 |
| }, |
| { |
| "epoch": 306.1904761904762, |
| "grad_norm": 0.09064383804798126, |
| "learning_rate": 9.832088190099546e-05, |
| "loss": 0.0037, |
| "step": 12860 |
| }, |
| { |
| "epoch": 306.42857142857144, |
| "grad_norm": 0.12547598779201508, |
| "learning_rate": 9.831663022917679e-05, |
| "loss": 0.0041, |
| "step": 12870 |
| }, |
| { |
| "epoch": 306.6666666666667, |
| "grad_norm": 0.1223834902048111, |
| "learning_rate": 9.831237327352407e-05, |
| "loss": 0.0036, |
| "step": 12880 |
| }, |
| { |
| "epoch": 306.9047619047619, |
| "grad_norm": 0.12701621651649475, |
| "learning_rate": 9.830811103450286e-05, |
| "loss": 0.0044, |
| "step": 12890 |
| }, |
| { |
| "epoch": 307.14285714285717, |
| "grad_norm": 0.1263977438211441, |
| "learning_rate": 9.830384351257924e-05, |
| "loss": 0.0038, |
| "step": 12900 |
| }, |
| { |
| "epoch": 307.3809523809524, |
| "grad_norm": 0.12813256680965424, |
| "learning_rate": 9.829957070821993e-05, |
| "loss": 0.0053, |
| "step": 12910 |
| }, |
| { |
| "epoch": 307.6190476190476, |
| "grad_norm": 0.08021179586648941, |
| "learning_rate": 9.829529262189218e-05, |
| "loss": 0.0038, |
| "step": 12920 |
| }, |
| { |
| "epoch": 307.85714285714283, |
| "grad_norm": 0.11336914449930191, |
| "learning_rate": 9.829100925406385e-05, |
| "loss": 0.004, |
| "step": 12930 |
| }, |
| { |
| "epoch": 308.0952380952381, |
| "grad_norm": 0.10496781021356583, |
| "learning_rate": 9.828672060520333e-05, |
| "loss": 0.0037, |
| "step": 12940 |
| }, |
| { |
| "epoch": 308.3333333333333, |
| "grad_norm": 0.07546764612197876, |
| "learning_rate": 9.828242667577966e-05, |
| "loss": 0.0035, |
| "step": 12950 |
| }, |
| { |
| "epoch": 308.57142857142856, |
| "grad_norm": 0.09004230797290802, |
| "learning_rate": 9.82781274662624e-05, |
| "loss": 0.0034, |
| "step": 12960 |
| }, |
| { |
| "epoch": 308.8095238095238, |
| "grad_norm": 0.11850307881832123, |
| "learning_rate": 9.82738229771217e-05, |
| "loss": 0.0043, |
| "step": 12970 |
| }, |
| { |
| "epoch": 309.04761904761904, |
| "grad_norm": 0.08362831175327301, |
| "learning_rate": 9.826951320882829e-05, |
| "loss": 0.0031, |
| "step": 12980 |
| }, |
| { |
| "epoch": 309.2857142857143, |
| "grad_norm": 0.06721550971269608, |
| "learning_rate": 9.826519816185351e-05, |
| "loss": 0.0039, |
| "step": 12990 |
| }, |
| { |
| "epoch": 309.5238095238095, |
| "grad_norm": 0.082874596118927, |
| "learning_rate": 9.826087783666921e-05, |
| "loss": 0.0041, |
| "step": 13000 |
| }, |
| { |
| "epoch": 309.76190476190476, |
| "grad_norm": 0.07900836318731308, |
| "learning_rate": 9.825655223374787e-05, |
| "loss": 0.0037, |
| "step": 13010 |
| }, |
| { |
| "epoch": 310.0, |
| "grad_norm": 0.06957385689020157, |
| "learning_rate": 9.825222135356253e-05, |
| "loss": 0.0031, |
| "step": 13020 |
| }, |
| { |
| "epoch": 310.23809523809524, |
| "grad_norm": 0.07454993575811386, |
| "learning_rate": 9.82478851965868e-05, |
| "loss": 0.0036, |
| "step": 13030 |
| }, |
| { |
| "epoch": 310.4761904761905, |
| "grad_norm": 0.06368528306484222, |
| "learning_rate": 9.82435437632949e-05, |
| "loss": 0.0032, |
| "step": 13040 |
| }, |
| { |
| "epoch": 310.7142857142857, |
| "grad_norm": 0.080793097615242, |
| "learning_rate": 9.823919705416158e-05, |
| "loss": 0.0038, |
| "step": 13050 |
| }, |
| { |
| "epoch": 310.95238095238096, |
| "grad_norm": 0.08735798299312592, |
| "learning_rate": 9.82348450696622e-05, |
| "loss": 0.004, |
| "step": 13060 |
| }, |
| { |
| "epoch": 311.1904761904762, |
| "grad_norm": 0.09717802703380585, |
| "learning_rate": 9.823048781027268e-05, |
| "loss": 0.0034, |
| "step": 13070 |
| }, |
| { |
| "epoch": 311.42857142857144, |
| "grad_norm": 0.07490011304616928, |
| "learning_rate": 9.822612527646953e-05, |
| "loss": 0.0038, |
| "step": 13080 |
| }, |
| { |
| "epoch": 311.6666666666667, |
| "grad_norm": 0.10628506541252136, |
| "learning_rate": 9.822175746872984e-05, |
| "loss": 0.0034, |
| "step": 13090 |
| }, |
| { |
| "epoch": 311.9047619047619, |
| "grad_norm": 0.10404651612043381, |
| "learning_rate": 9.821738438753123e-05, |
| "loss": 0.004, |
| "step": 13100 |
| }, |
| { |
| "epoch": 312.14285714285717, |
| "grad_norm": 0.08945135027170181, |
| "learning_rate": 9.821300603335196e-05, |
| "loss": 0.0037, |
| "step": 13110 |
| }, |
| { |
| "epoch": 312.3809523809524, |
| "grad_norm": 0.0790078416466713, |
| "learning_rate": 9.820862240667085e-05, |
| "loss": 0.0035, |
| "step": 13120 |
| }, |
| { |
| "epoch": 312.6190476190476, |
| "grad_norm": 0.08452211320400238, |
| "learning_rate": 9.820423350796726e-05, |
| "loss": 0.0039, |
| "step": 13130 |
| }, |
| { |
| "epoch": 312.85714285714283, |
| "grad_norm": 0.11405911296606064, |
| "learning_rate": 9.819983933772118e-05, |
| "loss": 0.0032, |
| "step": 13140 |
| }, |
| { |
| "epoch": 313.0952380952381, |
| "grad_norm": 0.11462898552417755, |
| "learning_rate": 9.819543989641314e-05, |
| "loss": 0.004, |
| "step": 13150 |
| }, |
| { |
| "epoch": 313.3333333333333, |
| "grad_norm": 0.10320712625980377, |
| "learning_rate": 9.819103518452423e-05, |
| "loss": 0.0039, |
| "step": 13160 |
| }, |
| { |
| "epoch": 313.57142857142856, |
| "grad_norm": 0.07565370947122574, |
| "learning_rate": 9.818662520253618e-05, |
| "loss": 0.0046, |
| "step": 13170 |
| }, |
| { |
| "epoch": 313.8095238095238, |
| "grad_norm": 0.12637363374233246, |
| "learning_rate": 9.818220995093126e-05, |
| "loss": 0.0038, |
| "step": 13180 |
| }, |
| { |
| "epoch": 314.04761904761904, |
| "grad_norm": 0.08819403499364853, |
| "learning_rate": 9.817778943019228e-05, |
| "loss": 0.0036, |
| "step": 13190 |
| }, |
| { |
| "epoch": 314.2857142857143, |
| "grad_norm": 0.07170990854501724, |
| "learning_rate": 9.81733636408027e-05, |
| "loss": 0.0035, |
| "step": 13200 |
| }, |
| { |
| "epoch": 314.5238095238095, |
| "grad_norm": 0.08225242048501968, |
| "learning_rate": 9.816893258324649e-05, |
| "loss": 0.0038, |
| "step": 13210 |
| }, |
| { |
| "epoch": 314.76190476190476, |
| "grad_norm": 0.09398693591356277, |
| "learning_rate": 9.816449625800823e-05, |
| "loss": 0.0032, |
| "step": 13220 |
| }, |
| { |
| "epoch": 315.0, |
| "grad_norm": 0.11972594261169434, |
| "learning_rate": 9.816005466557308e-05, |
| "loss": 0.0034, |
| "step": 13230 |
| }, |
| { |
| "epoch": 315.23809523809524, |
| "grad_norm": 0.11073167622089386, |
| "learning_rate": 9.815560780642674e-05, |
| "loss": 0.0039, |
| "step": 13240 |
| }, |
| { |
| "epoch": 315.4761904761905, |
| "grad_norm": 0.12790706753730774, |
| "learning_rate": 9.815115568105555e-05, |
| "loss": 0.0034, |
| "step": 13250 |
| }, |
| { |
| "epoch": 315.7142857142857, |
| "grad_norm": 0.07783694565296173, |
| "learning_rate": 9.814669828994638e-05, |
| "loss": 0.0042, |
| "step": 13260 |
| }, |
| { |
| "epoch": 315.95238095238096, |
| "grad_norm": 0.12165135890245438, |
| "learning_rate": 9.814223563358665e-05, |
| "loss": 0.004, |
| "step": 13270 |
| }, |
| { |
| "epoch": 316.1904761904762, |
| "grad_norm": 0.13635115325450897, |
| "learning_rate": 9.813776771246443e-05, |
| "loss": 0.0041, |
| "step": 13280 |
| }, |
| { |
| "epoch": 316.42857142857144, |
| "grad_norm": 0.10919512808322906, |
| "learning_rate": 9.813329452706829e-05, |
| "loss": 0.0033, |
| "step": 13290 |
| }, |
| { |
| "epoch": 316.6666666666667, |
| "grad_norm": 0.11394019424915314, |
| "learning_rate": 9.812881607788744e-05, |
| "loss": 0.004, |
| "step": 13300 |
| }, |
| { |
| "epoch": 316.9047619047619, |
| "grad_norm": 0.09364115446805954, |
| "learning_rate": 9.812433236541163e-05, |
| "loss": 0.0037, |
| "step": 13310 |
| }, |
| { |
| "epoch": 317.14285714285717, |
| "grad_norm": 0.12433009594678879, |
| "learning_rate": 9.811984339013116e-05, |
| "loss": 0.0037, |
| "step": 13320 |
| }, |
| { |
| "epoch": 317.3809523809524, |
| "grad_norm": 0.14501987397670746, |
| "learning_rate": 9.811534915253698e-05, |
| "loss": 0.0049, |
| "step": 13330 |
| }, |
| { |
| "epoch": 317.6190476190476, |
| "grad_norm": 0.12047111243009567, |
| "learning_rate": 9.811084965312056e-05, |
| "loss": 0.004, |
| "step": 13340 |
| }, |
| { |
| "epoch": 317.85714285714283, |
| "grad_norm": 0.10728151351213455, |
| "learning_rate": 9.810634489237396e-05, |
| "loss": 0.0039, |
| "step": 13350 |
| }, |
| { |
| "epoch": 318.0952380952381, |
| "grad_norm": 0.12313900887966156, |
| "learning_rate": 9.81018348707898e-05, |
| "loss": 0.0037, |
| "step": 13360 |
| }, |
| { |
| "epoch": 318.3333333333333, |
| "grad_norm": 0.10879870504140854, |
| "learning_rate": 9.809731958886131e-05, |
| "loss": 0.0035, |
| "step": 13370 |
| }, |
| { |
| "epoch": 318.57142857142856, |
| "grad_norm": 0.1128934696316719, |
| "learning_rate": 9.809279904708224e-05, |
| "loss": 0.0041, |
| "step": 13380 |
| }, |
| { |
| "epoch": 318.8095238095238, |
| "grad_norm": 0.10997622460126877, |
| "learning_rate": 9.808827324594699e-05, |
| "loss": 0.0038, |
| "step": 13390 |
| }, |
| { |
| "epoch": 319.04761904761904, |
| "grad_norm": 0.12310972064733505, |
| "learning_rate": 9.808374218595046e-05, |
| "loss": 0.0036, |
| "step": 13400 |
| }, |
| { |
| "epoch": 319.2857142857143, |
| "grad_norm": 0.10395687073469162, |
| "learning_rate": 9.80792058675882e-05, |
| "loss": 0.0039, |
| "step": 13410 |
| }, |
| { |
| "epoch": 319.5238095238095, |
| "grad_norm": 0.1012033000588417, |
| "learning_rate": 9.807466429135627e-05, |
| "loss": 0.0041, |
| "step": 13420 |
| }, |
| { |
| "epoch": 319.76190476190476, |
| "grad_norm": 0.09394651651382446, |
| "learning_rate": 9.807011745775132e-05, |
| "loss": 0.0039, |
| "step": 13430 |
| }, |
| { |
| "epoch": 320.0, |
| "grad_norm": 0.08031048625707626, |
| "learning_rate": 9.806556536727061e-05, |
| "loss": 0.0036, |
| "step": 13440 |
| }, |
| { |
| "epoch": 320.23809523809524, |
| "grad_norm": 0.1040155366063118, |
| "learning_rate": 9.806100802041193e-05, |
| "loss": 0.0037, |
| "step": 13450 |
| }, |
| { |
| "epoch": 320.4761904761905, |
| "grad_norm": 0.11000386625528336, |
| "learning_rate": 9.805644541767368e-05, |
| "loss": 0.0035, |
| "step": 13460 |
| }, |
| { |
| "epoch": 320.7142857142857, |
| "grad_norm": 0.12757132947444916, |
| "learning_rate": 9.805187755955478e-05, |
| "loss": 0.0039, |
| "step": 13470 |
| }, |
| { |
| "epoch": 320.95238095238096, |
| "grad_norm": 0.11333800107240677, |
| "learning_rate": 9.804730444655483e-05, |
| "loss": 0.0044, |
| "step": 13480 |
| }, |
| { |
| "epoch": 321.1904761904762, |
| "grad_norm": 0.098880834877491, |
| "learning_rate": 9.804272607917388e-05, |
| "loss": 0.0037, |
| "step": 13490 |
| }, |
| { |
| "epoch": 321.42857142857144, |
| "grad_norm": 0.10119649767875671, |
| "learning_rate": 9.803814245791265e-05, |
| "loss": 0.0045, |
| "step": 13500 |
| }, |
| { |
| "epoch": 321.6666666666667, |
| "grad_norm": 0.0872950404882431, |
| "learning_rate": 9.803355358327239e-05, |
| "loss": 0.0043, |
| "step": 13510 |
| }, |
| { |
| "epoch": 321.9047619047619, |
| "grad_norm": 0.0718102753162384, |
| "learning_rate": 9.802895945575492e-05, |
| "loss": 0.0034, |
| "step": 13520 |
| }, |
| { |
| "epoch": 322.14285714285717, |
| "grad_norm": 0.12764829397201538, |
| "learning_rate": 9.802436007586266e-05, |
| "loss": 0.0042, |
| "step": 13530 |
| }, |
| { |
| "epoch": 322.3809523809524, |
| "grad_norm": 0.13190992176532745, |
| "learning_rate": 9.801975544409858e-05, |
| "loss": 0.0041, |
| "step": 13540 |
| }, |
| { |
| "epoch": 322.6190476190476, |
| "grad_norm": 0.1053733378648758, |
| "learning_rate": 9.801514556096625e-05, |
| "loss": 0.0037, |
| "step": 13550 |
| }, |
| { |
| "epoch": 322.85714285714283, |
| "grad_norm": 0.12765581905841827, |
| "learning_rate": 9.801053042696977e-05, |
| "loss": 0.0038, |
| "step": 13560 |
| }, |
| { |
| "epoch": 323.0952380952381, |
| "grad_norm": 0.10003163665533066, |
| "learning_rate": 9.800591004261388e-05, |
| "loss": 0.0036, |
| "step": 13570 |
| }, |
| { |
| "epoch": 323.3333333333333, |
| "grad_norm": 0.15174482762813568, |
| "learning_rate": 9.800128440840385e-05, |
| "loss": 0.0041, |
| "step": 13580 |
| }, |
| { |
| "epoch": 323.57142857142856, |
| "grad_norm": 0.1300843060016632, |
| "learning_rate": 9.799665352484552e-05, |
| "loss": 0.0044, |
| "step": 13590 |
| }, |
| { |
| "epoch": 323.8095238095238, |
| "grad_norm": 0.10586731135845184, |
| "learning_rate": 9.799201739244532e-05, |
| "loss": 0.0037, |
| "step": 13600 |
| }, |
| { |
| "epoch": 324.04761904761904, |
| "grad_norm": 0.14149481058120728, |
| "learning_rate": 9.798737601171025e-05, |
| "loss": 0.0041, |
| "step": 13610 |
| }, |
| { |
| "epoch": 324.2857142857143, |
| "grad_norm": 0.10894253104925156, |
| "learning_rate": 9.79827293831479e-05, |
| "loss": 0.0043, |
| "step": 13620 |
| }, |
| { |
| "epoch": 324.5238095238095, |
| "grad_norm": 0.08938633650541306, |
| "learning_rate": 9.797807750726638e-05, |
| "loss": 0.0036, |
| "step": 13630 |
| }, |
| { |
| "epoch": 324.76190476190476, |
| "grad_norm": 0.11075068265199661, |
| "learning_rate": 9.797342038457446e-05, |
| "loss": 0.0037, |
| "step": 13640 |
| }, |
| { |
| "epoch": 325.0, |
| "grad_norm": 0.13086655735969543, |
| "learning_rate": 9.796875801558141e-05, |
| "loss": 0.0035, |
| "step": 13650 |
| }, |
| { |
| "epoch": 325.23809523809524, |
| "grad_norm": 0.11114643514156342, |
| "learning_rate": 9.79640904007971e-05, |
| "loss": 0.0039, |
| "step": 13660 |
| }, |
| { |
| "epoch": 325.4761904761905, |
| "grad_norm": 0.13546904921531677, |
| "learning_rate": 9.795941754073199e-05, |
| "loss": 0.0039, |
| "step": 13670 |
| }, |
| { |
| "epoch": 325.7142857142857, |
| "grad_norm": 0.10233338922262192, |
| "learning_rate": 9.795473943589705e-05, |
| "loss": 0.0033, |
| "step": 13680 |
| }, |
| { |
| "epoch": 325.95238095238096, |
| "grad_norm": 0.11478246748447418, |
| "learning_rate": 9.795005608680394e-05, |
| "loss": 0.0046, |
| "step": 13690 |
| }, |
| { |
| "epoch": 326.1904761904762, |
| "grad_norm": 0.06689493358135223, |
| "learning_rate": 9.794536749396477e-05, |
| "loss": 0.0037, |
| "step": 13700 |
| }, |
| { |
| "epoch": 326.42857142857144, |
| "grad_norm": 0.09696056693792343, |
| "learning_rate": 9.79406736578923e-05, |
| "loss": 0.0039, |
| "step": 13710 |
| }, |
| { |
| "epoch": 326.6666666666667, |
| "grad_norm": 0.1278267800807953, |
| "learning_rate": 9.793597457909984e-05, |
| "loss": 0.0036, |
| "step": 13720 |
| }, |
| { |
| "epoch": 326.9047619047619, |
| "grad_norm": 0.10880826413631439, |
| "learning_rate": 9.793127025810127e-05, |
| "loss": 0.0046, |
| "step": 13730 |
| }, |
| { |
| "epoch": 327.14285714285717, |
| "grad_norm": 0.09085580706596375, |
| "learning_rate": 9.792656069541104e-05, |
| "loss": 0.0038, |
| "step": 13740 |
| }, |
| { |
| "epoch": 327.3809523809524, |
| "grad_norm": 0.11140697449445724, |
| "learning_rate": 9.79218458915442e-05, |
| "loss": 0.0035, |
| "step": 13750 |
| }, |
| { |
| "epoch": 327.6190476190476, |
| "grad_norm": 0.13374167680740356, |
| "learning_rate": 9.791712584701634e-05, |
| "loss": 0.0043, |
| "step": 13760 |
| }, |
| { |
| "epoch": 327.85714285714283, |
| "grad_norm": 0.09034523367881775, |
| "learning_rate": 9.791240056234364e-05, |
| "loss": 0.0035, |
| "step": 13770 |
| }, |
| { |
| "epoch": 328.0952380952381, |
| "grad_norm": 0.09665458649396896, |
| "learning_rate": 9.790767003804283e-05, |
| "loss": 0.004, |
| "step": 13780 |
| }, |
| { |
| "epoch": 328.3333333333333, |
| "grad_norm": 0.0883466899394989, |
| "learning_rate": 9.790293427463126e-05, |
| "loss": 0.0036, |
| "step": 13790 |
| }, |
| { |
| "epoch": 328.57142857142856, |
| "grad_norm": 0.09167914092540741, |
| "learning_rate": 9.789819327262684e-05, |
| "loss": 0.0042, |
| "step": 13800 |
| }, |
| { |
| "epoch": 328.8095238095238, |
| "grad_norm": 0.10648363828659058, |
| "learning_rate": 9.7893447032548e-05, |
| "loss": 0.0032, |
| "step": 13810 |
| }, |
| { |
| "epoch": 329.04761904761904, |
| "grad_norm": 0.09039238095283508, |
| "learning_rate": 9.78886955549138e-05, |
| "loss": 0.004, |
| "step": 13820 |
| }, |
| { |
| "epoch": 329.2857142857143, |
| "grad_norm": 0.09328264743089676, |
| "learning_rate": 9.788393884024387e-05, |
| "loss": 0.0039, |
| "step": 13830 |
| }, |
| { |
| "epoch": 329.5238095238095, |
| "grad_norm": 0.07586466521024704, |
| "learning_rate": 9.787917688905836e-05, |
| "loss": 0.0039, |
| "step": 13840 |
| }, |
| { |
| "epoch": 329.76190476190476, |
| "grad_norm": 0.07730385661125183, |
| "learning_rate": 9.787440970187807e-05, |
| "loss": 0.0042, |
| "step": 13850 |
| }, |
| { |
| "epoch": 330.0, |
| "grad_norm": 0.07398882508277893, |
| "learning_rate": 9.786963727922429e-05, |
| "loss": 0.0033, |
| "step": 13860 |
| }, |
| { |
| "epoch": 330.23809523809524, |
| "grad_norm": 0.10702811926603317, |
| "learning_rate": 9.786485962161897e-05, |
| "loss": 0.0033, |
| "step": 13870 |
| }, |
| { |
| "epoch": 330.4761904761905, |
| "grad_norm": 0.07463377714157104, |
| "learning_rate": 9.786007672958455e-05, |
| "loss": 0.0039, |
| "step": 13880 |
| }, |
| { |
| "epoch": 330.7142857142857, |
| "grad_norm": 0.08346113562583923, |
| "learning_rate": 9.78552886036441e-05, |
| "loss": 0.0031, |
| "step": 13890 |
| }, |
| { |
| "epoch": 330.95238095238096, |
| "grad_norm": 0.0860011875629425, |
| "learning_rate": 9.785049524432124e-05, |
| "loss": 0.0043, |
| "step": 13900 |
| }, |
| { |
| "epoch": 331.1904761904762, |
| "grad_norm": 0.06752394884824753, |
| "learning_rate": 9.784569665214016e-05, |
| "loss": 0.0045, |
| "step": 13910 |
| }, |
| { |
| "epoch": 331.42857142857144, |
| "grad_norm": 0.08143623173236847, |
| "learning_rate": 9.784089282762563e-05, |
| "loss": 0.004, |
| "step": 13920 |
| }, |
| { |
| "epoch": 331.6666666666667, |
| "grad_norm": 0.11127059906721115, |
| "learning_rate": 9.7836083771303e-05, |
| "loss": 0.0037, |
| "step": 13930 |
| }, |
| { |
| "epoch": 331.9047619047619, |
| "grad_norm": 0.11340519040822983, |
| "learning_rate": 9.783126948369817e-05, |
| "loss": 0.0048, |
| "step": 13940 |
| }, |
| { |
| "epoch": 332.14285714285717, |
| "grad_norm": 0.10253134369850159, |
| "learning_rate": 9.78264499653376e-05, |
| "loss": 0.004, |
| "step": 13950 |
| }, |
| { |
| "epoch": 332.3809523809524, |
| "grad_norm": 0.09692832827568054, |
| "learning_rate": 9.782162521674838e-05, |
| "loss": 0.0037, |
| "step": 13960 |
| }, |
| { |
| "epoch": 332.6190476190476, |
| "grad_norm": 0.12013217061758041, |
| "learning_rate": 9.781679523845812e-05, |
| "loss": 0.0038, |
| "step": 13970 |
| }, |
| { |
| "epoch": 332.85714285714283, |
| "grad_norm": 0.12217757105827332, |
| "learning_rate": 9.781196003099502e-05, |
| "loss": 0.0037, |
| "step": 13980 |
| }, |
| { |
| "epoch": 333.0952380952381, |
| "grad_norm": 0.10317707061767578, |
| "learning_rate": 9.780711959488786e-05, |
| "loss": 0.0039, |
| "step": 13990 |
| }, |
| { |
| "epoch": 333.3333333333333, |
| "grad_norm": 0.08228125423192978, |
| "learning_rate": 9.780227393066599e-05, |
| "loss": 0.0037, |
| "step": 14000 |
| }, |
| { |
| "epoch": 333.57142857142856, |
| "grad_norm": 0.10725780576467514, |
| "learning_rate": 9.77974230388593e-05, |
| "loss": 0.0041, |
| "step": 14010 |
| }, |
| { |
| "epoch": 333.8095238095238, |
| "grad_norm": 0.10591963678598404, |
| "learning_rate": 9.779256691999829e-05, |
| "loss": 0.004, |
| "step": 14020 |
| }, |
| { |
| "epoch": 334.04761904761904, |
| "grad_norm": 0.11263373494148254, |
| "learning_rate": 9.778770557461403e-05, |
| "loss": 0.0044, |
| "step": 14030 |
| }, |
| { |
| "epoch": 334.2857142857143, |
| "grad_norm": 0.0978194996714592, |
| "learning_rate": 9.778283900323812e-05, |
| "loss": 0.0041, |
| "step": 14040 |
| }, |
| { |
| "epoch": 334.5238095238095, |
| "grad_norm": 0.0652252659201622, |
| "learning_rate": 9.777796720640277e-05, |
| "loss": 0.0035, |
| "step": 14050 |
| }, |
| { |
| "epoch": 334.76190476190476, |
| "grad_norm": 0.07805225998163223, |
| "learning_rate": 9.777309018464078e-05, |
| "loss": 0.004, |
| "step": 14060 |
| }, |
| { |
| "epoch": 335.0, |
| "grad_norm": 0.07953700423240662, |
| "learning_rate": 9.776820793848547e-05, |
| "loss": 0.0039, |
| "step": 14070 |
| }, |
| { |
| "epoch": 335.23809523809524, |
| "grad_norm": 0.12268704921007156, |
| "learning_rate": 9.776332046847075e-05, |
| "loss": 0.0047, |
| "step": 14080 |
| }, |
| { |
| "epoch": 335.4761904761905, |
| "grad_norm": 0.11575701087713242, |
| "learning_rate": 9.775842777513111e-05, |
| "loss": 0.0042, |
| "step": 14090 |
| }, |
| { |
| "epoch": 335.7142857142857, |
| "grad_norm": 0.12150094658136368, |
| "learning_rate": 9.775352985900163e-05, |
| "loss": 0.0047, |
| "step": 14100 |
| }, |
| { |
| "epoch": 335.95238095238096, |
| "grad_norm": 0.14475630223751068, |
| "learning_rate": 9.774862672061791e-05, |
| "loss": 0.0048, |
| "step": 14110 |
| }, |
| { |
| "epoch": 336.1904761904762, |
| "grad_norm": 0.11597339063882828, |
| "learning_rate": 9.774371836051616e-05, |
| "loss": 0.0039, |
| "step": 14120 |
| }, |
| { |
| "epoch": 336.42857142857144, |
| "grad_norm": 0.14511676132678986, |
| "learning_rate": 9.773880477923315e-05, |
| "loss": 0.0038, |
| "step": 14130 |
| }, |
| { |
| "epoch": 336.6666666666667, |
| "grad_norm": 0.11303330212831497, |
| "learning_rate": 9.773388597730623e-05, |
| "loss": 0.004, |
| "step": 14140 |
| }, |
| { |
| "epoch": 336.9047619047619, |
| "grad_norm": 0.08157303184270859, |
| "learning_rate": 9.77289619552733e-05, |
| "loss": 0.0036, |
| "step": 14150 |
| }, |
| { |
| "epoch": 337.14285714285717, |
| "grad_norm": 0.09116984158754349, |
| "learning_rate": 9.772403271367285e-05, |
| "loss": 0.004, |
| "step": 14160 |
| }, |
| { |
| "epoch": 337.3809523809524, |
| "grad_norm": 0.08642767369747162, |
| "learning_rate": 9.771909825304396e-05, |
| "loss": 0.0042, |
| "step": 14170 |
| }, |
| { |
| "epoch": 337.6190476190476, |
| "grad_norm": 0.09959287196397781, |
| "learning_rate": 9.771415857392619e-05, |
| "loss": 0.0032, |
| "step": 14180 |
| }, |
| { |
| "epoch": 337.85714285714283, |
| "grad_norm": 0.0777825191617012, |
| "learning_rate": 9.770921367685978e-05, |
| "loss": 0.0038, |
| "step": 14190 |
| }, |
| { |
| "epoch": 338.0952380952381, |
| "grad_norm": 0.07987309247255325, |
| "learning_rate": 9.770426356238551e-05, |
| "loss": 0.0036, |
| "step": 14200 |
| }, |
| { |
| "epoch": 338.3333333333333, |
| "grad_norm": 0.08655369281768799, |
| "learning_rate": 9.769930823104469e-05, |
| "loss": 0.0037, |
| "step": 14210 |
| }, |
| { |
| "epoch": 338.57142857142856, |
| "grad_norm": 0.10098491609096527, |
| "learning_rate": 9.769434768337926e-05, |
| "loss": 0.0038, |
| "step": 14220 |
| }, |
| { |
| "epoch": 338.8095238095238, |
| "grad_norm": 0.12017781287431717, |
| "learning_rate": 9.768938191993164e-05, |
| "loss": 0.0035, |
| "step": 14230 |
| }, |
| { |
| "epoch": 339.04761904761904, |
| "grad_norm": 0.09659506380558014, |
| "learning_rate": 9.768441094124494e-05, |
| "loss": 0.0037, |
| "step": 14240 |
| }, |
| { |
| "epoch": 339.2857142857143, |
| "grad_norm": 0.07917569577693939, |
| "learning_rate": 9.767943474786275e-05, |
| "loss": 0.0035, |
| "step": 14250 |
| }, |
| { |
| "epoch": 339.5238095238095, |
| "grad_norm": 0.09511459618806839, |
| "learning_rate": 9.767445334032923e-05, |
| "loss": 0.0036, |
| "step": 14260 |
| }, |
| { |
| "epoch": 339.76190476190476, |
| "grad_norm": 0.08799537271261215, |
| "learning_rate": 9.766946671918919e-05, |
| "loss": 0.0033, |
| "step": 14270 |
| }, |
| { |
| "epoch": 340.0, |
| "grad_norm": 0.09190718084573746, |
| "learning_rate": 9.766447488498796e-05, |
| "loss": 0.0037, |
| "step": 14280 |
| }, |
| { |
| "epoch": 340.23809523809524, |
| "grad_norm": 0.12306186556816101, |
| "learning_rate": 9.765947783827139e-05, |
| "loss": 0.0038, |
| "step": 14290 |
| }, |
| { |
| "epoch": 340.4761904761905, |
| "grad_norm": 0.11185488104820251, |
| "learning_rate": 9.765447557958599e-05, |
| "loss": 0.0037, |
| "step": 14300 |
| }, |
| { |
| "epoch": 340.7142857142857, |
| "grad_norm": 0.12077915668487549, |
| "learning_rate": 9.764946810947879e-05, |
| "loss": 0.0041, |
| "step": 14310 |
| }, |
| { |
| "epoch": 340.95238095238096, |
| "grad_norm": 0.09284576028585434, |
| "learning_rate": 9.764445542849738e-05, |
| "loss": 0.0038, |
| "step": 14320 |
| }, |
| { |
| "epoch": 341.1904761904762, |
| "grad_norm": 0.0791429802775383, |
| "learning_rate": 9.763943753718998e-05, |
| "loss": 0.0033, |
| "step": 14330 |
| }, |
| { |
| "epoch": 341.42857142857144, |
| "grad_norm": 0.11543906480073929, |
| "learning_rate": 9.76344144361053e-05, |
| "loss": 0.0039, |
| "step": 14340 |
| }, |
| { |
| "epoch": 341.6666666666667, |
| "grad_norm": 0.11801048368215561, |
| "learning_rate": 9.762938612579269e-05, |
| "loss": 0.0042, |
| "step": 14350 |
| }, |
| { |
| "epoch": 341.9047619047619, |
| "grad_norm": 0.11671343445777893, |
| "learning_rate": 9.762435260680202e-05, |
| "loss": 0.0038, |
| "step": 14360 |
| }, |
| { |
| "epoch": 342.14285714285717, |
| "grad_norm": 0.10719845443964005, |
| "learning_rate": 9.761931387968373e-05, |
| "loss": 0.0037, |
| "step": 14370 |
| }, |
| { |
| "epoch": 342.3809523809524, |
| "grad_norm": 0.107689268887043, |
| "learning_rate": 9.76142699449889e-05, |
| "loss": 0.0032, |
| "step": 14380 |
| }, |
| { |
| "epoch": 342.6190476190476, |
| "grad_norm": 0.09465865790843964, |
| "learning_rate": 9.760922080326908e-05, |
| "loss": 0.0038, |
| "step": 14390 |
| }, |
| { |
| "epoch": 342.85714285714283, |
| "grad_norm": 0.09146640449762344, |
| "learning_rate": 9.760416645507644e-05, |
| "loss": 0.0033, |
| "step": 14400 |
| }, |
| { |
| "epoch": 343.0952380952381, |
| "grad_norm": 0.10059428215026855, |
| "learning_rate": 9.759910690096375e-05, |
| "loss": 0.0033, |
| "step": 14410 |
| }, |
| { |
| "epoch": 343.3333333333333, |
| "grad_norm": 0.07379937171936035, |
| "learning_rate": 9.759404214148429e-05, |
| "loss": 0.0032, |
| "step": 14420 |
| }, |
| { |
| "epoch": 343.57142857142856, |
| "grad_norm": 0.12462940067052841, |
| "learning_rate": 9.758897217719191e-05, |
| "loss": 0.0032, |
| "step": 14430 |
| }, |
| { |
| "epoch": 343.8095238095238, |
| "grad_norm": 0.09495638310909271, |
| "learning_rate": 9.758389700864113e-05, |
| "loss": 0.0035, |
| "step": 14440 |
| }, |
| { |
| "epoch": 344.04761904761904, |
| "grad_norm": 0.09234674274921417, |
| "learning_rate": 9.757881663638688e-05, |
| "loss": 0.0035, |
| "step": 14450 |
| }, |
| { |
| "epoch": 344.2857142857143, |
| "grad_norm": 0.0969182550907135, |
| "learning_rate": 9.757373106098478e-05, |
| "loss": 0.003, |
| "step": 14460 |
| }, |
| { |
| "epoch": 344.5238095238095, |
| "grad_norm": 0.10743220150470734, |
| "learning_rate": 9.756864028299097e-05, |
| "loss": 0.0033, |
| "step": 14470 |
| }, |
| { |
| "epoch": 344.76190476190476, |
| "grad_norm": 0.09900861233472824, |
| "learning_rate": 9.75635443029622e-05, |
| "loss": 0.0035, |
| "step": 14480 |
| }, |
| { |
| "epoch": 345.0, |
| "grad_norm": 0.08159717917442322, |
| "learning_rate": 9.755844312145572e-05, |
| "loss": 0.0031, |
| "step": 14490 |
| }, |
| { |
| "epoch": 345.23809523809524, |
| "grad_norm": 0.08818849921226501, |
| "learning_rate": 9.755333673902941e-05, |
| "loss": 0.0033, |
| "step": 14500 |
| }, |
| { |
| "epoch": 345.4761904761905, |
| "grad_norm": 0.112883560359478, |
| "learning_rate": 9.75482251562417e-05, |
| "loss": 0.0031, |
| "step": 14510 |
| }, |
| { |
| "epoch": 345.7142857142857, |
| "grad_norm": 0.07299734652042389, |
| "learning_rate": 9.754310837365155e-05, |
| "loss": 0.004, |
| "step": 14520 |
| }, |
| { |
| "epoch": 345.95238095238096, |
| "grad_norm": 0.08421136438846588, |
| "learning_rate": 9.753798639181856e-05, |
| "loss": 0.0035, |
| "step": 14530 |
| }, |
| { |
| "epoch": 346.1904761904762, |
| "grad_norm": 0.09571073204278946, |
| "learning_rate": 9.753285921130286e-05, |
| "loss": 0.0034, |
| "step": 14540 |
| }, |
| { |
| "epoch": 346.42857142857144, |
| "grad_norm": 0.10592791438102722, |
| "learning_rate": 9.752772683266512e-05, |
| "loss": 0.0044, |
| "step": 14550 |
| }, |
| { |
| "epoch": 346.6666666666667, |
| "grad_norm": 0.10883957892656326, |
| "learning_rate": 9.752258925646665e-05, |
| "loss": 0.004, |
| "step": 14560 |
| }, |
| { |
| "epoch": 346.9047619047619, |
| "grad_norm": 0.08009742200374603, |
| "learning_rate": 9.751744648326926e-05, |
| "loss": 0.0032, |
| "step": 14570 |
| }, |
| { |
| "epoch": 347.14285714285717, |
| "grad_norm": 0.0962366834282875, |
| "learning_rate": 9.751229851363536e-05, |
| "loss": 0.0042, |
| "step": 14580 |
| }, |
| { |
| "epoch": 347.3809523809524, |
| "grad_norm": 0.10161946713924408, |
| "learning_rate": 9.750714534812793e-05, |
| "loss": 0.0038, |
| "step": 14590 |
| }, |
| { |
| "epoch": 347.6190476190476, |
| "grad_norm": 0.07080286741256714, |
| "learning_rate": 9.750198698731053e-05, |
| "loss": 0.0035, |
| "step": 14600 |
| }, |
| { |
| "epoch": 347.85714285714283, |
| "grad_norm": 0.07838164269924164, |
| "learning_rate": 9.749682343174722e-05, |
| "loss": 0.0036, |
| "step": 14610 |
| }, |
| { |
| "epoch": 348.0952380952381, |
| "grad_norm": 0.0944102555513382, |
| "learning_rate": 9.749165468200272e-05, |
| "loss": 0.0034, |
| "step": 14620 |
| }, |
| { |
| "epoch": 348.3333333333333, |
| "grad_norm": 0.0876578837633133, |
| "learning_rate": 9.748648073864229e-05, |
| "loss": 0.003, |
| "step": 14630 |
| }, |
| { |
| "epoch": 348.57142857142856, |
| "grad_norm": 0.12015324085950851, |
| "learning_rate": 9.748130160223168e-05, |
| "loss": 0.0043, |
| "step": 14640 |
| }, |
| { |
| "epoch": 348.8095238095238, |
| "grad_norm": 0.12887024879455566, |
| "learning_rate": 9.747611727333734e-05, |
| "loss": 0.0036, |
| "step": 14650 |
| }, |
| { |
| "epoch": 349.04761904761904, |
| "grad_norm": 0.06771416962146759, |
| "learning_rate": 9.74709277525262e-05, |
| "loss": 0.0033, |
| "step": 14660 |
| }, |
| { |
| "epoch": 349.2857142857143, |
| "grad_norm": 0.09972390532493591, |
| "learning_rate": 9.746573304036576e-05, |
| "loss": 0.0037, |
| "step": 14670 |
| }, |
| { |
| "epoch": 349.5238095238095, |
| "grad_norm": 0.14044949412345886, |
| "learning_rate": 9.746053313742412e-05, |
| "loss": 0.0034, |
| "step": 14680 |
| }, |
| { |
| "epoch": 349.76190476190476, |
| "grad_norm": 0.14954344928264618, |
| "learning_rate": 9.745532804426994e-05, |
| "loss": 0.0034, |
| "step": 14690 |
| }, |
| { |
| "epoch": 350.0, |
| "grad_norm": 0.1142321452498436, |
| "learning_rate": 9.745011776147242e-05, |
| "loss": 0.0044, |
| "step": 14700 |
| }, |
| { |
| "epoch": 350.23809523809524, |
| "grad_norm": 0.07685929536819458, |
| "learning_rate": 9.744490228960138e-05, |
| "loss": 0.0037, |
| "step": 14710 |
| }, |
| { |
| "epoch": 350.4761904761905, |
| "grad_norm": 0.11968331784009933, |
| "learning_rate": 9.743968162922713e-05, |
| "loss": 0.0035, |
| "step": 14720 |
| }, |
| { |
| "epoch": 350.7142857142857, |
| "grad_norm": 0.11256229877471924, |
| "learning_rate": 9.743445578092064e-05, |
| "loss": 0.0038, |
| "step": 14730 |
| }, |
| { |
| "epoch": 350.95238095238096, |
| "grad_norm": 0.11302020400762558, |
| "learning_rate": 9.742922474525338e-05, |
| "loss": 0.0041, |
| "step": 14740 |
| }, |
| { |
| "epoch": 351.1904761904762, |
| "grad_norm": 0.11832022666931152, |
| "learning_rate": 9.742398852279741e-05, |
| "loss": 0.003, |
| "step": 14750 |
| }, |
| { |
| "epoch": 351.42857142857144, |
| "grad_norm": 0.10429254919290543, |
| "learning_rate": 9.741874711412535e-05, |
| "loss": 0.0039, |
| "step": 14760 |
| }, |
| { |
| "epoch": 351.6666666666667, |
| "grad_norm": 0.1117064356803894, |
| "learning_rate": 9.741350051981042e-05, |
| "loss": 0.0038, |
| "step": 14770 |
| }, |
| { |
| "epoch": 351.9047619047619, |
| "grad_norm": 0.07747708261013031, |
| "learning_rate": 9.740824874042633e-05, |
| "loss": 0.0029, |
| "step": 14780 |
| }, |
| { |
| "epoch": 352.14285714285717, |
| "grad_norm": 0.08410920202732086, |
| "learning_rate": 9.740299177654746e-05, |
| "loss": 0.0033, |
| "step": 14790 |
| }, |
| { |
| "epoch": 352.3809523809524, |
| "grad_norm": 0.1147940531373024, |
| "learning_rate": 9.739772962874867e-05, |
| "loss": 0.0045, |
| "step": 14800 |
| }, |
| { |
| "epoch": 352.6190476190476, |
| "grad_norm": 0.08659689873456955, |
| "learning_rate": 9.739246229760541e-05, |
| "loss": 0.0032, |
| "step": 14810 |
| }, |
| { |
| "epoch": 352.85714285714283, |
| "grad_norm": 0.09147448092699051, |
| "learning_rate": 9.738718978369376e-05, |
| "loss": 0.0034, |
| "step": 14820 |
| }, |
| { |
| "epoch": 353.0952380952381, |
| "grad_norm": 0.0937904641032219, |
| "learning_rate": 9.738191208759025e-05, |
| "loss": 0.0035, |
| "step": 14830 |
| }, |
| { |
| "epoch": 353.3333333333333, |
| "grad_norm": 0.0899277925491333, |
| "learning_rate": 9.73766292098721e-05, |
| "loss": 0.0039, |
| "step": 14840 |
| }, |
| { |
| "epoch": 353.57142857142856, |
| "grad_norm": 0.08176226913928986, |
| "learning_rate": 9.737134115111699e-05, |
| "loss": 0.0031, |
| "step": 14850 |
| }, |
| { |
| "epoch": 353.8095238095238, |
| "grad_norm": 0.11066190898418427, |
| "learning_rate": 9.736604791190323e-05, |
| "loss": 0.0038, |
| "step": 14860 |
| }, |
| { |
| "epoch": 354.04761904761904, |
| "grad_norm": 0.0816953033208847, |
| "learning_rate": 9.73607494928097e-05, |
| "loss": 0.0037, |
| "step": 14870 |
| }, |
| { |
| "epoch": 354.2857142857143, |
| "grad_norm": 0.10273134708404541, |
| "learning_rate": 9.735544589441581e-05, |
| "loss": 0.0037, |
| "step": 14880 |
| }, |
| { |
| "epoch": 354.5238095238095, |
| "grad_norm": 0.1048540398478508, |
| "learning_rate": 9.735013711730154e-05, |
| "loss": 0.0039, |
| "step": 14890 |
| }, |
| { |
| "epoch": 354.76190476190476, |
| "grad_norm": 0.0666409358382225, |
| "learning_rate": 9.734482316204747e-05, |
| "loss": 0.0038, |
| "step": 14900 |
| }, |
| { |
| "epoch": 355.0, |
| "grad_norm": 0.0592932254076004, |
| "learning_rate": 9.733950402923473e-05, |
| "loss": 0.0034, |
| "step": 14910 |
| }, |
| { |
| "epoch": 355.23809523809524, |
| "grad_norm": 0.07716116309165955, |
| "learning_rate": 9.7334179719445e-05, |
| "loss": 0.0034, |
| "step": 14920 |
| }, |
| { |
| "epoch": 355.4761904761905, |
| "grad_norm": 0.09460759162902832, |
| "learning_rate": 9.732885023326053e-05, |
| "loss": 0.0031, |
| "step": 14930 |
| }, |
| { |
| "epoch": 355.7142857142857, |
| "grad_norm": 0.08200521767139435, |
| "learning_rate": 9.732351557126418e-05, |
| "loss": 0.003, |
| "step": 14940 |
| }, |
| { |
| "epoch": 355.95238095238096, |
| "grad_norm": 0.09417081624269485, |
| "learning_rate": 9.731817573403929e-05, |
| "loss": 0.0036, |
| "step": 14950 |
| }, |
| { |
| "epoch": 356.1904761904762, |
| "grad_norm": 0.07144355773925781, |
| "learning_rate": 9.731283072216985e-05, |
| "loss": 0.0037, |
| "step": 14960 |
| }, |
| { |
| "epoch": 356.42857142857144, |
| "grad_norm": 0.09534378349781036, |
| "learning_rate": 9.730748053624039e-05, |
| "loss": 0.0036, |
| "step": 14970 |
| }, |
| { |
| "epoch": 356.6666666666667, |
| "grad_norm": 0.11141806840896606, |
| "learning_rate": 9.730212517683598e-05, |
| "loss": 0.0037, |
| "step": 14980 |
| }, |
| { |
| "epoch": 356.9047619047619, |
| "grad_norm": 0.10325188934803009, |
| "learning_rate": 9.729676464454228e-05, |
| "loss": 0.0033, |
| "step": 14990 |
| }, |
| { |
| "epoch": 357.14285714285717, |
| "grad_norm": 0.08089697360992432, |
| "learning_rate": 9.72913989399455e-05, |
| "loss": 0.0039, |
| "step": 15000 |
| }, |
| { |
| "epoch": 357.3809523809524, |
| "grad_norm": 0.10261926054954529, |
| "learning_rate": 9.728602806363242e-05, |
| "loss": 0.0038, |
| "step": 15010 |
| }, |
| { |
| "epoch": 357.6190476190476, |
| "grad_norm": 0.10960468649864197, |
| "learning_rate": 9.728065201619043e-05, |
| "loss": 0.0035, |
| "step": 15020 |
| }, |
| { |
| "epoch": 357.85714285714283, |
| "grad_norm": 0.11252915859222412, |
| "learning_rate": 9.727527079820742e-05, |
| "loss": 0.0032, |
| "step": 15030 |
| }, |
| { |
| "epoch": 358.0952380952381, |
| "grad_norm": 0.07965602725744247, |
| "learning_rate": 9.726988441027186e-05, |
| "loss": 0.0035, |
| "step": 15040 |
| }, |
| { |
| "epoch": 358.3333333333333, |
| "grad_norm": 0.0960497036576271, |
| "learning_rate": 9.726449285297281e-05, |
| "loss": 0.0033, |
| "step": 15050 |
| }, |
| { |
| "epoch": 358.57142857142856, |
| "grad_norm": 0.14710824191570282, |
| "learning_rate": 9.72590961268999e-05, |
| "loss": 0.004, |
| "step": 15060 |
| }, |
| { |
| "epoch": 358.8095238095238, |
| "grad_norm": 0.12923362851142883, |
| "learning_rate": 9.725369423264328e-05, |
| "loss": 0.0039, |
| "step": 15070 |
| }, |
| { |
| "epoch": 359.04761904761904, |
| "grad_norm": 0.11284012347459793, |
| "learning_rate": 9.72482871707937e-05, |
| "loss": 0.0033, |
| "step": 15080 |
| }, |
| { |
| "epoch": 359.2857142857143, |
| "grad_norm": 0.12159992754459381, |
| "learning_rate": 9.724287494194247e-05, |
| "loss": 0.0038, |
| "step": 15090 |
| }, |
| { |
| "epoch": 359.5238095238095, |
| "grad_norm": 0.0983424037694931, |
| "learning_rate": 9.723745754668147e-05, |
| "loss": 0.0044, |
| "step": 15100 |
| }, |
| { |
| "epoch": 359.76190476190476, |
| "grad_norm": 0.12507230043411255, |
| "learning_rate": 9.723203498560313e-05, |
| "loss": 0.0044, |
| "step": 15110 |
| }, |
| { |
| "epoch": 360.0, |
| "grad_norm": 0.15976129472255707, |
| "learning_rate": 9.722660725930046e-05, |
| "loss": 0.004, |
| "step": 15120 |
| }, |
| { |
| "epoch": 360.23809523809524, |
| "grad_norm": 0.13901469111442566, |
| "learning_rate": 9.722117436836702e-05, |
| "loss": 0.0039, |
| "step": 15130 |
| }, |
| { |
| "epoch": 360.4761904761905, |
| "grad_norm": 0.10722213983535767, |
| "learning_rate": 9.721573631339696e-05, |
| "loss": 0.0035, |
| "step": 15140 |
| }, |
| { |
| "epoch": 360.7142857142857, |
| "grad_norm": 0.1445736289024353, |
| "learning_rate": 9.721029309498494e-05, |
| "loss": 0.004, |
| "step": 15150 |
| }, |
| { |
| "epoch": 360.95238095238096, |
| "grad_norm": 0.16617314517498016, |
| "learning_rate": 9.720484471372627e-05, |
| "loss": 0.0047, |
| "step": 15160 |
| }, |
| { |
| "epoch": 361.1904761904762, |
| "grad_norm": 0.15940237045288086, |
| "learning_rate": 9.719939117021673e-05, |
| "loss": 0.0041, |
| "step": 15170 |
| }, |
| { |
| "epoch": 361.42857142857144, |
| "grad_norm": 0.1409807950258255, |
| "learning_rate": 9.719393246505275e-05, |
| "loss": 0.004, |
| "step": 15180 |
| }, |
| { |
| "epoch": 361.6666666666667, |
| "grad_norm": 0.12726131081581116, |
| "learning_rate": 9.718846859883128e-05, |
| "loss": 0.0043, |
| "step": 15190 |
| }, |
| { |
| "epoch": 361.9047619047619, |
| "grad_norm": 0.11613267660140991, |
| "learning_rate": 9.718299957214982e-05, |
| "loss": 0.004, |
| "step": 15200 |
| }, |
| { |
| "epoch": 362.14285714285717, |
| "grad_norm": 0.0947757437825203, |
| "learning_rate": 9.717752538560646e-05, |
| "loss": 0.004, |
| "step": 15210 |
| }, |
| { |
| "epoch": 362.3809523809524, |
| "grad_norm": 0.09302181750535965, |
| "learning_rate": 9.717204603979986e-05, |
| "loss": 0.0037, |
| "step": 15220 |
| }, |
| { |
| "epoch": 362.6190476190476, |
| "grad_norm": 0.13853846490383148, |
| "learning_rate": 9.716656153532922e-05, |
| "loss": 0.0038, |
| "step": 15230 |
| }, |
| { |
| "epoch": 362.85714285714283, |
| "grad_norm": 0.09680744260549545, |
| "learning_rate": 9.716107187279434e-05, |
| "loss": 0.0038, |
| "step": 15240 |
| }, |
| { |
| "epoch": 363.0952380952381, |
| "grad_norm": 0.09129160642623901, |
| "learning_rate": 9.715557705279555e-05, |
| "loss": 0.0032, |
| "step": 15250 |
| }, |
| { |
| "epoch": 363.3333333333333, |
| "grad_norm": 0.09432261437177658, |
| "learning_rate": 9.715007707593372e-05, |
| "loss": 0.0044, |
| "step": 15260 |
| }, |
| { |
| "epoch": 363.57142857142856, |
| "grad_norm": 0.08685706555843353, |
| "learning_rate": 9.714457194281036e-05, |
| "loss": 0.0036, |
| "step": 15270 |
| }, |
| { |
| "epoch": 363.8095238095238, |
| "grad_norm": 0.105177141726017, |
| "learning_rate": 9.713906165402751e-05, |
| "loss": 0.0034, |
| "step": 15280 |
| }, |
| { |
| "epoch": 364.04761904761904, |
| "grad_norm": 0.11833895742893219, |
| "learning_rate": 9.713354621018774e-05, |
| "loss": 0.0037, |
| "step": 15290 |
| }, |
| { |
| "epoch": 364.2857142857143, |
| "grad_norm": 0.07585232704877853, |
| "learning_rate": 9.712802561189422e-05, |
| "loss": 0.0037, |
| "step": 15300 |
| }, |
| { |
| "epoch": 364.5238095238095, |
| "grad_norm": 0.09387091547250748, |
| "learning_rate": 9.712249985975069e-05, |
| "loss": 0.0039, |
| "step": 15310 |
| }, |
| { |
| "epoch": 364.76190476190476, |
| "grad_norm": 0.09357699006795883, |
| "learning_rate": 9.71169689543614e-05, |
| "loss": 0.0041, |
| "step": 15320 |
| }, |
| { |
| "epoch": 365.0, |
| "grad_norm": 0.09244909882545471, |
| "learning_rate": 9.711143289633123e-05, |
| "loss": 0.0035, |
| "step": 15330 |
| }, |
| { |
| "epoch": 365.23809523809524, |
| "grad_norm": 0.07996772229671478, |
| "learning_rate": 9.710589168626561e-05, |
| "loss": 0.0035, |
| "step": 15340 |
| }, |
| { |
| "epoch": 365.4761904761905, |
| "grad_norm": 0.07006168365478516, |
| "learning_rate": 9.710034532477048e-05, |
| "loss": 0.0032, |
| "step": 15350 |
| }, |
| { |
| "epoch": 365.7142857142857, |
| "grad_norm": 0.0894763320684433, |
| "learning_rate": 9.709479381245239e-05, |
| "loss": 0.0031, |
| "step": 15360 |
| }, |
| { |
| "epoch": 365.95238095238096, |
| "grad_norm": 0.07830844819545746, |
| "learning_rate": 9.708923714991847e-05, |
| "loss": 0.004, |
| "step": 15370 |
| }, |
| { |
| "epoch": 366.1904761904762, |
| "grad_norm": 0.10145238041877747, |
| "learning_rate": 9.708367533777638e-05, |
| "loss": 0.0035, |
| "step": 15380 |
| }, |
| { |
| "epoch": 366.42857142857144, |
| "grad_norm": 0.09410473704338074, |
| "learning_rate": 9.707810837663431e-05, |
| "loss": 0.0034, |
| "step": 15390 |
| }, |
| { |
| "epoch": 366.6666666666667, |
| "grad_norm": 0.1192241758108139, |
| "learning_rate": 9.707253626710113e-05, |
| "loss": 0.0034, |
| "step": 15400 |
| }, |
| { |
| "epoch": 366.9047619047619, |
| "grad_norm": 0.08863279968500137, |
| "learning_rate": 9.706695900978613e-05, |
| "loss": 0.0029, |
| "step": 15410 |
| }, |
| { |
| "epoch": 367.14285714285717, |
| "grad_norm": 0.07320625334978104, |
| "learning_rate": 9.706137660529926e-05, |
| "loss": 0.0037, |
| "step": 15420 |
| }, |
| { |
| "epoch": 367.3809523809524, |
| "grad_norm": 0.08542609959840775, |
| "learning_rate": 9.705578905425101e-05, |
| "loss": 0.003, |
| "step": 15430 |
| }, |
| { |
| "epoch": 367.6190476190476, |
| "grad_norm": 0.08456038683652878, |
| "learning_rate": 9.705019635725241e-05, |
| "loss": 0.004, |
| "step": 15440 |
| }, |
| { |
| "epoch": 367.85714285714283, |
| "grad_norm": 0.10127533972263336, |
| "learning_rate": 9.704459851491508e-05, |
| "loss": 0.0035, |
| "step": 15450 |
| }, |
| { |
| "epoch": 368.0952380952381, |
| "grad_norm": 0.08799908310174942, |
| "learning_rate": 9.703899552785118e-05, |
| "loss": 0.0045, |
| "step": 15460 |
| }, |
| { |
| "epoch": 368.3333333333333, |
| "grad_norm": 0.0717431902885437, |
| "learning_rate": 9.703338739667346e-05, |
| "loss": 0.0036, |
| "step": 15470 |
| }, |
| { |
| "epoch": 368.57142857142856, |
| "grad_norm": 0.06366679072380066, |
| "learning_rate": 9.70277741219952e-05, |
| "loss": 0.003, |
| "step": 15480 |
| }, |
| { |
| "epoch": 368.8095238095238, |
| "grad_norm": 0.07284467667341232, |
| "learning_rate": 9.702215570443027e-05, |
| "loss": 0.0036, |
| "step": 15490 |
| }, |
| { |
| "epoch": 369.04761904761904, |
| "grad_norm": 0.09091628342866898, |
| "learning_rate": 9.701653214459309e-05, |
| "loss": 0.0028, |
| "step": 15500 |
| }, |
| { |
| "epoch": 369.2857142857143, |
| "grad_norm": 0.07378444075584412, |
| "learning_rate": 9.701090344309865e-05, |
| "loss": 0.0031, |
| "step": 15510 |
| }, |
| { |
| "epoch": 369.5238095238095, |
| "grad_norm": 0.059222783893346786, |
| "learning_rate": 9.700526960056247e-05, |
| "loss": 0.0038, |
| "step": 15520 |
| }, |
| { |
| "epoch": 369.76190476190476, |
| "grad_norm": 0.07987191528081894, |
| "learning_rate": 9.699963061760068e-05, |
| "loss": 0.003, |
| "step": 15530 |
| }, |
| { |
| "epoch": 370.0, |
| "grad_norm": 0.07932016998529434, |
| "learning_rate": 9.699398649482997e-05, |
| "loss": 0.003, |
| "step": 15540 |
| }, |
| { |
| "epoch": 370.23809523809524, |
| "grad_norm": 0.11053614318370819, |
| "learning_rate": 9.698833723286753e-05, |
| "loss": 0.0035, |
| "step": 15550 |
| }, |
| { |
| "epoch": 370.4761904761905, |
| "grad_norm": 0.07198098301887512, |
| "learning_rate": 9.698268283233118e-05, |
| "loss": 0.0034, |
| "step": 15560 |
| }, |
| { |
| "epoch": 370.7142857142857, |
| "grad_norm": 0.08047537505626678, |
| "learning_rate": 9.697702329383929e-05, |
| "loss": 0.0032, |
| "step": 15570 |
| }, |
| { |
| "epoch": 370.95238095238096, |
| "grad_norm": 0.07891126722097397, |
| "learning_rate": 9.697135861801074e-05, |
| "loss": 0.0031, |
| "step": 15580 |
| }, |
| { |
| "epoch": 371.1904761904762, |
| "grad_norm": 0.08007906377315521, |
| "learning_rate": 9.696568880546505e-05, |
| "loss": 0.004, |
| "step": 15590 |
| }, |
| { |
| "epoch": 371.42857142857144, |
| "grad_norm": 0.07694392651319504, |
| "learning_rate": 9.696001385682223e-05, |
| "loss": 0.0037, |
| "step": 15600 |
| }, |
| { |
| "epoch": 371.6666666666667, |
| "grad_norm": 0.07185126841068268, |
| "learning_rate": 9.695433377270291e-05, |
| "loss": 0.0038, |
| "step": 15610 |
| }, |
| { |
| "epoch": 371.9047619047619, |
| "grad_norm": 0.07868017256259918, |
| "learning_rate": 9.694864855372824e-05, |
| "loss": 0.0038, |
| "step": 15620 |
| }, |
| { |
| "epoch": 372.14285714285717, |
| "grad_norm": 0.08354003727436066, |
| "learning_rate": 9.694295820051995e-05, |
| "loss": 0.004, |
| "step": 15630 |
| }, |
| { |
| "epoch": 372.3809523809524, |
| "grad_norm": 0.09106825292110443, |
| "learning_rate": 9.693726271370032e-05, |
| "loss": 0.0036, |
| "step": 15640 |
| }, |
| { |
| "epoch": 372.6190476190476, |
| "grad_norm": 0.08724596351385117, |
| "learning_rate": 9.693156209389221e-05, |
| "loss": 0.0032, |
| "step": 15650 |
| }, |
| { |
| "epoch": 372.85714285714283, |
| "grad_norm": 0.09573080390691757, |
| "learning_rate": 9.692585634171905e-05, |
| "loss": 0.0039, |
| "step": 15660 |
| }, |
| { |
| "epoch": 373.0952380952381, |
| "grad_norm": 0.08690960705280304, |
| "learning_rate": 9.692014545780476e-05, |
| "loss": 0.0038, |
| "step": 15670 |
| }, |
| { |
| "epoch": 373.3333333333333, |
| "grad_norm": 0.07339439541101456, |
| "learning_rate": 9.691442944277393e-05, |
| "loss": 0.0041, |
| "step": 15680 |
| }, |
| { |
| "epoch": 373.57142857142856, |
| "grad_norm": 0.08173655718564987, |
| "learning_rate": 9.690870829725162e-05, |
| "loss": 0.0043, |
| "step": 15690 |
| }, |
| { |
| "epoch": 373.8095238095238, |
| "grad_norm": 0.11303990334272385, |
| "learning_rate": 9.69029820218635e-05, |
| "loss": 0.0039, |
| "step": 15700 |
| }, |
| { |
| "epoch": 374.04761904761904, |
| "grad_norm": 0.13039492070674896, |
| "learning_rate": 9.689725061723579e-05, |
| "loss": 0.0032, |
| "step": 15710 |
| }, |
| { |
| "epoch": 374.2857142857143, |
| "grad_norm": 0.10763411223888397, |
| "learning_rate": 9.689151408399527e-05, |
| "loss": 0.0036, |
| "step": 15720 |
| }, |
| { |
| "epoch": 374.5238095238095, |
| "grad_norm": 0.09858668595552444, |
| "learning_rate": 9.688577242276924e-05, |
| "loss": 0.0033, |
| "step": 15730 |
| }, |
| { |
| "epoch": 374.76190476190476, |
| "grad_norm": 0.10579327493906021, |
| "learning_rate": 9.688002563418566e-05, |
| "loss": 0.0034, |
| "step": 15740 |
| }, |
| { |
| "epoch": 375.0, |
| "grad_norm": 0.09915103018283844, |
| "learning_rate": 9.687427371887293e-05, |
| "loss": 0.004, |
| "step": 15750 |
| }, |
| { |
| "epoch": 375.23809523809524, |
| "grad_norm": 0.0961657166481018, |
| "learning_rate": 9.686851667746012e-05, |
| "loss": 0.0043, |
| "step": 15760 |
| }, |
| { |
| "epoch": 375.4761904761905, |
| "grad_norm": 0.08477150648832321, |
| "learning_rate": 9.686275451057677e-05, |
| "loss": 0.003, |
| "step": 15770 |
| }, |
| { |
| "epoch": 375.7142857142857, |
| "grad_norm": 0.07308121770620346, |
| "learning_rate": 9.685698721885308e-05, |
| "loss": 0.0032, |
| "step": 15780 |
| }, |
| { |
| "epoch": 375.95238095238096, |
| "grad_norm": 0.06217966228723526, |
| "learning_rate": 9.68512148029197e-05, |
| "loss": 0.004, |
| "step": 15790 |
| }, |
| { |
| "epoch": 376.1904761904762, |
| "grad_norm": 0.09704345464706421, |
| "learning_rate": 9.684543726340791e-05, |
| "loss": 0.0034, |
| "step": 15800 |
| }, |
| { |
| "epoch": 376.42857142857144, |
| "grad_norm": 0.10821390897035599, |
| "learning_rate": 9.683965460094952e-05, |
| "loss": 0.0034, |
| "step": 15810 |
| }, |
| { |
| "epoch": 376.6666666666667, |
| "grad_norm": 0.10692407190799713, |
| "learning_rate": 9.683386681617694e-05, |
| "loss": 0.0034, |
| "step": 15820 |
| }, |
| { |
| "epoch": 376.9047619047619, |
| "grad_norm": 0.12938278913497925, |
| "learning_rate": 9.68280739097231e-05, |
| "loss": 0.0032, |
| "step": 15830 |
| }, |
| { |
| "epoch": 377.14285714285717, |
| "grad_norm": 0.08428510278463364, |
| "learning_rate": 9.682227588222148e-05, |
| "loss": 0.0035, |
| "step": 15840 |
| }, |
| { |
| "epoch": 377.3809523809524, |
| "grad_norm": 0.08831379562616348, |
| "learning_rate": 9.681647273430618e-05, |
| "loss": 0.0038, |
| "step": 15850 |
| }, |
| { |
| "epoch": 377.6190476190476, |
| "grad_norm": 0.08449848741292953, |
| "learning_rate": 9.681066446661182e-05, |
| "loss": 0.0032, |
| "step": 15860 |
| }, |
| { |
| "epoch": 377.85714285714283, |
| "grad_norm": 0.10025858879089355, |
| "learning_rate": 9.680485107977357e-05, |
| "loss": 0.0033, |
| "step": 15870 |
| }, |
| { |
| "epoch": 378.0952380952381, |
| "grad_norm": 0.08797000348567963, |
| "learning_rate": 9.679903257442716e-05, |
| "loss": 0.003, |
| "step": 15880 |
| }, |
| { |
| "epoch": 378.3333333333333, |
| "grad_norm": 0.08503340184688568, |
| "learning_rate": 9.679320895120891e-05, |
| "loss": 0.0033, |
| "step": 15890 |
| }, |
| { |
| "epoch": 378.57142857142856, |
| "grad_norm": 0.1261996179819107, |
| "learning_rate": 9.67873802107557e-05, |
| "loss": 0.0035, |
| "step": 15900 |
| }, |
| { |
| "epoch": 378.8095238095238, |
| "grad_norm": 0.12541133165359497, |
| "learning_rate": 9.67815463537049e-05, |
| "loss": 0.0032, |
| "step": 15910 |
| }, |
| { |
| "epoch": 379.04761904761904, |
| "grad_norm": 0.12868760526180267, |
| "learning_rate": 9.677570738069457e-05, |
| "loss": 0.0041, |
| "step": 15920 |
| }, |
| { |
| "epoch": 379.2857142857143, |
| "grad_norm": 0.125481978058815, |
| "learning_rate": 9.676986329236318e-05, |
| "loss": 0.0041, |
| "step": 15930 |
| }, |
| { |
| "epoch": 379.5238095238095, |
| "grad_norm": 0.08111248165369034, |
| "learning_rate": 9.676401408934987e-05, |
| "loss": 0.004, |
| "step": 15940 |
| }, |
| { |
| "epoch": 379.76190476190476, |
| "grad_norm": 0.08863970637321472, |
| "learning_rate": 9.675815977229428e-05, |
| "loss": 0.0035, |
| "step": 15950 |
| }, |
| { |
| "epoch": 380.0, |
| "grad_norm": 0.09570310264825821, |
| "learning_rate": 9.675230034183664e-05, |
| "loss": 0.0036, |
| "step": 15960 |
| }, |
| { |
| "epoch": 380.23809523809524, |
| "grad_norm": 0.10490087419748306, |
| "learning_rate": 9.674643579861773e-05, |
| "loss": 0.0036, |
| "step": 15970 |
| }, |
| { |
| "epoch": 380.4761904761905, |
| "grad_norm": 0.09651044756174088, |
| "learning_rate": 9.674056614327886e-05, |
| "loss": 0.0038, |
| "step": 15980 |
| }, |
| { |
| "epoch": 380.7142857142857, |
| "grad_norm": 0.10997740924358368, |
| "learning_rate": 9.673469137646198e-05, |
| "loss": 0.0028, |
| "step": 15990 |
| }, |
| { |
| "epoch": 380.95238095238096, |
| "grad_norm": 0.08573995530605316, |
| "learning_rate": 9.67288114988095e-05, |
| "loss": 0.0036, |
| "step": 16000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 100000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2381, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 256, |
| "trial_name": null, |
| "trial_params": null |
| } |