| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 100.0, | |
| "eval_steps": 900, | |
| "global_step": 22500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.10222222222222223, | |
| "grad_norm": 6382.4189453125, | |
| "learning_rate": 3.4074074074074077e-06, | |
| "loss": 306.5843, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.20444444444444446, | |
| "grad_norm": 154.1435546875, | |
| "learning_rate": 6.814814814814815e-06, | |
| "loss": 62.983, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.30666666666666664, | |
| "grad_norm": 10.364509582519531, | |
| "learning_rate": 1.0222222222222223e-05, | |
| "loss": 7.6227, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.4088888888888889, | |
| "grad_norm": 13.348139762878418, | |
| "learning_rate": 1.362962962962963e-05, | |
| "loss": 7.322, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.5111111111111111, | |
| "grad_norm": 1.03432297706604, | |
| "learning_rate": 1.7037037037037038e-05, | |
| "loss": 7.2767, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.6133333333333333, | |
| "grad_norm": 2.311262369155884, | |
| "learning_rate": 2.0444444444444446e-05, | |
| "loss": 7.2605, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.7155555555555555, | |
| "grad_norm": 1.2174512147903442, | |
| "learning_rate": 2.3851851851851854e-05, | |
| "loss": 7.2589, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.8177777777777778, | |
| "grad_norm": 1.1917160749435425, | |
| "learning_rate": 2.725925925925926e-05, | |
| "loss": 7.2573, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 0.802689254283905, | |
| "learning_rate": 3.066666666666667e-05, | |
| "loss": 7.2555, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 1.0222222222222221, | |
| "grad_norm": 0.8915572762489319, | |
| "learning_rate": 3.4074074074074077e-05, | |
| "loss": 7.2539, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.1244444444444444, | |
| "grad_norm": 1.1943933963775635, | |
| "learning_rate": 3.7481481481481484e-05, | |
| "loss": 7.2509, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 1.2266666666666666, | |
| "grad_norm": 0.9069448709487915, | |
| "learning_rate": 4.088888888888889e-05, | |
| "loss": 7.2492, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 1.3288888888888888, | |
| "grad_norm": 0.7575523853302002, | |
| "learning_rate": 4.42962962962963e-05, | |
| "loss": 7.2472, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 1.431111111111111, | |
| "grad_norm": 0.5182924866676331, | |
| "learning_rate": 4.770370370370371e-05, | |
| "loss": 7.2453, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 1.5333333333333332, | |
| "grad_norm": 0.5943706631660461, | |
| "learning_rate": 5.111111111111111e-05, | |
| "loss": 7.2433, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.6355555555555554, | |
| "grad_norm": 0.5987505912780762, | |
| "learning_rate": 5.451851851851852e-05, | |
| "loss": 7.2415, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.7377777777777776, | |
| "grad_norm": 0.8691719770431519, | |
| "learning_rate": 5.792592592592593e-05, | |
| "loss": 7.2388, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.8399999999999999, | |
| "grad_norm": 0.9830289483070374, | |
| "learning_rate": 6.133333333333334e-05, | |
| "loss": 7.2337, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.942222222222222, | |
| "grad_norm": 0.9140754342079163, | |
| "learning_rate": 6.474074074074075e-05, | |
| "loss": 7.2245, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 2.0444444444444443, | |
| "grad_norm": 1.7775914669036865, | |
| "learning_rate": 6.814814814814815e-05, | |
| "loss": 7.2109, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 2.1466666666666665, | |
| "grad_norm": 1.317034363746643, | |
| "learning_rate": 7.155555555555555e-05, | |
| "loss": 7.1871, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 2.2488888888888887, | |
| "grad_norm": 1.6885732412338257, | |
| "learning_rate": 7.496296296296297e-05, | |
| "loss": 7.1349, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 2.351111111111111, | |
| "grad_norm": 2.464526653289795, | |
| "learning_rate": 7.837037037037037e-05, | |
| "loss": 7.0371, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 2.453333333333333, | |
| "grad_norm": 3.4147374629974365, | |
| "learning_rate": 8.177777777777778e-05, | |
| "loss": 6.9365, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 2.5555555555555554, | |
| "grad_norm": 3.6264212131500244, | |
| "learning_rate": 8.518518518518518e-05, | |
| "loss": 6.8548, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 2.6577777777777776, | |
| "grad_norm": 2.2979955673217773, | |
| "learning_rate": 8.85925925925926e-05, | |
| "loss": 6.7637, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "grad_norm": 2.969346046447754, | |
| "learning_rate": 9.200000000000001e-05, | |
| "loss": 6.6778, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 2.862222222222222, | |
| "grad_norm": 4.26610803604126, | |
| "learning_rate": 9.540740740740741e-05, | |
| "loss": 6.5954, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 2.964444444444444, | |
| "grad_norm": 1.7684084177017212, | |
| "learning_rate": 9.881481481481482e-05, | |
| "loss": 6.5164, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 3.066666666666667, | |
| "grad_norm": 3.720853090286255, | |
| "learning_rate": 9.999988344964554e-05, | |
| "loss": 6.4356, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 3.168888888888889, | |
| "grad_norm": 2.5611510276794434, | |
| "learning_rate": 9.99992520072995e-05, | |
| "loss": 6.3594, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 3.2711111111111113, | |
| "grad_norm": 5.3843255043029785, | |
| "learning_rate": 9.999807252777301e-05, | |
| "loss": 6.3057, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 3.3733333333333335, | |
| "grad_norm": 4.412026882171631, | |
| "learning_rate": 9.999634502399426e-05, | |
| "loss": 6.25, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 3.4755555555555557, | |
| "grad_norm": 3.188660144805908, | |
| "learning_rate": 9.999406951489825e-05, | |
| "loss": 6.1975, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 3.5777777777777775, | |
| "grad_norm": 4.5765156745910645, | |
| "learning_rate": 9.999124602542662e-05, | |
| "loss": 6.1516, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "grad_norm": 5.967836856842041, | |
| "learning_rate": 9.998787458652739e-05, | |
| "loss": 6.1038, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 3.7822222222222224, | |
| "grad_norm": 6.038416385650635, | |
| "learning_rate": 9.998395523515457e-05, | |
| "loss": 6.078, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 3.8844444444444446, | |
| "grad_norm": 2.577953577041626, | |
| "learning_rate": 9.997948801426783e-05, | |
| "loss": 6.0297, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 3.986666666666667, | |
| "grad_norm": 3.8739564418792725, | |
| "learning_rate": 9.997447297283196e-05, | |
| "loss": 5.9847, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 4.088888888888889, | |
| "grad_norm": 5.759775161743164, | |
| "learning_rate": 9.996891016581633e-05, | |
| "loss": 5.9452, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 4.191111111111111, | |
| "grad_norm": 5.758726596832275, | |
| "learning_rate": 9.996279965419441e-05, | |
| "loss": 5.9283, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 4.293333333333333, | |
| "grad_norm": 3.345691204071045, | |
| "learning_rate": 9.995614150494293e-05, | |
| "loss": 5.8792, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 4.395555555555555, | |
| "grad_norm": 5.426297664642334, | |
| "learning_rate": 9.994893579104123e-05, | |
| "loss": 5.8526, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 4.497777777777777, | |
| "grad_norm": 4.649121284484863, | |
| "learning_rate": 9.994118259147049e-05, | |
| "loss": 5.8266, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "grad_norm": 5.175451278686523, | |
| "learning_rate": 9.993288199121283e-05, | |
| "loss": 5.8114, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 4.702222222222222, | |
| "grad_norm": 4.655645370483398, | |
| "learning_rate": 9.992403408125033e-05, | |
| "loss": 5.7801, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 4.804444444444444, | |
| "grad_norm": 5.830355644226074, | |
| "learning_rate": 9.991463895856414e-05, | |
| "loss": 5.7576, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 4.906666666666666, | |
| "grad_norm": 3.2799057960510254, | |
| "learning_rate": 9.990469672613331e-05, | |
| "loss": 5.7327, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 5.0088888888888885, | |
| "grad_norm": 5.891563415527344, | |
| "learning_rate": 9.989420749293375e-05, | |
| "loss": 5.7139, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 5.111111111111111, | |
| "grad_norm": 6.125003337860107, | |
| "learning_rate": 9.988317137393697e-05, | |
| "loss": 5.6823, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 5.213333333333333, | |
| "grad_norm": 4.9209442138671875, | |
| "learning_rate": 9.987158849010885e-05, | |
| "loss": 5.6534, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 5.315555555555555, | |
| "grad_norm": 3.9249610900878906, | |
| "learning_rate": 9.985945896840829e-05, | |
| "loss": 5.6601, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 5.417777777777777, | |
| "grad_norm": 7.975271701812744, | |
| "learning_rate": 9.984678294178589e-05, | |
| "loss": 5.6278, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 5.52, | |
| "grad_norm": 4.072458267211914, | |
| "learning_rate": 9.983356054918238e-05, | |
| "loss": 5.6104, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 5.622222222222222, | |
| "grad_norm": 5.122928142547607, | |
| "learning_rate": 9.981979193552721e-05, | |
| "loss": 5.5991, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 5.724444444444444, | |
| "grad_norm": 6.029202461242676, | |
| "learning_rate": 9.980547725173685e-05, | |
| "loss": 5.5761, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 5.826666666666666, | |
| "grad_norm": 4.795958042144775, | |
| "learning_rate": 9.979061665471326e-05, | |
| "loss": 5.5573, | |
| "step": 1311 | |
| }, | |
| { | |
| "epoch": 5.928888888888888, | |
| "grad_norm": 3.8007431030273438, | |
| "learning_rate": 9.977521030734203e-05, | |
| "loss": 5.5274, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 6.0311111111111115, | |
| "grad_norm": 5.163888931274414, | |
| "learning_rate": 9.975925837849073e-05, | |
| "loss": 5.5212, | |
| "step": 1357 | |
| }, | |
| { | |
| "epoch": 6.133333333333334, | |
| "grad_norm": 5.857538223266602, | |
| "learning_rate": 9.9742761043007e-05, | |
| "loss": 5.5039, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 6.235555555555556, | |
| "grad_norm": 4.817676067352295, | |
| "learning_rate": 9.972571848171657e-05, | |
| "loss": 5.4863, | |
| "step": 1403 | |
| }, | |
| { | |
| "epoch": 6.337777777777778, | |
| "grad_norm": 4.5216450691223145, | |
| "learning_rate": 9.97081308814214e-05, | |
| "loss": 5.4866, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 6.44, | |
| "grad_norm": 5.7964630126953125, | |
| "learning_rate": 9.968999843489754e-05, | |
| "loss": 5.4544, | |
| "step": 1449 | |
| }, | |
| { | |
| "epoch": 6.542222222222223, | |
| "grad_norm": 7.403745174407959, | |
| "learning_rate": 9.967132134089309e-05, | |
| "loss": 5.4383, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 6.644444444444445, | |
| "grad_norm": 5.906863689422607, | |
| "learning_rate": 9.965209980412593e-05, | |
| "loss": 5.4435, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 6.746666666666667, | |
| "grad_norm": 4.985208511352539, | |
| "learning_rate": 9.963233403528154e-05, | |
| "loss": 5.4271, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 6.848888888888889, | |
| "grad_norm": 5.670632839202881, | |
| "learning_rate": 9.96120242510107e-05, | |
| "loss": 5.4023, | |
| "step": 1541 | |
| }, | |
| { | |
| "epoch": 6.9511111111111115, | |
| "grad_norm": 4.155480861663818, | |
| "learning_rate": 9.959117067392709e-05, | |
| "loss": 5.3781, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 7.053333333333334, | |
| "grad_norm": 6.202167987823486, | |
| "learning_rate": 9.95697735326048e-05, | |
| "loss": 5.3696, | |
| "step": 1587 | |
| }, | |
| { | |
| "epoch": 7.155555555555556, | |
| "grad_norm": 5.649682998657227, | |
| "learning_rate": 9.954783306157595e-05, | |
| "loss": 5.3255, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 7.257777777777778, | |
| "grad_norm": 6.699223518371582, | |
| "learning_rate": 9.952534950132802e-05, | |
| "loss": 5.3186, | |
| "step": 1633 | |
| }, | |
| { | |
| "epoch": 7.36, | |
| "grad_norm": 5.433987140655518, | |
| "learning_rate": 9.95023230983012e-05, | |
| "loss": 5.3147, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 7.4622222222222225, | |
| "grad_norm": 4.822690010070801, | |
| "learning_rate": 9.947875410488581e-05, | |
| "loss": 5.3022, | |
| "step": 1679 | |
| }, | |
| { | |
| "epoch": 7.564444444444445, | |
| "grad_norm": 5.345188617706299, | |
| "learning_rate": 9.945464277941939e-05, | |
| "loss": 5.2828, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 7.666666666666667, | |
| "grad_norm": 4.902531623840332, | |
| "learning_rate": 9.942998938618394e-05, | |
| "loss": 5.2818, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 7.768888888888889, | |
| "grad_norm": 7.8368353843688965, | |
| "learning_rate": 9.940479419540304e-05, | |
| "loss": 5.2735, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 7.871111111111111, | |
| "grad_norm": 5.669989585876465, | |
| "learning_rate": 9.937905748323883e-05, | |
| "loss": 5.2554, | |
| "step": 1771 | |
| }, | |
| { | |
| "epoch": 7.973333333333334, | |
| "grad_norm": 4.463327407836914, | |
| "learning_rate": 9.935277953178905e-05, | |
| "loss": 5.2421, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 8.075555555555555, | |
| "grad_norm": 4.2700629234313965, | |
| "learning_rate": 9.93259606290839e-05, | |
| "loss": 5.1956, | |
| "step": 1817 | |
| }, | |
| { | |
| "epoch": 8.177777777777777, | |
| "grad_norm": 5.543842315673828, | |
| "learning_rate": 9.929860106908289e-05, | |
| "loss": 5.1719, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 8.28, | |
| "grad_norm": 10.465546607971191, | |
| "learning_rate": 9.927070115167161e-05, | |
| "loss": 5.1691, | |
| "step": 1863 | |
| }, | |
| { | |
| "epoch": 8.382222222222222, | |
| "grad_norm": 5.517487525939941, | |
| "learning_rate": 9.924226118265849e-05, | |
| "loss": 5.1431, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 8.484444444444444, | |
| "grad_norm": 6.022068977355957, | |
| "learning_rate": 9.921328147377143e-05, | |
| "loss": 5.1507, | |
| "step": 1909 | |
| }, | |
| { | |
| "epoch": 8.586666666666666, | |
| "grad_norm": 4.770472526550293, | |
| "learning_rate": 9.918376234265428e-05, | |
| "loss": 5.1385, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 8.688888888888888, | |
| "grad_norm": 6.177302360534668, | |
| "learning_rate": 9.915370411286356e-05, | |
| "loss": 5.1091, | |
| "step": 1955 | |
| }, | |
| { | |
| "epoch": 8.79111111111111, | |
| "grad_norm": 6.306371688842773, | |
| "learning_rate": 9.912310711386473e-05, | |
| "loss": 5.1276, | |
| "step": 1978 | |
| }, | |
| { | |
| "epoch": 8.893333333333333, | |
| "grad_norm": 7.086174488067627, | |
| "learning_rate": 9.909197168102867e-05, | |
| "loss": 5.0997, | |
| "step": 2001 | |
| }, | |
| { | |
| "epoch": 8.995555555555555, | |
| "grad_norm": 5.590447902679443, | |
| "learning_rate": 9.906029815562797e-05, | |
| "loss": 5.0776, | |
| "step": 2024 | |
| }, | |
| { | |
| "epoch": 9.097777777777777, | |
| "grad_norm": 4.786597728729248, | |
| "learning_rate": 9.902808688483323e-05, | |
| "loss": 5.0244, | |
| "step": 2047 | |
| }, | |
| { | |
| "epoch": 9.2, | |
| "grad_norm": 7.7961015701293945, | |
| "learning_rate": 9.899533822170922e-05, | |
| "loss": 5.0232, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 9.302222222222222, | |
| "grad_norm": 5.857214450836182, | |
| "learning_rate": 9.896205252521099e-05, | |
| "loss": 5.0213, | |
| "step": 2093 | |
| }, | |
| { | |
| "epoch": 9.404444444444444, | |
| "grad_norm": 6.194970607757568, | |
| "learning_rate": 9.892823016017999e-05, | |
| "loss": 4.984, | |
| "step": 2116 | |
| }, | |
| { | |
| "epoch": 9.506666666666666, | |
| "grad_norm": 7.040445804595947, | |
| "learning_rate": 9.889387149734004e-05, | |
| "loss": 4.9845, | |
| "step": 2139 | |
| }, | |
| { | |
| "epoch": 9.608888888888888, | |
| "grad_norm": 6.245872497558594, | |
| "learning_rate": 9.885897691329327e-05, | |
| "loss": 4.9771, | |
| "step": 2162 | |
| }, | |
| { | |
| "epoch": 9.71111111111111, | |
| "grad_norm": 4.590968608856201, | |
| "learning_rate": 9.882354679051598e-05, | |
| "loss": 4.9565, | |
| "step": 2185 | |
| }, | |
| { | |
| "epoch": 9.813333333333333, | |
| "grad_norm": 5.94847297668457, | |
| "learning_rate": 9.87875815173545e-05, | |
| "loss": 4.9531, | |
| "step": 2208 | |
| }, | |
| { | |
| "epoch": 9.915555555555555, | |
| "grad_norm": 8.10450267791748, | |
| "learning_rate": 9.875108148802082e-05, | |
| "loss": 4.9557, | |
| "step": 2231 | |
| }, | |
| { | |
| "epoch": 10.017777777777777, | |
| "grad_norm": 5.512363910675049, | |
| "learning_rate": 9.871404710258841e-05, | |
| "loss": 4.9295, | |
| "step": 2254 | |
| }, | |
| { | |
| "epoch": 10.12, | |
| "grad_norm": 5.455718517303467, | |
| "learning_rate": 9.867647876698775e-05, | |
| "loss": 4.8753, | |
| "step": 2277 | |
| }, | |
| { | |
| "epoch": 10.222222222222221, | |
| "grad_norm": 5.959130764007568, | |
| "learning_rate": 9.86383768930019e-05, | |
| "loss": 4.8732, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 10.324444444444444, | |
| "grad_norm": 6.239514350891113, | |
| "learning_rate": 9.859974189826198e-05, | |
| "loss": 4.8707, | |
| "step": 2323 | |
| }, | |
| { | |
| "epoch": 10.426666666666666, | |
| "grad_norm": 7.127731800079346, | |
| "learning_rate": 9.856057420624259e-05, | |
| "loss": 4.846, | |
| "step": 2346 | |
| }, | |
| { | |
| "epoch": 10.528888888888888, | |
| "grad_norm": 6.327420234680176, | |
| "learning_rate": 9.852087424625717e-05, | |
| "loss": 4.8457, | |
| "step": 2369 | |
| }, | |
| { | |
| "epoch": 10.63111111111111, | |
| "grad_norm": 6.398340225219727, | |
| "learning_rate": 9.848064245345333e-05, | |
| "loss": 4.8295, | |
| "step": 2392 | |
| }, | |
| { | |
| "epoch": 10.733333333333333, | |
| "grad_norm": 5.890859603881836, | |
| "learning_rate": 9.843987926880803e-05, | |
| "loss": 4.8091, | |
| "step": 2415 | |
| }, | |
| { | |
| "epoch": 10.835555555555555, | |
| "grad_norm": 7.191392421722412, | |
| "learning_rate": 9.839858513912276e-05, | |
| "loss": 4.8022, | |
| "step": 2438 | |
| }, | |
| { | |
| "epoch": 10.937777777777779, | |
| "grad_norm": 6.238222122192383, | |
| "learning_rate": 9.835676051701867e-05, | |
| "loss": 4.7898, | |
| "step": 2461 | |
| }, | |
| { | |
| "epoch": 11.04, | |
| "grad_norm": 6.7246551513671875, | |
| "learning_rate": 9.831440586093157e-05, | |
| "loss": 4.7692, | |
| "step": 2484 | |
| }, | |
| { | |
| "epoch": 11.142222222222221, | |
| "grad_norm": 5.07949161529541, | |
| "learning_rate": 9.827152163510693e-05, | |
| "loss": 4.7251, | |
| "step": 2507 | |
| }, | |
| { | |
| "epoch": 11.244444444444444, | |
| "grad_norm": 7.340390682220459, | |
| "learning_rate": 9.82281083095948e-05, | |
| "loss": 4.7188, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 11.346666666666668, | |
| "grad_norm": 5.695153713226318, | |
| "learning_rate": 9.818416636024461e-05, | |
| "loss": 4.7111, | |
| "step": 2553 | |
| }, | |
| { | |
| "epoch": 11.448888888888888, | |
| "grad_norm": 5.70296573638916, | |
| "learning_rate": 9.813969626870002e-05, | |
| "loss": 4.7043, | |
| "step": 2576 | |
| }, | |
| { | |
| "epoch": 11.551111111111112, | |
| "grad_norm": 5.775058269500732, | |
| "learning_rate": 9.809469852239359e-05, | |
| "loss": 4.6924, | |
| "step": 2599 | |
| }, | |
| { | |
| "epoch": 11.653333333333332, | |
| "grad_norm": 7.319630146026611, | |
| "learning_rate": 9.804917361454145e-05, | |
| "loss": 4.6848, | |
| "step": 2622 | |
| }, | |
| { | |
| "epoch": 11.755555555555556, | |
| "grad_norm": 7.945709705352783, | |
| "learning_rate": 9.800312204413793e-05, | |
| "loss": 4.6667, | |
| "step": 2645 | |
| }, | |
| { | |
| "epoch": 11.857777777777777, | |
| "grad_norm": 7.591863632202148, | |
| "learning_rate": 9.795654431595e-05, | |
| "loss": 4.6778, | |
| "step": 2668 | |
| }, | |
| { | |
| "epoch": 11.96, | |
| "grad_norm": 6.433276653289795, | |
| "learning_rate": 9.790944094051187e-05, | |
| "loss": 4.6699, | |
| "step": 2691 | |
| }, | |
| { | |
| "epoch": 12.062222222222223, | |
| "grad_norm": 6.956933975219727, | |
| "learning_rate": 9.786181243411926e-05, | |
| "loss": 4.6113, | |
| "step": 2714 | |
| }, | |
| { | |
| "epoch": 12.164444444444445, | |
| "grad_norm": 5.551136016845703, | |
| "learning_rate": 9.781365931882387e-05, | |
| "loss": 4.582, | |
| "step": 2737 | |
| }, | |
| { | |
| "epoch": 12.266666666666667, | |
| "grad_norm": 7.214599609375, | |
| "learning_rate": 9.776498212242749e-05, | |
| "loss": 4.5932, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 12.36888888888889, | |
| "grad_norm": 6.5685715675354, | |
| "learning_rate": 9.771578137847639e-05, | |
| "loss": 4.5896, | |
| "step": 2783 | |
| }, | |
| { | |
| "epoch": 12.471111111111112, | |
| "grad_norm": 8.017729759216309, | |
| "learning_rate": 9.766605762625541e-05, | |
| "loss": 4.5579, | |
| "step": 2806 | |
| }, | |
| { | |
| "epoch": 12.573333333333334, | |
| "grad_norm": 6.021265983581543, | |
| "learning_rate": 9.761581141078194e-05, | |
| "loss": 4.5715, | |
| "step": 2829 | |
| }, | |
| { | |
| "epoch": 12.675555555555556, | |
| "grad_norm": 7.427931785583496, | |
| "learning_rate": 9.756504328280016e-05, | |
| "loss": 4.5681, | |
| "step": 2852 | |
| }, | |
| { | |
| "epoch": 12.777777777777779, | |
| "grad_norm": 6.325420379638672, | |
| "learning_rate": 9.751375379877481e-05, | |
| "loss": 4.5695, | |
| "step": 2875 | |
| }, | |
| { | |
| "epoch": 12.88, | |
| "grad_norm": 4.837381839752197, | |
| "learning_rate": 9.746194352088518e-05, | |
| "loss": 4.5321, | |
| "step": 2898 | |
| }, | |
| { | |
| "epoch": 12.982222222222223, | |
| "grad_norm": 6.933470726013184, | |
| "learning_rate": 9.740961301701894e-05, | |
| "loss": 4.5286, | |
| "step": 2921 | |
| }, | |
| { | |
| "epoch": 13.084444444444445, | |
| "grad_norm": 5.810832977294922, | |
| "learning_rate": 9.73567628607659e-05, | |
| "loss": 4.463, | |
| "step": 2944 | |
| }, | |
| { | |
| "epoch": 13.186666666666667, | |
| "grad_norm": 7.62490177154541, | |
| "learning_rate": 9.730339363141175e-05, | |
| "loss": 4.462, | |
| "step": 2967 | |
| }, | |
| { | |
| "epoch": 13.28888888888889, | |
| "grad_norm": 6.67575216293335, | |
| "learning_rate": 9.72495059139317e-05, | |
| "loss": 4.4402, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 13.391111111111112, | |
| "grad_norm": 6.110825538635254, | |
| "learning_rate": 9.719510029898398e-05, | |
| "loss": 4.443, | |
| "step": 3013 | |
| }, | |
| { | |
| "epoch": 13.493333333333334, | |
| "grad_norm": 7.317692279815674, | |
| "learning_rate": 9.714017738290358e-05, | |
| "loss": 4.4456, | |
| "step": 3036 | |
| }, | |
| { | |
| "epoch": 13.595555555555556, | |
| "grad_norm": 6.189058303833008, | |
| "learning_rate": 9.708473776769544e-05, | |
| "loss": 4.4524, | |
| "step": 3059 | |
| }, | |
| { | |
| "epoch": 13.697777777777778, | |
| "grad_norm": 5.6017632484436035, | |
| "learning_rate": 9.702878206102811e-05, | |
| "loss": 4.4234, | |
| "step": 3082 | |
| }, | |
| { | |
| "epoch": 13.8, | |
| "grad_norm": 5.7952189445495605, | |
| "learning_rate": 9.697231087622691e-05, | |
| "loss": 4.4016, | |
| "step": 3105 | |
| }, | |
| { | |
| "epoch": 13.902222222222223, | |
| "grad_norm": 5.7486677169799805, | |
| "learning_rate": 9.691532483226723e-05, | |
| "loss": 4.4106, | |
| "step": 3128 | |
| }, | |
| { | |
| "epoch": 14.004444444444445, | |
| "grad_norm": 6.603976249694824, | |
| "learning_rate": 9.68578245537679e-05, | |
| "loss": 4.367, | |
| "step": 3151 | |
| }, | |
| { | |
| "epoch": 14.106666666666667, | |
| "grad_norm": 6.593631744384766, | |
| "learning_rate": 9.679981067098414e-05, | |
| "loss": 4.3122, | |
| "step": 3174 | |
| }, | |
| { | |
| "epoch": 14.20888888888889, | |
| "grad_norm": 6.519464015960693, | |
| "learning_rate": 9.674128381980072e-05, | |
| "loss": 4.3038, | |
| "step": 3197 | |
| }, | |
| { | |
| "epoch": 14.311111111111112, | |
| "grad_norm": 7.691238880157471, | |
| "learning_rate": 9.668224464172508e-05, | |
| "loss": 4.3305, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 14.413333333333334, | |
| "grad_norm": 5.136379718780518, | |
| "learning_rate": 9.66226937838802e-05, | |
| "loss": 4.3137, | |
| "step": 3243 | |
| }, | |
| { | |
| "epoch": 14.515555555555556, | |
| "grad_norm": 5.727292537689209, | |
| "learning_rate": 9.65626318989975e-05, | |
| "loss": 4.3126, | |
| "step": 3266 | |
| }, | |
| { | |
| "epoch": 14.617777777777778, | |
| "grad_norm": 7.882863998413086, | |
| "learning_rate": 9.650205964540978e-05, | |
| "loss": 4.2942, | |
| "step": 3289 | |
| }, | |
| { | |
| "epoch": 14.72, | |
| "grad_norm": 7.945621013641357, | |
| "learning_rate": 9.64409776870439e-05, | |
| "loss": 4.3076, | |
| "step": 3312 | |
| }, | |
| { | |
| "epoch": 14.822222222222223, | |
| "grad_norm": 6.543049335479736, | |
| "learning_rate": 9.637938669341356e-05, | |
| "loss": 4.2815, | |
| "step": 3335 | |
| }, | |
| { | |
| "epoch": 14.924444444444445, | |
| "grad_norm": 5.685489654541016, | |
| "learning_rate": 9.631728733961194e-05, | |
| "loss": 4.2873, | |
| "step": 3358 | |
| }, | |
| { | |
| "epoch": 15.026666666666667, | |
| "grad_norm": 5.528294563293457, | |
| "learning_rate": 9.625468030630432e-05, | |
| "loss": 4.2617, | |
| "step": 3381 | |
| }, | |
| { | |
| "epoch": 15.12888888888889, | |
| "grad_norm": 7.666279315948486, | |
| "learning_rate": 9.619156627972064e-05, | |
| "loss": 4.2157, | |
| "step": 3404 | |
| }, | |
| { | |
| "epoch": 15.231111111111112, | |
| "grad_norm": 6.189380645751953, | |
| "learning_rate": 9.612794595164786e-05, | |
| "loss": 4.207, | |
| "step": 3427 | |
| }, | |
| { | |
| "epoch": 15.333333333333334, | |
| "grad_norm": 6.782273292541504, | |
| "learning_rate": 9.606382001942255e-05, | |
| "loss": 4.1977, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 15.435555555555556, | |
| "grad_norm": 6.819105625152588, | |
| "learning_rate": 9.599918918592313e-05, | |
| "loss": 4.2046, | |
| "step": 3473 | |
| }, | |
| { | |
| "epoch": 15.537777777777778, | |
| "grad_norm": 6.558395862579346, | |
| "learning_rate": 9.593405415956216e-05, | |
| "loss": 4.1959, | |
| "step": 3496 | |
| }, | |
| { | |
| "epoch": 15.64, | |
| "grad_norm": 7.579700946807861, | |
| "learning_rate": 9.58684156542787e-05, | |
| "loss": 4.2004, | |
| "step": 3519 | |
| }, | |
| { | |
| "epoch": 15.742222222222223, | |
| "grad_norm": 5.998022556304932, | |
| "learning_rate": 9.580227438953028e-05, | |
| "loss": 4.1972, | |
| "step": 3542 | |
| }, | |
| { | |
| "epoch": 15.844444444444445, | |
| "grad_norm": 8.631059646606445, | |
| "learning_rate": 9.573563109028523e-05, | |
| "loss": 4.1674, | |
| "step": 3565 | |
| }, | |
| { | |
| "epoch": 15.946666666666667, | |
| "grad_norm": 6.702101230621338, | |
| "learning_rate": 9.566848648701457e-05, | |
| "loss": 4.1303, | |
| "step": 3588 | |
| }, | |
| { | |
| "epoch": 16.04888888888889, | |
| "grad_norm": 7.247947692871094, | |
| "learning_rate": 9.56008413156841e-05, | |
| "loss": 4.0834, | |
| "step": 3611 | |
| }, | |
| { | |
| "epoch": 16.15111111111111, | |
| "grad_norm": 6.5919575691223145, | |
| "learning_rate": 9.553269631774631e-05, | |
| "loss": 4.0488, | |
| "step": 3634 | |
| }, | |
| { | |
| "epoch": 16.253333333333334, | |
| "grad_norm": 8.66784381866455, | |
| "learning_rate": 9.54640522401322e-05, | |
| "loss": 4.0754, | |
| "step": 3657 | |
| }, | |
| { | |
| "epoch": 16.355555555555554, | |
| "grad_norm": 7.605900764465332, | |
| "learning_rate": 9.539490983524316e-05, | |
| "loss": 4.0721, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 16.45777777777778, | |
| "grad_norm": 7.925562381744385, | |
| "learning_rate": 9.532526986094273e-05, | |
| "loss": 4.0685, | |
| "step": 3703 | |
| }, | |
| { | |
| "epoch": 16.56, | |
| "grad_norm": 7.180625915527344, | |
| "learning_rate": 9.525513308054819e-05, | |
| "loss": 4.0581, | |
| "step": 3726 | |
| }, | |
| { | |
| "epoch": 16.662222222222223, | |
| "grad_norm": 6.261662483215332, | |
| "learning_rate": 9.518450026282233e-05, | |
| "loss": 4.0405, | |
| "step": 3749 | |
| }, | |
| { | |
| "epoch": 16.764444444444443, | |
| "grad_norm": 5.739262580871582, | |
| "learning_rate": 9.511337218196494e-05, | |
| "loss": 4.0315, | |
| "step": 3772 | |
| }, | |
| { | |
| "epoch": 16.866666666666667, | |
| "grad_norm": 6.229343891143799, | |
| "learning_rate": 9.504174961760435e-05, | |
| "loss": 4.036, | |
| "step": 3795 | |
| }, | |
| { | |
| "epoch": 16.968888888888888, | |
| "grad_norm": 7.991888046264648, | |
| "learning_rate": 9.496963335478884e-05, | |
| "loss": 4.0707, | |
| "step": 3818 | |
| }, | |
| { | |
| "epoch": 17.07111111111111, | |
| "grad_norm": 5.881919860839844, | |
| "learning_rate": 9.489702418397814e-05, | |
| "loss": 3.9782, | |
| "step": 3841 | |
| }, | |
| { | |
| "epoch": 17.173333333333332, | |
| "grad_norm": 5.624960899353027, | |
| "learning_rate": 9.482392290103462e-05, | |
| "loss": 3.9473, | |
| "step": 3864 | |
| }, | |
| { | |
| "epoch": 17.275555555555556, | |
| "grad_norm": 5.786345481872559, | |
| "learning_rate": 9.475033030721471e-05, | |
| "loss": 3.9561, | |
| "step": 3887 | |
| }, | |
| { | |
| "epoch": 17.377777777777776, | |
| "grad_norm": 7.602824687957764, | |
| "learning_rate": 9.467624720916002e-05, | |
| "loss": 3.9605, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 17.48, | |
| "grad_norm": 6.39411735534668, | |
| "learning_rate": 9.460167441888854e-05, | |
| "loss": 3.9324, | |
| "step": 3933 | |
| }, | |
| { | |
| "epoch": 17.58222222222222, | |
| "grad_norm": 6.903740882873535, | |
| "learning_rate": 9.452661275378576e-05, | |
| "loss": 3.9302, | |
| "step": 3956 | |
| }, | |
| { | |
| "epoch": 17.684444444444445, | |
| "grad_norm": 7.515189170837402, | |
| "learning_rate": 9.445106303659562e-05, | |
| "loss": 3.911, | |
| "step": 3979 | |
| }, | |
| { | |
| "epoch": 17.786666666666665, | |
| "grad_norm": 6.514119625091553, | |
| "learning_rate": 9.43750260954116e-05, | |
| "loss": 3.9168, | |
| "step": 4002 | |
| }, | |
| { | |
| "epoch": 17.88888888888889, | |
| "grad_norm": 5.5810370445251465, | |
| "learning_rate": 9.429850276366758e-05, | |
| "loss": 3.9236, | |
| "step": 4025 | |
| }, | |
| { | |
| "epoch": 17.99111111111111, | |
| "grad_norm": 6.529542446136475, | |
| "learning_rate": 9.422149388012875e-05, | |
| "loss": 3.9076, | |
| "step": 4048 | |
| }, | |
| { | |
| "epoch": 18.093333333333334, | |
| "grad_norm": 5.167507171630859, | |
| "learning_rate": 9.414400028888235e-05, | |
| "loss": 3.8211, | |
| "step": 4071 | |
| }, | |
| { | |
| "epoch": 18.195555555555554, | |
| "grad_norm": 6.467238426208496, | |
| "learning_rate": 9.406602283932845e-05, | |
| "loss": 3.8423, | |
| "step": 4094 | |
| }, | |
| { | |
| "epoch": 18.297777777777778, | |
| "grad_norm": 7.490845203399658, | |
| "learning_rate": 9.398756238617071e-05, | |
| "loss": 3.8308, | |
| "step": 4117 | |
| }, | |
| { | |
| "epoch": 18.4, | |
| "grad_norm": 5.916659832000732, | |
| "learning_rate": 9.390861978940686e-05, | |
| "loss": 3.8273, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 18.502222222222223, | |
| "grad_norm": 6.601635456085205, | |
| "learning_rate": 9.382919591431945e-05, | |
| "loss": 3.8316, | |
| "step": 4163 | |
| }, | |
| { | |
| "epoch": 18.604444444444443, | |
| "grad_norm": 7.86677885055542, | |
| "learning_rate": 9.374929163146621e-05, | |
| "loss": 3.8223, | |
| "step": 4186 | |
| }, | |
| { | |
| "epoch": 18.706666666666667, | |
| "grad_norm": 6.863983154296875, | |
| "learning_rate": 9.36689078166706e-05, | |
| "loss": 3.8244, | |
| "step": 4209 | |
| }, | |
| { | |
| "epoch": 18.808888888888887, | |
| "grad_norm": 6.3487467765808105, | |
| "learning_rate": 9.35880453510122e-05, | |
| "loss": 3.7945, | |
| "step": 4232 | |
| }, | |
| { | |
| "epoch": 18.91111111111111, | |
| "grad_norm": 7.521273612976074, | |
| "learning_rate": 9.350670512081702e-05, | |
| "loss": 3.8077, | |
| "step": 4255 | |
| }, | |
| { | |
| "epoch": 19.013333333333332, | |
| "grad_norm": 7.834831714630127, | |
| "learning_rate": 9.34248880176478e-05, | |
| "loss": 3.7712, | |
| "step": 4278 | |
| }, | |
| { | |
| "epoch": 19.115555555555556, | |
| "grad_norm": 6.245793342590332, | |
| "learning_rate": 9.334259493829423e-05, | |
| "loss": 3.6992, | |
| "step": 4301 | |
| }, | |
| { | |
| "epoch": 19.217777777777776, | |
| "grad_norm": 7.780862808227539, | |
| "learning_rate": 9.325982678476317e-05, | |
| "loss": 3.6929, | |
| "step": 4324 | |
| }, | |
| { | |
| "epoch": 19.32, | |
| "grad_norm": 7.378338813781738, | |
| "learning_rate": 9.317658446426871e-05, | |
| "loss": 3.7204, | |
| "step": 4347 | |
| }, | |
| { | |
| "epoch": 19.42222222222222, | |
| "grad_norm": 6.953887462615967, | |
| "learning_rate": 9.309286888922219e-05, | |
| "loss": 3.7305, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 19.524444444444445, | |
| "grad_norm": 6.669604301452637, | |
| "learning_rate": 9.300868097722235e-05, | |
| "loss": 3.7116, | |
| "step": 4393 | |
| }, | |
| { | |
| "epoch": 19.626666666666665, | |
| "grad_norm": 6.265507221221924, | |
| "learning_rate": 9.292402165104506e-05, | |
| "loss": 3.6961, | |
| "step": 4416 | |
| }, | |
| { | |
| "epoch": 19.72888888888889, | |
| "grad_norm": 6.823009967803955, | |
| "learning_rate": 9.28388918386334e-05, | |
| "loss": 3.6913, | |
| "step": 4439 | |
| }, | |
| { | |
| "epoch": 19.83111111111111, | |
| "grad_norm": 6.928945064544678, | |
| "learning_rate": 9.275329247308737e-05, | |
| "loss": 3.7144, | |
| "step": 4462 | |
| }, | |
| { | |
| "epoch": 19.933333333333334, | |
| "grad_norm": 7.16089391708374, | |
| "learning_rate": 9.26672244926537e-05, | |
| "loss": 3.7168, | |
| "step": 4485 | |
| }, | |
| { | |
| "epoch": 20.035555555555554, | |
| "grad_norm": 6.4695563316345215, | |
| "learning_rate": 9.258068884071559e-05, | |
| "loss": 3.655, | |
| "step": 4508 | |
| }, | |
| { | |
| "epoch": 20.137777777777778, | |
| "grad_norm": 6.902811527252197, | |
| "learning_rate": 9.249368646578227e-05, | |
| "loss": 3.5855, | |
| "step": 4531 | |
| }, | |
| { | |
| "epoch": 20.24, | |
| "grad_norm": 7.336968898773193, | |
| "learning_rate": 9.24062183214788e-05, | |
| "loss": 3.5716, | |
| "step": 4554 | |
| }, | |
| { | |
| "epoch": 20.342222222222222, | |
| "grad_norm": 6.539813995361328, | |
| "learning_rate": 9.231828536653537e-05, | |
| "loss": 3.6035, | |
| "step": 4577 | |
| }, | |
| { | |
| "epoch": 20.444444444444443, | |
| "grad_norm": 8.689528465270996, | |
| "learning_rate": 9.222988856477702e-05, | |
| "loss": 3.6179, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 20.546666666666667, | |
| "grad_norm": 8.209162712097168, | |
| "learning_rate": 9.214102888511287e-05, | |
| "loss": 3.6182, | |
| "step": 4623 | |
| }, | |
| { | |
| "epoch": 20.648888888888887, | |
| "grad_norm": 7.320056438446045, | |
| "learning_rate": 9.20517073015257e-05, | |
| "loss": 3.5944, | |
| "step": 4646 | |
| }, | |
| { | |
| "epoch": 20.75111111111111, | |
| "grad_norm": 7.204301357269287, | |
| "learning_rate": 9.196192479306114e-05, | |
| "loss": 3.5922, | |
| "step": 4669 | |
| }, | |
| { | |
| "epoch": 20.85333333333333, | |
| "grad_norm": 7.85291051864624, | |
| "learning_rate": 9.187168234381692e-05, | |
| "loss": 3.5992, | |
| "step": 4692 | |
| }, | |
| { | |
| "epoch": 20.955555555555556, | |
| "grad_norm": 6.276856422424316, | |
| "learning_rate": 9.178098094293222e-05, | |
| "loss": 3.5929, | |
| "step": 4715 | |
| }, | |
| { | |
| "epoch": 21.057777777777776, | |
| "grad_norm": 5.835750102996826, | |
| "learning_rate": 9.168982158457672e-05, | |
| "loss": 3.5289, | |
| "step": 4738 | |
| }, | |
| { | |
| "epoch": 21.16, | |
| "grad_norm": 6.322780609130859, | |
| "learning_rate": 9.159820526793969e-05, | |
| "loss": 3.4881, | |
| "step": 4761 | |
| }, | |
| { | |
| "epoch": 21.26222222222222, | |
| "grad_norm": 7.341971397399902, | |
| "learning_rate": 9.150613299721916e-05, | |
| "loss": 3.4799, | |
| "step": 4784 | |
| }, | |
| { | |
| "epoch": 21.364444444444445, | |
| "grad_norm": 6.387499809265137, | |
| "learning_rate": 9.14136057816107e-05, | |
| "loss": 3.4747, | |
| "step": 4807 | |
| }, | |
| { | |
| "epoch": 21.466666666666665, | |
| "grad_norm": 7.271056175231934, | |
| "learning_rate": 9.132062463529665e-05, | |
| "loss": 3.4783, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 21.56888888888889, | |
| "grad_norm": 7.323966026306152, | |
| "learning_rate": 9.122719057743473e-05, | |
| "loss": 3.4756, | |
| "step": 4853 | |
| }, | |
| { | |
| "epoch": 21.67111111111111, | |
| "grad_norm": 7.535403251647949, | |
| "learning_rate": 9.113330463214699e-05, | |
| "loss": 3.4825, | |
| "step": 4876 | |
| }, | |
| { | |
| "epoch": 21.773333333333333, | |
| "grad_norm": 5.771243095397949, | |
| "learning_rate": 9.103896782850865e-05, | |
| "loss": 3.4737, | |
| "step": 4899 | |
| }, | |
| { | |
| "epoch": 21.875555555555554, | |
| "grad_norm": 8.020050048828125, | |
| "learning_rate": 9.094418120053667e-05, | |
| "loss": 3.4709, | |
| "step": 4922 | |
| }, | |
| { | |
| "epoch": 21.977777777777778, | |
| "grad_norm": 6.315218448638916, | |
| "learning_rate": 9.08489457871785e-05, | |
| "loss": 3.4751, | |
| "step": 4945 | |
| }, | |
| { | |
| "epoch": 22.08, | |
| "grad_norm": 7.77646017074585, | |
| "learning_rate": 9.075326263230073e-05, | |
| "loss": 3.4052, | |
| "step": 4968 | |
| }, | |
| { | |
| "epoch": 22.182222222222222, | |
| "grad_norm": 6.549800872802734, | |
| "learning_rate": 9.065713278467755e-05, | |
| "loss": 3.3815, | |
| "step": 4991 | |
| }, | |
| { | |
| "epoch": 22.284444444444443, | |
| "grad_norm": 6.499227523803711, | |
| "learning_rate": 9.056055729797938e-05, | |
| "loss": 3.3818, | |
| "step": 5014 | |
| }, | |
| { | |
| "epoch": 22.386666666666667, | |
| "grad_norm": 7.827967643737793, | |
| "learning_rate": 9.046353723076117e-05, | |
| "loss": 3.3781, | |
| "step": 5037 | |
| }, | |
| { | |
| "epoch": 22.488888888888887, | |
| "grad_norm": 8.61707592010498, | |
| "learning_rate": 9.036607364645094e-05, | |
| "loss": 3.362, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 22.59111111111111, | |
| "grad_norm": 8.287631034851074, | |
| "learning_rate": 9.026816761333799e-05, | |
| "loss": 3.3951, | |
| "step": 5083 | |
| }, | |
| { | |
| "epoch": 22.693333333333335, | |
| "grad_norm": 6.027954578399658, | |
| "learning_rate": 9.016982020456133e-05, | |
| "loss": 3.3988, | |
| "step": 5106 | |
| }, | |
| { | |
| "epoch": 22.795555555555556, | |
| "grad_norm": 5.422713756561279, | |
| "learning_rate": 9.00710324980978e-05, | |
| "loss": 3.3986, | |
| "step": 5129 | |
| }, | |
| { | |
| "epoch": 22.897777777777776, | |
| "grad_norm": 6.52266788482666, | |
| "learning_rate": 8.997180557675034e-05, | |
| "loss": 3.3685, | |
| "step": 5152 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "grad_norm": 8.5319242477417, | |
| "learning_rate": 8.987214052813604e-05, | |
| "loss": 3.3852, | |
| "step": 5175 | |
| }, | |
| { | |
| "epoch": 23.102222222222224, | |
| "grad_norm": 5.753627300262451, | |
| "learning_rate": 8.977203844467432e-05, | |
| "loss": 3.2861, | |
| "step": 5198 | |
| }, | |
| { | |
| "epoch": 23.204444444444444, | |
| "grad_norm": 6.238333225250244, | |
| "learning_rate": 8.967150042357484e-05, | |
| "loss": 3.297, | |
| "step": 5221 | |
| }, | |
| { | |
| "epoch": 23.306666666666665, | |
| "grad_norm": 7.126039505004883, | |
| "learning_rate": 8.957052756682556e-05, | |
| "loss": 3.3114, | |
| "step": 5244 | |
| }, | |
| { | |
| "epoch": 23.40888888888889, | |
| "grad_norm": 7.4155426025390625, | |
| "learning_rate": 8.946912098118066e-05, | |
| "loss": 3.3054, | |
| "step": 5267 | |
| }, | |
| { | |
| "epoch": 23.511111111111113, | |
| "grad_norm": 6.702388763427734, | |
| "learning_rate": 8.93672817781483e-05, | |
| "loss": 3.2675, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 23.613333333333333, | |
| "grad_norm": 7.878185272216797, | |
| "learning_rate": 8.926501107397863e-05, | |
| "loss": 3.2968, | |
| "step": 5313 | |
| }, | |
| { | |
| "epoch": 23.715555555555554, | |
| "grad_norm": 7.802605152130127, | |
| "learning_rate": 8.916230998965134e-05, | |
| "loss": 3.2743, | |
| "step": 5336 | |
| }, | |
| { | |
| "epoch": 23.817777777777778, | |
| "grad_norm": 6.080660820007324, | |
| "learning_rate": 8.905917965086356e-05, | |
| "loss": 3.287, | |
| "step": 5359 | |
| }, | |
| { | |
| "epoch": 23.92, | |
| "grad_norm": 7.292867660522461, | |
| "learning_rate": 8.895562118801738e-05, | |
| "loss": 3.2723, | |
| "step": 5382 | |
| }, | |
| { | |
| "epoch": 24.022222222222222, | |
| "grad_norm": 6.736908435821533, | |
| "learning_rate": 8.885163573620754e-05, | |
| "loss": 3.2406, | |
| "step": 5405 | |
| }, | |
| { | |
| "epoch": 24.124444444444446, | |
| "grad_norm": 5.8357343673706055, | |
| "learning_rate": 8.874722443520899e-05, | |
| "loss": 3.1797, | |
| "step": 5428 | |
| }, | |
| { | |
| "epoch": 24.226666666666667, | |
| "grad_norm": 6.487481117248535, | |
| "learning_rate": 8.864238842946433e-05, | |
| "loss": 3.1876, | |
| "step": 5451 | |
| }, | |
| { | |
| "epoch": 24.32888888888889, | |
| "grad_norm": 7.854300498962402, | |
| "learning_rate": 8.853712886807132e-05, | |
| "loss": 3.2056, | |
| "step": 5474 | |
| }, | |
| { | |
| "epoch": 24.43111111111111, | |
| "grad_norm": 8.225058555603027, | |
| "learning_rate": 8.84314469047703e-05, | |
| "loss": 3.2518, | |
| "step": 5497 | |
| }, | |
| { | |
| "epoch": 24.533333333333335, | |
| "grad_norm": 7.0223236083984375, | |
| "learning_rate": 8.832534369793153e-05, | |
| "loss": 3.2102, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 24.635555555555555, | |
| "grad_norm": 7.004525661468506, | |
| "learning_rate": 8.821882041054239e-05, | |
| "loss": 3.1674, | |
| "step": 5543 | |
| }, | |
| { | |
| "epoch": 24.73777777777778, | |
| "grad_norm": 7.304614543914795, | |
| "learning_rate": 8.811187821019486e-05, | |
| "loss": 3.188, | |
| "step": 5566 | |
| }, | |
| { | |
| "epoch": 24.84, | |
| "grad_norm": 6.002228736877441, | |
| "learning_rate": 8.800451826907245e-05, | |
| "loss": 3.1785, | |
| "step": 5589 | |
| }, | |
| { | |
| "epoch": 24.942222222222224, | |
| "grad_norm": 6.998710632324219, | |
| "learning_rate": 8.789674176393761e-05, | |
| "loss": 3.1713, | |
| "step": 5612 | |
| }, | |
| { | |
| "epoch": 25.044444444444444, | |
| "grad_norm": 7.029483795166016, | |
| "learning_rate": 8.77885498761186e-05, | |
| "loss": 3.1521, | |
| "step": 5635 | |
| }, | |
| { | |
| "epoch": 25.14666666666667, | |
| "grad_norm": 6.024033069610596, | |
| "learning_rate": 8.767994379149675e-05, | |
| "loss": 3.0885, | |
| "step": 5658 | |
| }, | |
| { | |
| "epoch": 25.24888888888889, | |
| "grad_norm": 7.233892440795898, | |
| "learning_rate": 8.757092470049329e-05, | |
| "loss": 3.0891, | |
| "step": 5681 | |
| }, | |
| { | |
| "epoch": 25.351111111111113, | |
| "grad_norm": 7.917546272277832, | |
| "learning_rate": 8.74614937980564e-05, | |
| "loss": 3.1085, | |
| "step": 5704 | |
| }, | |
| { | |
| "epoch": 25.453333333333333, | |
| "grad_norm": 7.8942437171936035, | |
| "learning_rate": 8.735165228364809e-05, | |
| "loss": 3.0931, | |
| "step": 5727 | |
| }, | |
| { | |
| "epoch": 25.555555555555557, | |
| "grad_norm": 7.184880256652832, | |
| "learning_rate": 8.724140136123106e-05, | |
| "loss": 3.1079, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 25.657777777777778, | |
| "grad_norm": 5.8746137619018555, | |
| "learning_rate": 8.713074223925546e-05, | |
| "loss": 3.0924, | |
| "step": 5773 | |
| }, | |
| { | |
| "epoch": 25.76, | |
| "grad_norm": 6.722870826721191, | |
| "learning_rate": 8.701967613064575e-05, | |
| "loss": 3.0918, | |
| "step": 5796 | |
| }, | |
| { | |
| "epoch": 25.862222222222222, | |
| "grad_norm": 8.777771949768066, | |
| "learning_rate": 8.690820425278721e-05, | |
| "loss": 3.1046, | |
| "step": 5819 | |
| }, | |
| { | |
| "epoch": 25.964444444444446, | |
| "grad_norm": 7.208896636962891, | |
| "learning_rate": 8.679632782751283e-05, | |
| "loss": 3.1053, | |
| "step": 5842 | |
| }, | |
| { | |
| "epoch": 26.066666666666666, | |
| "grad_norm": 12.179716110229492, | |
| "learning_rate": 8.668404808108978e-05, | |
| "loss": 3.034, | |
| "step": 5865 | |
| }, | |
| { | |
| "epoch": 26.16888888888889, | |
| "grad_norm": 6.9270501136779785, | |
| "learning_rate": 8.657136624420596e-05, | |
| "loss": 2.982, | |
| "step": 5888 | |
| }, | |
| { | |
| "epoch": 26.27111111111111, | |
| "grad_norm": 6.495911598205566, | |
| "learning_rate": 8.645828355195658e-05, | |
| "loss": 2.9953, | |
| "step": 5911 | |
| }, | |
| { | |
| "epoch": 26.373333333333335, | |
| "grad_norm": 6.193568229675293, | |
| "learning_rate": 8.634480124383057e-05, | |
| "loss": 3.0264, | |
| "step": 5934 | |
| }, | |
| { | |
| "epoch": 26.475555555555555, | |
| "grad_norm": 7.5366034507751465, | |
| "learning_rate": 8.623092056369704e-05, | |
| "loss": 3.029, | |
| "step": 5957 | |
| }, | |
| { | |
| "epoch": 26.57777777777778, | |
| "grad_norm": 7.380651473999023, | |
| "learning_rate": 8.611664275979157e-05, | |
| "loss": 3.0148, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 26.68, | |
| "grad_norm": 6.579084396362305, | |
| "learning_rate": 8.600196908470265e-05, | |
| "loss": 3.0019, | |
| "step": 6003 | |
| }, | |
| { | |
| "epoch": 26.782222222222224, | |
| "grad_norm": 7.964267253875732, | |
| "learning_rate": 8.588690079535779e-05, | |
| "loss": 3.0102, | |
| "step": 6026 | |
| }, | |
| { | |
| "epoch": 26.884444444444444, | |
| "grad_norm": 7.465826034545898, | |
| "learning_rate": 8.577143915300993e-05, | |
| "loss": 2.9759, | |
| "step": 6049 | |
| }, | |
| { | |
| "epoch": 26.986666666666668, | |
| "grad_norm": 6.584536552429199, | |
| "learning_rate": 8.56555854232234e-05, | |
| "loss": 2.9609, | |
| "step": 6072 | |
| }, | |
| { | |
| "epoch": 27.08888888888889, | |
| "grad_norm": 6.6631550788879395, | |
| "learning_rate": 8.553934087586026e-05, | |
| "loss": 2.8921, | |
| "step": 6095 | |
| }, | |
| { | |
| "epoch": 27.191111111111113, | |
| "grad_norm": 7.030783176422119, | |
| "learning_rate": 8.542270678506625e-05, | |
| "loss": 2.8946, | |
| "step": 6118 | |
| }, | |
| { | |
| "epoch": 27.293333333333333, | |
| "grad_norm": 6.412444114685059, | |
| "learning_rate": 8.530568442925684e-05, | |
| "loss": 2.9002, | |
| "step": 6141 | |
| }, | |
| { | |
| "epoch": 27.395555555555557, | |
| "grad_norm": 8.111526489257812, | |
| "learning_rate": 8.518827509110328e-05, | |
| "loss": 2.9037, | |
| "step": 6164 | |
| }, | |
| { | |
| "epoch": 27.497777777777777, | |
| "grad_norm": 6.402091026306152, | |
| "learning_rate": 8.507048005751847e-05, | |
| "loss": 2.9006, | |
| "step": 6187 | |
| }, | |
| { | |
| "epoch": 27.6, | |
| "grad_norm": 7.210970878601074, | |
| "learning_rate": 8.495230061964288e-05, | |
| "loss": 2.911, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 27.702222222222222, | |
| "grad_norm": 9.301465034484863, | |
| "learning_rate": 8.48337380728304e-05, | |
| "loss": 2.915, | |
| "step": 6233 | |
| }, | |
| { | |
| "epoch": 27.804444444444446, | |
| "grad_norm": 10.22038745880127, | |
| "learning_rate": 8.471479371663417e-05, | |
| "loss": 2.9234, | |
| "step": 6256 | |
| }, | |
| { | |
| "epoch": 27.906666666666666, | |
| "grad_norm": 8.557666778564453, | |
| "learning_rate": 8.459546885479226e-05, | |
| "loss": 2.9312, | |
| "step": 6279 | |
| }, | |
| { | |
| "epoch": 28.00888888888889, | |
| "grad_norm": 8.308337211608887, | |
| "learning_rate": 8.447576479521348e-05, | |
| "loss": 2.9055, | |
| "step": 6302 | |
| }, | |
| { | |
| "epoch": 28.11111111111111, | |
| "grad_norm": 9.826993942260742, | |
| "learning_rate": 8.435568284996294e-05, | |
| "loss": 2.795, | |
| "step": 6325 | |
| }, | |
| { | |
| "epoch": 28.213333333333335, | |
| "grad_norm": 7.39091157913208, | |
| "learning_rate": 8.423522433524776e-05, | |
| "loss": 2.7985, | |
| "step": 6348 | |
| }, | |
| { | |
| "epoch": 28.315555555555555, | |
| "grad_norm": 7.943458557128906, | |
| "learning_rate": 8.411439057140257e-05, | |
| "loss": 2.804, | |
| "step": 6371 | |
| }, | |
| { | |
| "epoch": 28.41777777777778, | |
| "grad_norm": 7.037588119506836, | |
| "learning_rate": 8.399318288287512e-05, | |
| "loss": 2.8196, | |
| "step": 6394 | |
| }, | |
| { | |
| "epoch": 28.52, | |
| "grad_norm": 6.966550350189209, | |
| "learning_rate": 8.387160259821166e-05, | |
| "loss": 2.8037, | |
| "step": 6417 | |
| }, | |
| { | |
| "epoch": 28.622222222222224, | |
| "grad_norm": 6.990281105041504, | |
| "learning_rate": 8.374965105004244e-05, | |
| "loss": 2.8049, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 28.724444444444444, | |
| "grad_norm": 8.029483795166016, | |
| "learning_rate": 8.362732957506714e-05, | |
| "loss": 2.8056, | |
| "step": 6463 | |
| }, | |
| { | |
| "epoch": 28.826666666666668, | |
| "grad_norm": 6.398525714874268, | |
| "learning_rate": 8.350463951404012e-05, | |
| "loss": 2.8254, | |
| "step": 6486 | |
| }, | |
| { | |
| "epoch": 28.92888888888889, | |
| "grad_norm": 9.660991668701172, | |
| "learning_rate": 8.338158221175581e-05, | |
| "loss": 2.8516, | |
| "step": 6509 | |
| }, | |
| { | |
| "epoch": 29.031111111111112, | |
| "grad_norm": 7.429766654968262, | |
| "learning_rate": 8.325815901703394e-05, | |
| "loss": 2.8115, | |
| "step": 6532 | |
| }, | |
| { | |
| "epoch": 29.133333333333333, | |
| "grad_norm": 6.842705726623535, | |
| "learning_rate": 8.313437128270469e-05, | |
| "loss": 2.7238, | |
| "step": 6555 | |
| }, | |
| { | |
| "epoch": 29.235555555555557, | |
| "grad_norm": 9.195459365844727, | |
| "learning_rate": 8.301022036559405e-05, | |
| "loss": 2.7192, | |
| "step": 6578 | |
| }, | |
| { | |
| "epoch": 29.337777777777777, | |
| "grad_norm": 7.685567378997803, | |
| "learning_rate": 8.288570762650869e-05, | |
| "loss": 2.7009, | |
| "step": 6601 | |
| }, | |
| { | |
| "epoch": 29.44, | |
| "grad_norm": 6.384602069854736, | |
| "learning_rate": 8.276083443022126e-05, | |
| "loss": 2.7286, | |
| "step": 6624 | |
| }, | |
| { | |
| "epoch": 29.54222222222222, | |
| "grad_norm": 7.564410209655762, | |
| "learning_rate": 8.263560214545532e-05, | |
| "loss": 2.7405, | |
| "step": 6647 | |
| }, | |
| { | |
| "epoch": 29.644444444444446, | |
| "grad_norm": 6.835319995880127, | |
| "learning_rate": 8.251001214487039e-05, | |
| "loss": 2.7197, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 29.746666666666666, | |
| "grad_norm": 7.009396553039551, | |
| "learning_rate": 8.238406580504683e-05, | |
| "loss": 2.7322, | |
| "step": 6693 | |
| }, | |
| { | |
| "epoch": 29.84888888888889, | |
| "grad_norm": 6.862404823303223, | |
| "learning_rate": 8.225776450647082e-05, | |
| "loss": 2.7476, | |
| "step": 6716 | |
| }, | |
| { | |
| "epoch": 29.95111111111111, | |
| "grad_norm": 6.345396041870117, | |
| "learning_rate": 8.213110963351928e-05, | |
| "loss": 2.7317, | |
| "step": 6739 | |
| }, | |
| { | |
| "epoch": 30.053333333333335, | |
| "grad_norm": 7.607011795043945, | |
| "learning_rate": 8.200410257444451e-05, | |
| "loss": 2.6859, | |
| "step": 6762 | |
| }, | |
| { | |
| "epoch": 30.155555555555555, | |
| "grad_norm": 6.952041149139404, | |
| "learning_rate": 8.187674472135915e-05, | |
| "loss": 2.6587, | |
| "step": 6785 | |
| }, | |
| { | |
| "epoch": 30.25777777777778, | |
| "grad_norm": 6.717074394226074, | |
| "learning_rate": 8.17490374702209e-05, | |
| "loss": 2.6636, | |
| "step": 6808 | |
| }, | |
| { | |
| "epoch": 30.36, | |
| "grad_norm": 7.299156665802002, | |
| "learning_rate": 8.162098222081711e-05, | |
| "loss": 2.6731, | |
| "step": 6831 | |
| }, | |
| { | |
| "epoch": 30.462222222222223, | |
| "grad_norm": 7.86132287979126, | |
| "learning_rate": 8.149258037674952e-05, | |
| "loss": 2.6568, | |
| "step": 6854 | |
| }, | |
| { | |
| "epoch": 30.564444444444444, | |
| "grad_norm": 6.957241535186768, | |
| "learning_rate": 8.13638333454189e-05, | |
| "loss": 2.621, | |
| "step": 6877 | |
| }, | |
| { | |
| "epoch": 30.666666666666668, | |
| "grad_norm": 7.0929741859436035, | |
| "learning_rate": 8.123474253800957e-05, | |
| "loss": 2.6453, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 30.76888888888889, | |
| "grad_norm": 7.3665385246276855, | |
| "learning_rate": 8.110530936947392e-05, | |
| "loss": 2.6668, | |
| "step": 6923 | |
| }, | |
| { | |
| "epoch": 30.871111111111112, | |
| "grad_norm": 8.744823455810547, | |
| "learning_rate": 8.097553525851693e-05, | |
| "loss": 2.6759, | |
| "step": 6946 | |
| }, | |
| { | |
| "epoch": 30.973333333333333, | |
| "grad_norm": 6.603512287139893, | |
| "learning_rate": 8.084542162758067e-05, | |
| "loss": 2.6677, | |
| "step": 6969 | |
| }, | |
| { | |
| "epoch": 31.075555555555557, | |
| "grad_norm": 6.355960369110107, | |
| "learning_rate": 8.071496990282861e-05, | |
| "loss": 2.6044, | |
| "step": 6992 | |
| }, | |
| { | |
| "epoch": 31.177777777777777, | |
| "grad_norm": 6.957365989685059, | |
| "learning_rate": 8.058418151413005e-05, | |
| "loss": 2.5647, | |
| "step": 7015 | |
| }, | |
| { | |
| "epoch": 31.28, | |
| "grad_norm": 7.455416679382324, | |
| "learning_rate": 8.045305789504444e-05, | |
| "loss": 2.5981, | |
| "step": 7038 | |
| }, | |
| { | |
| "epoch": 31.38222222222222, | |
| "grad_norm": 6.41038703918457, | |
| "learning_rate": 8.032160048280566e-05, | |
| "loss": 2.6026, | |
| "step": 7061 | |
| }, | |
| { | |
| "epoch": 31.484444444444446, | |
| "grad_norm": 8.298896789550781, | |
| "learning_rate": 8.018981071830622e-05, | |
| "loss": 2.5975, | |
| "step": 7084 | |
| }, | |
| { | |
| "epoch": 31.586666666666666, | |
| "grad_norm": 9.506787300109863, | |
| "learning_rate": 8.005769004608156e-05, | |
| "loss": 2.6356, | |
| "step": 7107 | |
| }, | |
| { | |
| "epoch": 31.68888888888889, | |
| "grad_norm": 8.870840072631836, | |
| "learning_rate": 7.992523991429419e-05, | |
| "loss": 2.6015, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 31.79111111111111, | |
| "grad_norm": 8.160204887390137, | |
| "learning_rate": 7.979246177471773e-05, | |
| "loss": 2.593, | |
| "step": 7153 | |
| }, | |
| { | |
| "epoch": 31.893333333333334, | |
| "grad_norm": 6.366309642791748, | |
| "learning_rate": 7.96593570827211e-05, | |
| "loss": 2.5548, | |
| "step": 7176 | |
| }, | |
| { | |
| "epoch": 31.995555555555555, | |
| "grad_norm": 6.812814712524414, | |
| "learning_rate": 7.952592729725254e-05, | |
| "loss": 2.5352, | |
| "step": 7199 | |
| }, | |
| { | |
| "epoch": 32.09777777777778, | |
| "grad_norm": 6.476632118225098, | |
| "learning_rate": 7.939217388082361e-05, | |
| "loss": 2.4694, | |
| "step": 7222 | |
| }, | |
| { | |
| "epoch": 32.2, | |
| "grad_norm": 7.325323104858398, | |
| "learning_rate": 7.925809829949312e-05, | |
| "loss": 2.4581, | |
| "step": 7245 | |
| }, | |
| { | |
| "epoch": 32.30222222222222, | |
| "grad_norm": 7.190999984741211, | |
| "learning_rate": 7.912370202285113e-05, | |
| "loss": 2.4829, | |
| "step": 7268 | |
| }, | |
| { | |
| "epoch": 32.404444444444444, | |
| "grad_norm": 7.949245452880859, | |
| "learning_rate": 7.898898652400281e-05, | |
| "loss": 2.5134, | |
| "step": 7291 | |
| }, | |
| { | |
| "epoch": 32.50666666666667, | |
| "grad_norm": 7.711633682250977, | |
| "learning_rate": 7.88539532795523e-05, | |
| "loss": 2.5374, | |
| "step": 7314 | |
| }, | |
| { | |
| "epoch": 32.60888888888889, | |
| "grad_norm": 7.286764621734619, | |
| "learning_rate": 7.87186037695865e-05, | |
| "loss": 2.4946, | |
| "step": 7337 | |
| }, | |
| { | |
| "epoch": 32.71111111111111, | |
| "grad_norm": 7.322375774383545, | |
| "learning_rate": 7.858293947765892e-05, | |
| "loss": 2.5086, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 32.81333333333333, | |
| "grad_norm": 7.134939670562744, | |
| "learning_rate": 7.844696189077328e-05, | |
| "loss": 2.4963, | |
| "step": 7383 | |
| }, | |
| { | |
| "epoch": 32.91555555555556, | |
| "grad_norm": 7.648177623748779, | |
| "learning_rate": 7.831067249936734e-05, | |
| "loss": 2.4857, | |
| "step": 7406 | |
| }, | |
| { | |
| "epoch": 33.01777777777778, | |
| "grad_norm": 6.730453968048096, | |
| "learning_rate": 7.817407279729657e-05, | |
| "loss": 2.4906, | |
| "step": 7429 | |
| }, | |
| { | |
| "epoch": 33.12, | |
| "grad_norm": 6.662753105163574, | |
| "learning_rate": 7.803716428181763e-05, | |
| "loss": 2.4054, | |
| "step": 7452 | |
| }, | |
| { | |
| "epoch": 33.22222222222222, | |
| "grad_norm": 6.583335876464844, | |
| "learning_rate": 7.789994845357212e-05, | |
| "loss": 2.3762, | |
| "step": 7475 | |
| }, | |
| { | |
| "epoch": 33.324444444444445, | |
| "grad_norm": 6.661638259887695, | |
| "learning_rate": 7.776242681657006e-05, | |
| "loss": 2.4166, | |
| "step": 7498 | |
| }, | |
| { | |
| "epoch": 33.42666666666667, | |
| "grad_norm": 6.506235599517822, | |
| "learning_rate": 7.762460087817343e-05, | |
| "loss": 2.4081, | |
| "step": 7521 | |
| }, | |
| { | |
| "epoch": 33.528888888888886, | |
| "grad_norm": 8.114941596984863, | |
| "learning_rate": 7.748647214907954e-05, | |
| "loss": 2.4189, | |
| "step": 7544 | |
| }, | |
| { | |
| "epoch": 33.63111111111111, | |
| "grad_norm": 7.059467315673828, | |
| "learning_rate": 7.73480421433047e-05, | |
| "loss": 2.4416, | |
| "step": 7567 | |
| }, | |
| { | |
| "epoch": 33.733333333333334, | |
| "grad_norm": 9.18146800994873, | |
| "learning_rate": 7.720931237816735e-05, | |
| "loss": 2.4374, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 33.83555555555556, | |
| "grad_norm": 7.458983898162842, | |
| "learning_rate": 7.707028437427164e-05, | |
| "loss": 2.4392, | |
| "step": 7613 | |
| }, | |
| { | |
| "epoch": 33.937777777777775, | |
| "grad_norm": 6.761877536773682, | |
| "learning_rate": 7.693095965549069e-05, | |
| "loss": 2.4354, | |
| "step": 7636 | |
| }, | |
| { | |
| "epoch": 34.04, | |
| "grad_norm": 7.720556735992432, | |
| "learning_rate": 7.679133974894983e-05, | |
| "loss": 2.3844, | |
| "step": 7659 | |
| }, | |
| { | |
| "epoch": 34.14222222222222, | |
| "grad_norm": 6.558327674865723, | |
| "learning_rate": 7.665142618501e-05, | |
| "loss": 2.3599, | |
| "step": 7682 | |
| }, | |
| { | |
| "epoch": 34.24444444444445, | |
| "grad_norm": 6.790546894073486, | |
| "learning_rate": 7.651122049725082e-05, | |
| "loss": 2.3541, | |
| "step": 7705 | |
| }, | |
| { | |
| "epoch": 34.346666666666664, | |
| "grad_norm": 6.559151649475098, | |
| "learning_rate": 7.637072422245386e-05, | |
| "loss": 2.3684, | |
| "step": 7728 | |
| }, | |
| { | |
| "epoch": 34.44888888888889, | |
| "grad_norm": 8.255489349365234, | |
| "learning_rate": 7.622993890058582e-05, | |
| "loss": 2.3799, | |
| "step": 7751 | |
| }, | |
| { | |
| "epoch": 34.55111111111111, | |
| "grad_norm": 8.185545921325684, | |
| "learning_rate": 7.60888660747816e-05, | |
| "loss": 2.3723, | |
| "step": 7774 | |
| }, | |
| { | |
| "epoch": 34.653333333333336, | |
| "grad_norm": 7.4899516105651855, | |
| "learning_rate": 7.594750729132743e-05, | |
| "loss": 2.3813, | |
| "step": 7797 | |
| }, | |
| { | |
| "epoch": 34.75555555555555, | |
| "grad_norm": 6.652093887329102, | |
| "learning_rate": 7.580586409964382e-05, | |
| "loss": 2.3641, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 34.85777777777778, | |
| "grad_norm": 6.916318893432617, | |
| "learning_rate": 7.566393805226874e-05, | |
| "loss": 2.3689, | |
| "step": 7843 | |
| }, | |
| { | |
| "epoch": 34.96, | |
| "grad_norm": 7.0521559715271, | |
| "learning_rate": 7.552173070484048e-05, | |
| "loss": 2.3528, | |
| "step": 7866 | |
| }, | |
| { | |
| "epoch": 35.062222222222225, | |
| "grad_norm": 7.043063163757324, | |
| "learning_rate": 7.537924361608062e-05, | |
| "loss": 2.2977, | |
| "step": 7889 | |
| }, | |
| { | |
| "epoch": 35.16444444444444, | |
| "grad_norm": 6.285613059997559, | |
| "learning_rate": 7.523647834777698e-05, | |
| "loss": 2.2593, | |
| "step": 7912 | |
| }, | |
| { | |
| "epoch": 35.266666666666666, | |
| "grad_norm": 7.13001012802124, | |
| "learning_rate": 7.509343646476646e-05, | |
| "loss": 2.268, | |
| "step": 7935 | |
| }, | |
| { | |
| "epoch": 35.36888888888889, | |
| "grad_norm": 6.38799524307251, | |
| "learning_rate": 7.495011953491793e-05, | |
| "loss": 2.291, | |
| "step": 7958 | |
| }, | |
| { | |
| "epoch": 35.471111111111114, | |
| "grad_norm": 7.488864421844482, | |
| "learning_rate": 7.480652912911501e-05, | |
| "loss": 2.3234, | |
| "step": 7981 | |
| }, | |
| { | |
| "epoch": 35.57333333333333, | |
| "grad_norm": 6.8178558349609375, | |
| "learning_rate": 7.466266682123888e-05, | |
| "loss": 2.3204, | |
| "step": 8004 | |
| }, | |
| { | |
| "epoch": 35.675555555555555, | |
| "grad_norm": 7.1541748046875, | |
| "learning_rate": 7.451853418815097e-05, | |
| "loss": 2.3137, | |
| "step": 8027 | |
| }, | |
| { | |
| "epoch": 35.77777777777778, | |
| "grad_norm": 8.040066719055176, | |
| "learning_rate": 7.437413280967578e-05, | |
| "loss": 2.3173, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 35.88, | |
| "grad_norm": 8.158806800842285, | |
| "learning_rate": 7.422946426858345e-05, | |
| "loss": 2.2952, | |
| "step": 8073 | |
| }, | |
| { | |
| "epoch": 35.98222222222222, | |
| "grad_norm": 7.60796594619751, | |
| "learning_rate": 7.408453015057252e-05, | |
| "loss": 2.2707, | |
| "step": 8096 | |
| }, | |
| { | |
| "epoch": 36.08444444444444, | |
| "grad_norm": 6.903555870056152, | |
| "learning_rate": 7.393933204425244e-05, | |
| "loss": 2.2153, | |
| "step": 8119 | |
| }, | |
| { | |
| "epoch": 36.18666666666667, | |
| "grad_norm": 7.1362624168396, | |
| "learning_rate": 7.379387154112625e-05, | |
| "loss": 2.2045, | |
| "step": 8142 | |
| }, | |
| { | |
| "epoch": 36.28888888888889, | |
| "grad_norm": 7.824875354766846, | |
| "learning_rate": 7.364815023557306e-05, | |
| "loss": 2.215, | |
| "step": 8165 | |
| }, | |
| { | |
| "epoch": 36.39111111111111, | |
| "grad_norm": 10.668073654174805, | |
| "learning_rate": 7.350216972483064e-05, | |
| "loss": 2.2303, | |
| "step": 8188 | |
| }, | |
| { | |
| "epoch": 36.49333333333333, | |
| "grad_norm": 5.577554225921631, | |
| "learning_rate": 7.33559316089779e-05, | |
| "loss": 2.2175, | |
| "step": 8211 | |
| }, | |
| { | |
| "epoch": 36.595555555555556, | |
| "grad_norm": 6.902368545532227, | |
| "learning_rate": 7.320943749091728e-05, | |
| "loss": 2.2207, | |
| "step": 8234 | |
| }, | |
| { | |
| "epoch": 36.69777777777778, | |
| "grad_norm": 6.997749328613281, | |
| "learning_rate": 7.30626889763573e-05, | |
| "loss": 2.2525, | |
| "step": 8257 | |
| }, | |
| { | |
| "epoch": 36.8, | |
| "grad_norm": 7.666829586029053, | |
| "learning_rate": 7.291568767379484e-05, | |
| "loss": 2.2427, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 36.90222222222222, | |
| "grad_norm": 6.811129093170166, | |
| "learning_rate": 7.27684351944976e-05, | |
| "loss": 2.25, | |
| "step": 8303 | |
| }, | |
| { | |
| "epoch": 37.004444444444445, | |
| "grad_norm": 5.935613632202148, | |
| "learning_rate": 7.262093315248641e-05, | |
| "loss": 2.2459, | |
| "step": 8326 | |
| }, | |
| { | |
| "epoch": 37.10666666666667, | |
| "grad_norm": 6.339777946472168, | |
| "learning_rate": 7.24731831645175e-05, | |
| "loss": 2.167, | |
| "step": 8349 | |
| }, | |
| { | |
| "epoch": 37.208888888888886, | |
| "grad_norm": 7.560238361358643, | |
| "learning_rate": 7.232518685006485e-05, | |
| "loss": 2.1952, | |
| "step": 8372 | |
| }, | |
| { | |
| "epoch": 37.31111111111111, | |
| "grad_norm": 6.586178779602051, | |
| "learning_rate": 7.21769458313024e-05, | |
| "loss": 2.1791, | |
| "step": 8395 | |
| }, | |
| { | |
| "epoch": 37.413333333333334, | |
| "grad_norm": 7.019660949707031, | |
| "learning_rate": 7.20284617330862e-05, | |
| "loss": 2.1754, | |
| "step": 8418 | |
| }, | |
| { | |
| "epoch": 37.51555555555556, | |
| "grad_norm": 7.03871488571167, | |
| "learning_rate": 7.187973618293678e-05, | |
| "loss": 2.1585, | |
| "step": 8441 | |
| }, | |
| { | |
| "epoch": 37.617777777777775, | |
| "grad_norm": 6.066256046295166, | |
| "learning_rate": 7.173077081102114e-05, | |
| "loss": 2.1424, | |
| "step": 8464 | |
| }, | |
| { | |
| "epoch": 37.72, | |
| "grad_norm": 6.991265773773193, | |
| "learning_rate": 7.158156725013493e-05, | |
| "loss": 2.1577, | |
| "step": 8487 | |
| }, | |
| { | |
| "epoch": 37.82222222222222, | |
| "grad_norm": 8.248811721801758, | |
| "learning_rate": 7.14321271356846e-05, | |
| "loss": 2.1603, | |
| "step": 8510 | |
| }, | |
| { | |
| "epoch": 37.92444444444445, | |
| "grad_norm": 8.15676212310791, | |
| "learning_rate": 7.128245210566947e-05, | |
| "loss": 2.1695, | |
| "step": 8533 | |
| }, | |
| { | |
| "epoch": 38.026666666666664, | |
| "grad_norm": 7.107559680938721, | |
| "learning_rate": 7.113254380066367e-05, | |
| "loss": 2.1488, | |
| "step": 8556 | |
| }, | |
| { | |
| "epoch": 38.12888888888889, | |
| "grad_norm": 8.755867004394531, | |
| "learning_rate": 7.098240386379831e-05, | |
| "loss": 2.1009, | |
| "step": 8579 | |
| }, | |
| { | |
| "epoch": 38.23111111111111, | |
| "grad_norm": 7.037129878997803, | |
| "learning_rate": 7.083203394074334e-05, | |
| "loss": 2.0954, | |
| "step": 8602 | |
| }, | |
| { | |
| "epoch": 38.333333333333336, | |
| "grad_norm": 6.437880039215088, | |
| "learning_rate": 7.068143567968957e-05, | |
| "loss": 2.085, | |
| "step": 8625 | |
| }, | |
| { | |
| "epoch": 38.43555555555555, | |
| "grad_norm": 10.530925750732422, | |
| "learning_rate": 7.053061073133067e-05, | |
| "loss": 2.1242, | |
| "step": 8648 | |
| }, | |
| { | |
| "epoch": 38.53777777777778, | |
| "grad_norm": 7.10654878616333, | |
| "learning_rate": 7.037956074884493e-05, | |
| "loss": 2.1354, | |
| "step": 8671 | |
| }, | |
| { | |
| "epoch": 38.64, | |
| "grad_norm": 6.740297794342041, | |
| "learning_rate": 7.022828738787724e-05, | |
| "loss": 2.1365, | |
| "step": 8694 | |
| }, | |
| { | |
| "epoch": 38.742222222222225, | |
| "grad_norm": 7.16520357131958, | |
| "learning_rate": 7.007679230652095e-05, | |
| "loss": 2.1163, | |
| "step": 8717 | |
| }, | |
| { | |
| "epoch": 38.84444444444444, | |
| "grad_norm": 7.305176258087158, | |
| "learning_rate": 6.992507716529965e-05, | |
| "loss": 2.1429, | |
| "step": 8740 | |
| }, | |
| { | |
| "epoch": 38.946666666666665, | |
| "grad_norm": 5.924234390258789, | |
| "learning_rate": 6.977314362714898e-05, | |
| "loss": 2.1132, | |
| "step": 8763 | |
| }, | |
| { | |
| "epoch": 39.04888888888889, | |
| "grad_norm": 8.262660026550293, | |
| "learning_rate": 6.962099335739837e-05, | |
| "loss": 2.0614, | |
| "step": 8786 | |
| }, | |
| { | |
| "epoch": 39.15111111111111, | |
| "grad_norm": 7.352762699127197, | |
| "learning_rate": 6.946862802375292e-05, | |
| "loss": 2.0194, | |
| "step": 8809 | |
| }, | |
| { | |
| "epoch": 39.25333333333333, | |
| "grad_norm": 6.5161824226379395, | |
| "learning_rate": 6.931604929627495e-05, | |
| "loss": 2.0356, | |
| "step": 8832 | |
| }, | |
| { | |
| "epoch": 39.355555555555554, | |
| "grad_norm": 6.718994140625, | |
| "learning_rate": 6.916325884736576e-05, | |
| "loss": 2.0442, | |
| "step": 8855 | |
| }, | |
| { | |
| "epoch": 39.45777777777778, | |
| "grad_norm": 6.267631530761719, | |
| "learning_rate": 6.901025835174739e-05, | |
| "loss": 2.0456, | |
| "step": 8878 | |
| }, | |
| { | |
| "epoch": 39.56, | |
| "grad_norm": 6.105040550231934, | |
| "learning_rate": 6.885704948644411e-05, | |
| "loss": 2.0319, | |
| "step": 8901 | |
| }, | |
| { | |
| "epoch": 39.66222222222222, | |
| "grad_norm": 6.807146072387695, | |
| "learning_rate": 6.870363393076413e-05, | |
| "loss": 2.051, | |
| "step": 8924 | |
| }, | |
| { | |
| "epoch": 39.76444444444444, | |
| "grad_norm": 6.0141987800598145, | |
| "learning_rate": 6.855001336628118e-05, | |
| "loss": 2.0376, | |
| "step": 8947 | |
| }, | |
| { | |
| "epoch": 39.86666666666667, | |
| "grad_norm": 7.84182596206665, | |
| "learning_rate": 6.839618947681609e-05, | |
| "loss": 2.0596, | |
| "step": 8970 | |
| }, | |
| { | |
| "epoch": 39.96888888888889, | |
| "grad_norm": 8.566624641418457, | |
| "learning_rate": 6.824216394841825e-05, | |
| "loss": 2.0607, | |
| "step": 8993 | |
| }, | |
| { | |
| "epoch": 40.07111111111111, | |
| "grad_norm": 6.4133992195129395, | |
| "learning_rate": 6.808793846934729e-05, | |
| "loss": 1.9994, | |
| "step": 9016 | |
| }, | |
| { | |
| "epoch": 40.17333333333333, | |
| "grad_norm": 10.160492897033691, | |
| "learning_rate": 6.79335147300544e-05, | |
| "loss": 1.9999, | |
| "step": 9039 | |
| }, | |
| { | |
| "epoch": 40.275555555555556, | |
| "grad_norm": 6.391870021820068, | |
| "learning_rate": 6.777889442316394e-05, | |
| "loss": 1.9972, | |
| "step": 9062 | |
| }, | |
| { | |
| "epoch": 40.37777777777778, | |
| "grad_norm": 9.107426643371582, | |
| "learning_rate": 6.762407924345479e-05, | |
| "loss": 1.9891, | |
| "step": 9085 | |
| }, | |
| { | |
| "epoch": 40.48, | |
| "grad_norm": 6.959272861480713, | |
| "learning_rate": 6.746907088784182e-05, | |
| "loss": 1.9765, | |
| "step": 9108 | |
| }, | |
| { | |
| "epoch": 40.58222222222222, | |
| "grad_norm": 6.614034175872803, | |
| "learning_rate": 6.73138710553573e-05, | |
| "loss": 1.993, | |
| "step": 9131 | |
| }, | |
| { | |
| "epoch": 40.684444444444445, | |
| "grad_norm": 7.331613063812256, | |
| "learning_rate": 6.715848144713227e-05, | |
| "loss": 1.9826, | |
| "step": 9154 | |
| }, | |
| { | |
| "epoch": 40.78666666666667, | |
| "grad_norm": 8.619832992553711, | |
| "learning_rate": 6.700290376637782e-05, | |
| "loss": 2.0247, | |
| "step": 9177 | |
| }, | |
| { | |
| "epoch": 40.888888888888886, | |
| "grad_norm": 7.282753944396973, | |
| "learning_rate": 6.684713971836656e-05, | |
| "loss": 2.0123, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 40.99111111111111, | |
| "grad_norm": 7.198232173919678, | |
| "learning_rate": 6.669119101041383e-05, | |
| "loss": 2.0095, | |
| "step": 9223 | |
| }, | |
| { | |
| "epoch": 41.093333333333334, | |
| "grad_norm": 6.148073673248291, | |
| "learning_rate": 6.6535059351859e-05, | |
| "loss": 1.9284, | |
| "step": 9246 | |
| }, | |
| { | |
| "epoch": 41.19555555555556, | |
| "grad_norm": 7.000942230224609, | |
| "learning_rate": 6.637874645404673e-05, | |
| "loss": 1.9308, | |
| "step": 9269 | |
| }, | |
| { | |
| "epoch": 41.297777777777775, | |
| "grad_norm": 9.497756004333496, | |
| "learning_rate": 6.622225403030828e-05, | |
| "loss": 1.9316, | |
| "step": 9292 | |
| }, | |
| { | |
| "epoch": 41.4, | |
| "grad_norm": 6.189666748046875, | |
| "learning_rate": 6.606558379594262e-05, | |
| "loss": 1.9304, | |
| "step": 9315 | |
| }, | |
| { | |
| "epoch": 41.50222222222222, | |
| "grad_norm": 6.823606014251709, | |
| "learning_rate": 6.590873746819772e-05, | |
| "loss": 1.9582, | |
| "step": 9338 | |
| }, | |
| { | |
| "epoch": 41.60444444444445, | |
| "grad_norm": 6.261486530303955, | |
| "learning_rate": 6.575171676625169e-05, | |
| "loss": 1.9322, | |
| "step": 9361 | |
| }, | |
| { | |
| "epoch": 41.70666666666666, | |
| "grad_norm": 6.920318603515625, | |
| "learning_rate": 6.559452341119389e-05, | |
| "loss": 1.9533, | |
| "step": 9384 | |
| }, | |
| { | |
| "epoch": 41.80888888888889, | |
| "grad_norm": 7.246551513671875, | |
| "learning_rate": 6.543715912600621e-05, | |
| "loss": 1.9548, | |
| "step": 9407 | |
| }, | |
| { | |
| "epoch": 41.91111111111111, | |
| "grad_norm": 6.377082824707031, | |
| "learning_rate": 6.527962563554402e-05, | |
| "loss": 1.9709, | |
| "step": 9430 | |
| }, | |
| { | |
| "epoch": 42.013333333333335, | |
| "grad_norm": 7.362649440765381, | |
| "learning_rate": 6.512192466651735e-05, | |
| "loss": 1.9402, | |
| "step": 9453 | |
| }, | |
| { | |
| "epoch": 42.11555555555555, | |
| "grad_norm": 9.08193588256836, | |
| "learning_rate": 6.496405794747193e-05, | |
| "loss": 1.8674, | |
| "step": 9476 | |
| }, | |
| { | |
| "epoch": 42.217777777777776, | |
| "grad_norm": 6.658238410949707, | |
| "learning_rate": 6.480602720877029e-05, | |
| "loss": 1.8556, | |
| "step": 9499 | |
| }, | |
| { | |
| "epoch": 42.32, | |
| "grad_norm": 6.951099395751953, | |
| "learning_rate": 6.464783418257277e-05, | |
| "loss": 1.8759, | |
| "step": 9522 | |
| }, | |
| { | |
| "epoch": 42.422222222222224, | |
| "grad_norm": 8.758234977722168, | |
| "learning_rate": 6.448948060281847e-05, | |
| "loss": 1.8712, | |
| "step": 9545 | |
| }, | |
| { | |
| "epoch": 42.52444444444444, | |
| "grad_norm": 6.225131988525391, | |
| "learning_rate": 6.433096820520639e-05, | |
| "loss": 1.8857, | |
| "step": 9568 | |
| }, | |
| { | |
| "epoch": 42.626666666666665, | |
| "grad_norm": 7.351943492889404, | |
| "learning_rate": 6.417229872717624e-05, | |
| "loss": 1.8809, | |
| "step": 9591 | |
| }, | |
| { | |
| "epoch": 42.72888888888889, | |
| "grad_norm": 7.482339859008789, | |
| "learning_rate": 6.401347390788952e-05, | |
| "loss": 1.8694, | |
| "step": 9614 | |
| }, | |
| { | |
| "epoch": 42.83111111111111, | |
| "grad_norm": 6.971664905548096, | |
| "learning_rate": 6.385449548821037e-05, | |
| "loss": 1.8744, | |
| "step": 9637 | |
| }, | |
| { | |
| "epoch": 42.93333333333333, | |
| "grad_norm": 6.296336650848389, | |
| "learning_rate": 6.36953652106866e-05, | |
| "loss": 1.8966, | |
| "step": 9660 | |
| }, | |
| { | |
| "epoch": 43.035555555555554, | |
| "grad_norm": 6.986079216003418, | |
| "learning_rate": 6.353608481953042e-05, | |
| "loss": 1.8555, | |
| "step": 9683 | |
| }, | |
| { | |
| "epoch": 43.13777777777778, | |
| "grad_norm": 5.542973041534424, | |
| "learning_rate": 6.337665606059953e-05, | |
| "loss": 1.8185, | |
| "step": 9706 | |
| }, | |
| { | |
| "epoch": 43.24, | |
| "grad_norm": 7.133216381072998, | |
| "learning_rate": 6.321708068137779e-05, | |
| "loss": 1.8241, | |
| "step": 9729 | |
| }, | |
| { | |
| "epoch": 43.34222222222222, | |
| "grad_norm": 6.318929672241211, | |
| "learning_rate": 6.305736043095619e-05, | |
| "loss": 1.8372, | |
| "step": 9752 | |
| }, | |
| { | |
| "epoch": 43.44444444444444, | |
| "grad_norm": 6.268241882324219, | |
| "learning_rate": 6.289749706001365e-05, | |
| "loss": 1.8602, | |
| "step": 9775 | |
| }, | |
| { | |
| "epoch": 43.54666666666667, | |
| "grad_norm": 5.881213665008545, | |
| "learning_rate": 6.273749232079778e-05, | |
| "loss": 1.8439, | |
| "step": 9798 | |
| }, | |
| { | |
| "epoch": 43.64888888888889, | |
| "grad_norm": 6.6124186515808105, | |
| "learning_rate": 6.257734796710575e-05, | |
| "loss": 1.8428, | |
| "step": 9821 | |
| }, | |
| { | |
| "epoch": 43.75111111111111, | |
| "grad_norm": 7.996447563171387, | |
| "learning_rate": 6.241706575426504e-05, | |
| "loss": 1.8354, | |
| "step": 9844 | |
| }, | |
| { | |
| "epoch": 43.85333333333333, | |
| "grad_norm": 7.1598639488220215, | |
| "learning_rate": 6.225664743911414e-05, | |
| "loss": 1.8185, | |
| "step": 9867 | |
| }, | |
| { | |
| "epoch": 43.955555555555556, | |
| "grad_norm": 7.8854265213012695, | |
| "learning_rate": 6.209609477998338e-05, | |
| "loss": 1.832, | |
| "step": 9890 | |
| }, | |
| { | |
| "epoch": 44.05777777777778, | |
| "grad_norm": 8.291993141174316, | |
| "learning_rate": 6.193540953667564e-05, | |
| "loss": 1.7871, | |
| "step": 9913 | |
| }, | |
| { | |
| "epoch": 44.16, | |
| "grad_norm": 8.600836753845215, | |
| "learning_rate": 6.177459347044703e-05, | |
| "loss": 1.7882, | |
| "step": 9936 | |
| }, | |
| { | |
| "epoch": 44.26222222222222, | |
| "grad_norm": 8.065147399902344, | |
| "learning_rate": 6.161364834398755e-05, | |
| "loss": 1.7799, | |
| "step": 9959 | |
| }, | |
| { | |
| "epoch": 44.364444444444445, | |
| "grad_norm": 8.459796905517578, | |
| "learning_rate": 6.145257592140188e-05, | |
| "loss": 1.763, | |
| "step": 9982 | |
| }, | |
| { | |
| "epoch": 44.46666666666667, | |
| "grad_norm": 6.006131649017334, | |
| "learning_rate": 6.129137796818997e-05, | |
| "loss": 1.7885, | |
| "step": 10005 | |
| }, | |
| { | |
| "epoch": 44.568888888888885, | |
| "grad_norm": 8.034002304077148, | |
| "learning_rate": 6.113005625122767e-05, | |
| "loss": 1.8008, | |
| "step": 10028 | |
| }, | |
| { | |
| "epoch": 44.67111111111111, | |
| "grad_norm": 6.57339334487915, | |
| "learning_rate": 6.09686125387474e-05, | |
| "loss": 1.786, | |
| "step": 10051 | |
| }, | |
| { | |
| "epoch": 44.77333333333333, | |
| "grad_norm": 7.233739376068115, | |
| "learning_rate": 6.080704860031879e-05, | |
| "loss": 1.7973, | |
| "step": 10074 | |
| }, | |
| { | |
| "epoch": 44.87555555555556, | |
| "grad_norm": 7.365921497344971, | |
| "learning_rate": 6.0645366206829244e-05, | |
| "loss": 1.8094, | |
| "step": 10097 | |
| }, | |
| { | |
| "epoch": 44.977777777777774, | |
| "grad_norm": 7.772608280181885, | |
| "learning_rate": 6.048356713046452e-05, | |
| "loss": 1.7963, | |
| "step": 10120 | |
| }, | |
| { | |
| "epoch": 45.08, | |
| "grad_norm": 6.320626258850098, | |
| "learning_rate": 6.032165314468935e-05, | |
| "loss": 1.7384, | |
| "step": 10143 | |
| }, | |
| { | |
| "epoch": 45.18222222222222, | |
| "grad_norm": 6.214219093322754, | |
| "learning_rate": 6.015962602422796e-05, | |
| "loss": 1.7253, | |
| "step": 10166 | |
| }, | |
| { | |
| "epoch": 45.284444444444446, | |
| "grad_norm": 6.484301567077637, | |
| "learning_rate": 5.999748754504465e-05, | |
| "loss": 1.7361, | |
| "step": 10189 | |
| }, | |
| { | |
| "epoch": 45.38666666666666, | |
| "grad_norm": 8.989522933959961, | |
| "learning_rate": 5.9835239484324304e-05, | |
| "loss": 1.7443, | |
| "step": 10212 | |
| }, | |
| { | |
| "epoch": 45.48888888888889, | |
| "grad_norm": 10.29185676574707, | |
| "learning_rate": 5.967288362045291e-05, | |
| "loss": 1.7423, | |
| "step": 10235 | |
| }, | |
| { | |
| "epoch": 45.59111111111111, | |
| "grad_norm": 7.059528350830078, | |
| "learning_rate": 5.951042173299811e-05, | |
| "loss": 1.7292, | |
| "step": 10258 | |
| }, | |
| { | |
| "epoch": 45.693333333333335, | |
| "grad_norm": 6.192359447479248, | |
| "learning_rate": 5.9347855602689616e-05, | |
| "loss": 1.7204, | |
| "step": 10281 | |
| }, | |
| { | |
| "epoch": 45.79555555555555, | |
| "grad_norm": 6.398216247558594, | |
| "learning_rate": 5.918518701139978e-05, | |
| "loss": 1.7395, | |
| "step": 10304 | |
| }, | |
| { | |
| "epoch": 45.897777777777776, | |
| "grad_norm": 6.21365213394165, | |
| "learning_rate": 5.902241774212398e-05, | |
| "loss": 1.7343, | |
| "step": 10327 | |
| }, | |
| { | |
| "epoch": 46.0, | |
| "grad_norm": 6.119551658630371, | |
| "learning_rate": 5.885954957896115e-05, | |
| "loss": 1.7463, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 46.102222222222224, | |
| "grad_norm": 5.506466865539551, | |
| "learning_rate": 5.8696584307094146e-05, | |
| "loss": 1.657, | |
| "step": 10373 | |
| }, | |
| { | |
| "epoch": 46.20444444444445, | |
| "grad_norm": 6.575307369232178, | |
| "learning_rate": 5.853352371277029e-05, | |
| "loss": 1.6622, | |
| "step": 10396 | |
| }, | |
| { | |
| "epoch": 46.306666666666665, | |
| "grad_norm": 6.451313018798828, | |
| "learning_rate": 5.8370369583281634e-05, | |
| "loss": 1.6861, | |
| "step": 10419 | |
| }, | |
| { | |
| "epoch": 46.40888888888889, | |
| "grad_norm": 7.1156816482543945, | |
| "learning_rate": 5.820712370694558e-05, | |
| "loss": 1.6859, | |
| "step": 10442 | |
| }, | |
| { | |
| "epoch": 46.51111111111111, | |
| "grad_norm": 6.124991416931152, | |
| "learning_rate": 5.8043787873085044e-05, | |
| "loss": 1.6763, | |
| "step": 10465 | |
| }, | |
| { | |
| "epoch": 46.61333333333333, | |
| "grad_norm": 8.477898597717285, | |
| "learning_rate": 5.7880363872009016e-05, | |
| "loss": 1.6952, | |
| "step": 10488 | |
| }, | |
| { | |
| "epoch": 46.715555555555554, | |
| "grad_norm": 7.237541198730469, | |
| "learning_rate": 5.771685349499288e-05, | |
| "loss": 1.676, | |
| "step": 10511 | |
| }, | |
| { | |
| "epoch": 46.81777777777778, | |
| "grad_norm": 5.890578269958496, | |
| "learning_rate": 5.7553258534258756e-05, | |
| "loss": 1.6964, | |
| "step": 10534 | |
| }, | |
| { | |
| "epoch": 46.92, | |
| "grad_norm": 6.47843074798584, | |
| "learning_rate": 5.7389580782955896e-05, | |
| "loss": 1.7098, | |
| "step": 10557 | |
| }, | |
| { | |
| "epoch": 47.022222222222226, | |
| "grad_norm": 9.489853858947754, | |
| "learning_rate": 5.722582203514099e-05, | |
| "loss": 1.6894, | |
| "step": 10580 | |
| }, | |
| { | |
| "epoch": 47.12444444444444, | |
| "grad_norm": 5.722830295562744, | |
| "learning_rate": 5.7061984085758555e-05, | |
| "loss": 1.6463, | |
| "step": 10603 | |
| }, | |
| { | |
| "epoch": 47.22666666666667, | |
| "grad_norm": 5.548519134521484, | |
| "learning_rate": 5.689806873062122e-05, | |
| "loss": 1.6358, | |
| "step": 10626 | |
| }, | |
| { | |
| "epoch": 47.32888888888889, | |
| "grad_norm": 5.543103218078613, | |
| "learning_rate": 5.6734077766390023e-05, | |
| "loss": 1.6249, | |
| "step": 10649 | |
| }, | |
| { | |
| "epoch": 47.431111111111115, | |
| "grad_norm": 7.334754467010498, | |
| "learning_rate": 5.6570012990554774e-05, | |
| "loss": 1.6144, | |
| "step": 10672 | |
| }, | |
| { | |
| "epoch": 47.53333333333333, | |
| "grad_norm": 6.74175500869751, | |
| "learning_rate": 5.6405876201414334e-05, | |
| "loss": 1.6413, | |
| "step": 10695 | |
| }, | |
| { | |
| "epoch": 47.635555555555555, | |
| "grad_norm": 8.000964164733887, | |
| "learning_rate": 5.624166919805686e-05, | |
| "loss": 1.6583, | |
| "step": 10718 | |
| }, | |
| { | |
| "epoch": 47.73777777777778, | |
| "grad_norm": 6.7785797119140625, | |
| "learning_rate": 5.607739378034015e-05, | |
| "loss": 1.6346, | |
| "step": 10741 | |
| }, | |
| { | |
| "epoch": 47.84, | |
| "grad_norm": 8.0484619140625, | |
| "learning_rate": 5.591305174887185e-05, | |
| "loss": 1.6615, | |
| "step": 10764 | |
| }, | |
| { | |
| "epoch": 47.94222222222222, | |
| "grad_norm": 6.589325428009033, | |
| "learning_rate": 5.574864490498982e-05, | |
| "loss": 1.6556, | |
| "step": 10787 | |
| }, | |
| { | |
| "epoch": 48.044444444444444, | |
| "grad_norm": 5.7148942947387695, | |
| "learning_rate": 5.558417505074226e-05, | |
| "loss": 1.6129, | |
| "step": 10810 | |
| }, | |
| { | |
| "epoch": 48.14666666666667, | |
| "grad_norm": 6.063688278198242, | |
| "learning_rate": 5.541964398886805e-05, | |
| "loss": 1.5707, | |
| "step": 10833 | |
| }, | |
| { | |
| "epoch": 48.24888888888889, | |
| "grad_norm": 7.891332626342773, | |
| "learning_rate": 5.525505352277695e-05, | |
| "loss": 1.5966, | |
| "step": 10856 | |
| }, | |
| { | |
| "epoch": 48.35111111111111, | |
| "grad_norm": 6.462911605834961, | |
| "learning_rate": 5.509040545652984e-05, | |
| "loss": 1.5979, | |
| "step": 10879 | |
| }, | |
| { | |
| "epoch": 48.45333333333333, | |
| "grad_norm": 6.627693176269531, | |
| "learning_rate": 5.492570159481897e-05, | |
| "loss": 1.5835, | |
| "step": 10902 | |
| }, | |
| { | |
| "epoch": 48.55555555555556, | |
| "grad_norm": 7.016481399536133, | |
| "learning_rate": 5.4760943742948126e-05, | |
| "loss": 1.6114, | |
| "step": 10925 | |
| }, | |
| { | |
| "epoch": 48.65777777777778, | |
| "grad_norm": 6.203521251678467, | |
| "learning_rate": 5.4596133706812925e-05, | |
| "loss": 1.6261, | |
| "step": 10948 | |
| }, | |
| { | |
| "epoch": 48.76, | |
| "grad_norm": 8.625542640686035, | |
| "learning_rate": 5.443127329288092e-05, | |
| "loss": 1.6152, | |
| "step": 10971 | |
| }, | |
| { | |
| "epoch": 48.86222222222222, | |
| "grad_norm": 8.934986114501953, | |
| "learning_rate": 5.426636430817189e-05, | |
| "loss": 1.6155, | |
| "step": 10994 | |
| }, | |
| { | |
| "epoch": 48.964444444444446, | |
| "grad_norm": 6.330492973327637, | |
| "learning_rate": 5.4101408560237964e-05, | |
| "loss": 1.624, | |
| "step": 11017 | |
| }, | |
| { | |
| "epoch": 49.06666666666667, | |
| "grad_norm": 7.745333671569824, | |
| "learning_rate": 5.393640785714386e-05, | |
| "loss": 1.5832, | |
| "step": 11040 | |
| }, | |
| { | |
| "epoch": 49.16888888888889, | |
| "grad_norm": 7.9969682693481445, | |
| "learning_rate": 5.377136400744701e-05, | |
| "loss": 1.5664, | |
| "step": 11063 | |
| }, | |
| { | |
| "epoch": 49.27111111111111, | |
| "grad_norm": 6.262273788452148, | |
| "learning_rate": 5.3606278820177824e-05, | |
| "loss": 1.5464, | |
| "step": 11086 | |
| }, | |
| { | |
| "epoch": 49.373333333333335, | |
| "grad_norm": 6.109494686126709, | |
| "learning_rate": 5.344115410481977e-05, | |
| "loss": 1.5242, | |
| "step": 11109 | |
| }, | |
| { | |
| "epoch": 49.47555555555556, | |
| "grad_norm": 6.395167827606201, | |
| "learning_rate": 5.3275991671289594e-05, | |
| "loss": 1.5514, | |
| "step": 11132 | |
| }, | |
| { | |
| "epoch": 49.577777777777776, | |
| "grad_norm": 8.812541961669922, | |
| "learning_rate": 5.311079332991748e-05, | |
| "loss": 1.527, | |
| "step": 11155 | |
| }, | |
| { | |
| "epoch": 49.68, | |
| "grad_norm": 8.040874481201172, | |
| "learning_rate": 5.294556089142716e-05, | |
| "loss": 1.5469, | |
| "step": 11178 | |
| }, | |
| { | |
| "epoch": 49.782222222222224, | |
| "grad_norm": 6.935076713562012, | |
| "learning_rate": 5.278029616691613e-05, | |
| "loss": 1.566, | |
| "step": 11201 | |
| }, | |
| { | |
| "epoch": 49.88444444444445, | |
| "grad_norm": 7.0155181884765625, | |
| "learning_rate": 5.261500096783577e-05, | |
| "loss": 1.5642, | |
| "step": 11224 | |
| }, | |
| { | |
| "epoch": 49.986666666666665, | |
| "grad_norm": 8.399476051330566, | |
| "learning_rate": 5.2449677105971476e-05, | |
| "loss": 1.5664, | |
| "step": 11247 | |
| }, | |
| { | |
| "epoch": 50.08888888888889, | |
| "grad_norm": 6.229375839233398, | |
| "learning_rate": 5.22843263934228e-05, | |
| "loss": 1.5044, | |
| "step": 11270 | |
| }, | |
| { | |
| "epoch": 50.19111111111111, | |
| "grad_norm": 8.590860366821289, | |
| "learning_rate": 5.211895064258365e-05, | |
| "loss": 1.5104, | |
| "step": 11293 | |
| }, | |
| { | |
| "epoch": 50.29333333333334, | |
| "grad_norm": 6.563053607940674, | |
| "learning_rate": 5.195355166612234e-05, | |
| "loss": 1.5279, | |
| "step": 11316 | |
| }, | |
| { | |
| "epoch": 50.39555555555555, | |
| "grad_norm": 6.139184474945068, | |
| "learning_rate": 5.178813127696175e-05, | |
| "loss": 1.5323, | |
| "step": 11339 | |
| }, | |
| { | |
| "epoch": 50.49777777777778, | |
| "grad_norm": 6.862679958343506, | |
| "learning_rate": 5.162269128825949e-05, | |
| "loss": 1.526, | |
| "step": 11362 | |
| }, | |
| { | |
| "epoch": 50.6, | |
| "grad_norm": 7.023072719573975, | |
| "learning_rate": 5.1457233513387994e-05, | |
| "loss": 1.5244, | |
| "step": 11385 | |
| }, | |
| { | |
| "epoch": 50.702222222222225, | |
| "grad_norm": 6.219864368438721, | |
| "learning_rate": 5.1291759765914625e-05, | |
| "loss": 1.5333, | |
| "step": 11408 | |
| }, | |
| { | |
| "epoch": 50.80444444444444, | |
| "grad_norm": 6.453531265258789, | |
| "learning_rate": 5.112627185958184e-05, | |
| "loss": 1.5319, | |
| "step": 11431 | |
| }, | |
| { | |
| "epoch": 50.906666666666666, | |
| "grad_norm": 5.3879876136779785, | |
| "learning_rate": 5.096077160828728e-05, | |
| "loss": 1.5279, | |
| "step": 11454 | |
| }, | |
| { | |
| "epoch": 51.00888888888889, | |
| "grad_norm": 6.174513339996338, | |
| "learning_rate": 5.079526082606394e-05, | |
| "loss": 1.5157, | |
| "step": 11477 | |
| }, | |
| { | |
| "epoch": 51.111111111111114, | |
| "grad_norm": 8.612546920776367, | |
| "learning_rate": 5.062974132706016e-05, | |
| "loss": 1.4655, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 51.21333333333333, | |
| "grad_norm": 6.833427429199219, | |
| "learning_rate": 5.046421492551992e-05, | |
| "loss": 1.4723, | |
| "step": 11523 | |
| }, | |
| { | |
| "epoch": 51.315555555555555, | |
| "grad_norm": 6.863546371459961, | |
| "learning_rate": 5.029868343576276e-05, | |
| "loss": 1.4848, | |
| "step": 11546 | |
| }, | |
| { | |
| "epoch": 51.41777777777778, | |
| "grad_norm": 7.937037467956543, | |
| "learning_rate": 5.013314867216407e-05, | |
| "loss": 1.4613, | |
| "step": 11569 | |
| }, | |
| { | |
| "epoch": 51.52, | |
| "grad_norm": 6.1333699226379395, | |
| "learning_rate": 4.996761244913508e-05, | |
| "loss": 1.478, | |
| "step": 11592 | |
| }, | |
| { | |
| "epoch": 51.62222222222222, | |
| "grad_norm": 9.617277145385742, | |
| "learning_rate": 4.980207658110305e-05, | |
| "loss": 1.4705, | |
| "step": 11615 | |
| }, | |
| { | |
| "epoch": 51.724444444444444, | |
| "grad_norm": 6.086880207061768, | |
| "learning_rate": 4.963654288249134e-05, | |
| "loss": 1.4673, | |
| "step": 11638 | |
| }, | |
| { | |
| "epoch": 51.82666666666667, | |
| "grad_norm": 5.924047470092773, | |
| "learning_rate": 4.9471013167699476e-05, | |
| "loss": 1.4855, | |
| "step": 11661 | |
| }, | |
| { | |
| "epoch": 51.92888888888889, | |
| "grad_norm": 5.790915489196777, | |
| "learning_rate": 4.930548925108342e-05, | |
| "loss": 1.4879, | |
| "step": 11684 | |
| }, | |
| { | |
| "epoch": 52.03111111111111, | |
| "grad_norm": 10.055533409118652, | |
| "learning_rate": 4.913997294693547e-05, | |
| "loss": 1.4776, | |
| "step": 11707 | |
| }, | |
| { | |
| "epoch": 52.13333333333333, | |
| "grad_norm": 5.994448661804199, | |
| "learning_rate": 4.8974466069464586e-05, | |
| "loss": 1.4281, | |
| "step": 11730 | |
| }, | |
| { | |
| "epoch": 52.23555555555556, | |
| "grad_norm": 6.34792947769165, | |
| "learning_rate": 4.880897043277632e-05, | |
| "loss": 1.4232, | |
| "step": 11753 | |
| }, | |
| { | |
| "epoch": 52.33777777777778, | |
| "grad_norm": 6.8388285636901855, | |
| "learning_rate": 4.8643487850853093e-05, | |
| "loss": 1.4415, | |
| "step": 11776 | |
| }, | |
| { | |
| "epoch": 52.44, | |
| "grad_norm": 6.194220542907715, | |
| "learning_rate": 4.847802013753414e-05, | |
| "loss": 1.4363, | |
| "step": 11799 | |
| }, | |
| { | |
| "epoch": 52.54222222222222, | |
| "grad_norm": 7.254870891571045, | |
| "learning_rate": 4.831256910649582e-05, | |
| "loss": 1.445, | |
| "step": 11822 | |
| }, | |
| { | |
| "epoch": 52.644444444444446, | |
| "grad_norm": 6.243785858154297, | |
| "learning_rate": 4.814713657123158e-05, | |
| "loss": 1.4399, | |
| "step": 11845 | |
| }, | |
| { | |
| "epoch": 52.74666666666667, | |
| "grad_norm": 7.5753607749938965, | |
| "learning_rate": 4.798172434503213e-05, | |
| "loss": 1.4521, | |
| "step": 11868 | |
| }, | |
| { | |
| "epoch": 52.84888888888889, | |
| "grad_norm": 6.7162861824035645, | |
| "learning_rate": 4.781633424096562e-05, | |
| "loss": 1.4446, | |
| "step": 11891 | |
| }, | |
| { | |
| "epoch": 52.95111111111111, | |
| "grad_norm": 8.405692100524902, | |
| "learning_rate": 4.765096807185767e-05, | |
| "loss": 1.4712, | |
| "step": 11914 | |
| }, | |
| { | |
| "epoch": 53.053333333333335, | |
| "grad_norm": 5.832555294036865, | |
| "learning_rate": 4.748562765027162e-05, | |
| "loss": 1.4306, | |
| "step": 11937 | |
| }, | |
| { | |
| "epoch": 53.15555555555556, | |
| "grad_norm": 5.443018436431885, | |
| "learning_rate": 4.7320314788488496e-05, | |
| "loss": 1.3977, | |
| "step": 11960 | |
| }, | |
| { | |
| "epoch": 53.257777777777775, | |
| "grad_norm": 6.506402969360352, | |
| "learning_rate": 4.715503129848733e-05, | |
| "loss": 1.419, | |
| "step": 11983 | |
| }, | |
| { | |
| "epoch": 53.36, | |
| "grad_norm": 7.063472747802734, | |
| "learning_rate": 4.69897789919252e-05, | |
| "loss": 1.4188, | |
| "step": 12006 | |
| }, | |
| { | |
| "epoch": 53.46222222222222, | |
| "grad_norm": 6.49618673324585, | |
| "learning_rate": 4.682455968011731e-05, | |
| "loss": 1.421, | |
| "step": 12029 | |
| }, | |
| { | |
| "epoch": 53.56444444444445, | |
| "grad_norm": 7.384080410003662, | |
| "learning_rate": 4.6659375174017316e-05, | |
| "loss": 1.4157, | |
| "step": 12052 | |
| }, | |
| { | |
| "epoch": 53.666666666666664, | |
| "grad_norm": 6.499640464782715, | |
| "learning_rate": 4.6494227284197294e-05, | |
| "loss": 1.3914, | |
| "step": 12075 | |
| }, | |
| { | |
| "epoch": 53.76888888888889, | |
| "grad_norm": 8.480474472045898, | |
| "learning_rate": 4.632911782082804e-05, | |
| "loss": 1.387, | |
| "step": 12098 | |
| }, | |
| { | |
| "epoch": 53.87111111111111, | |
| "grad_norm": 7.255825519561768, | |
| "learning_rate": 4.616404859365907e-05, | |
| "loss": 1.4147, | |
| "step": 12121 | |
| }, | |
| { | |
| "epoch": 53.973333333333336, | |
| "grad_norm": 5.0700249671936035, | |
| "learning_rate": 4.599902141199897e-05, | |
| "loss": 1.389, | |
| "step": 12144 | |
| }, | |
| { | |
| "epoch": 54.07555555555555, | |
| "grad_norm": 5.912162780761719, | |
| "learning_rate": 4.583403808469542e-05, | |
| "loss": 1.3623, | |
| "step": 12167 | |
| }, | |
| { | |
| "epoch": 54.17777777777778, | |
| "grad_norm": 5.70848274230957, | |
| "learning_rate": 4.566910042011539e-05, | |
| "loss": 1.3513, | |
| "step": 12190 | |
| }, | |
| { | |
| "epoch": 54.28, | |
| "grad_norm": 8.14360523223877, | |
| "learning_rate": 4.550421022612542e-05, | |
| "loss": 1.3729, | |
| "step": 12213 | |
| }, | |
| { | |
| "epoch": 54.382222222222225, | |
| "grad_norm": 5.549880027770996, | |
| "learning_rate": 4.5339369310071654e-05, | |
| "loss": 1.3797, | |
| "step": 12236 | |
| }, | |
| { | |
| "epoch": 54.48444444444444, | |
| "grad_norm": 6.507516384124756, | |
| "learning_rate": 4.517457947876018e-05, | |
| "loss": 1.3824, | |
| "step": 12259 | |
| }, | |
| { | |
| "epoch": 54.586666666666666, | |
| "grad_norm": 6.413192272186279, | |
| "learning_rate": 4.500984253843707e-05, | |
| "loss": 1.3718, | |
| "step": 12282 | |
| }, | |
| { | |
| "epoch": 54.68888888888889, | |
| "grad_norm": 6.168595790863037, | |
| "learning_rate": 4.484516029476873e-05, | |
| "loss": 1.3726, | |
| "step": 12305 | |
| }, | |
| { | |
| "epoch": 54.791111111111114, | |
| "grad_norm": 6.176178932189941, | |
| "learning_rate": 4.4680534552821996e-05, | |
| "loss": 1.3776, | |
| "step": 12328 | |
| }, | |
| { | |
| "epoch": 54.89333333333333, | |
| "grad_norm": 6.18988561630249, | |
| "learning_rate": 4.45159671170444e-05, | |
| "loss": 1.3764, | |
| "step": 12351 | |
| }, | |
| { | |
| "epoch": 54.995555555555555, | |
| "grad_norm": 6.998044490814209, | |
| "learning_rate": 4.4351459791244435e-05, | |
| "loss": 1.375, | |
| "step": 12374 | |
| }, | |
| { | |
| "epoch": 55.09777777777778, | |
| "grad_norm": 6.069551467895508, | |
| "learning_rate": 4.418701437857166e-05, | |
| "loss": 1.3324, | |
| "step": 12397 | |
| }, | |
| { | |
| "epoch": 55.2, | |
| "grad_norm": 6.534727096557617, | |
| "learning_rate": 4.402263268149706e-05, | |
| "loss": 1.3301, | |
| "step": 12420 | |
| }, | |
| { | |
| "epoch": 55.30222222222222, | |
| "grad_norm": 6.363480567932129, | |
| "learning_rate": 4.385831650179322e-05, | |
| "loss": 1.3524, | |
| "step": 12443 | |
| }, | |
| { | |
| "epoch": 55.404444444444444, | |
| "grad_norm": 6.515593528747559, | |
| "learning_rate": 4.3694067640514614e-05, | |
| "loss": 1.3353, | |
| "step": 12466 | |
| }, | |
| { | |
| "epoch": 55.50666666666667, | |
| "grad_norm": 6.400863170623779, | |
| "learning_rate": 4.352988789797781e-05, | |
| "loss": 1.3292, | |
| "step": 12489 | |
| }, | |
| { | |
| "epoch": 55.60888888888889, | |
| "grad_norm": 6.897211074829102, | |
| "learning_rate": 4.336577907374181e-05, | |
| "loss": 1.3591, | |
| "step": 12512 | |
| }, | |
| { | |
| "epoch": 55.71111111111111, | |
| "grad_norm": 7.05909538269043, | |
| "learning_rate": 4.320174296658827e-05, | |
| "loss": 1.3636, | |
| "step": 12535 | |
| }, | |
| { | |
| "epoch": 55.81333333333333, | |
| "grad_norm": 5.776651859283447, | |
| "learning_rate": 4.303778137450178e-05, | |
| "loss": 1.3475, | |
| "step": 12558 | |
| }, | |
| { | |
| "epoch": 55.91555555555556, | |
| "grad_norm": 6.0230021476745605, | |
| "learning_rate": 4.287389609465022e-05, | |
| "loss": 1.3681, | |
| "step": 12581 | |
| }, | |
| { | |
| "epoch": 56.01777777777778, | |
| "grad_norm": 6.32971715927124, | |
| "learning_rate": 4.271008892336497e-05, | |
| "loss": 1.3458, | |
| "step": 12604 | |
| }, | |
| { | |
| "epoch": 56.12, | |
| "grad_norm": 8.626049041748047, | |
| "learning_rate": 4.2546361656121346e-05, | |
| "loss": 1.2829, | |
| "step": 12627 | |
| }, | |
| { | |
| "epoch": 56.22222222222222, | |
| "grad_norm": 6.015228748321533, | |
| "learning_rate": 4.238271608751874e-05, | |
| "loss": 1.2816, | |
| "step": 12650 | |
| }, | |
| { | |
| "epoch": 56.324444444444445, | |
| "grad_norm": 5.704399108886719, | |
| "learning_rate": 4.221915401126113e-05, | |
| "loss": 1.3026, | |
| "step": 12673 | |
| }, | |
| { | |
| "epoch": 56.42666666666667, | |
| "grad_norm": 5.911527156829834, | |
| "learning_rate": 4.205567722013733e-05, | |
| "loss": 1.2857, | |
| "step": 12696 | |
| }, | |
| { | |
| "epoch": 56.528888888888886, | |
| "grad_norm": 6.171534538269043, | |
| "learning_rate": 4.18922875060013e-05, | |
| "loss": 1.2873, | |
| "step": 12719 | |
| }, | |
| { | |
| "epoch": 56.63111111111111, | |
| "grad_norm": 7.097690105438232, | |
| "learning_rate": 4.1728986659752636e-05, | |
| "loss": 1.3012, | |
| "step": 12742 | |
| }, | |
| { | |
| "epoch": 56.733333333333334, | |
| "grad_norm": 5.469725608825684, | |
| "learning_rate": 4.156577647131679e-05, | |
| "loss": 1.2895, | |
| "step": 12765 | |
| }, | |
| { | |
| "epoch": 56.83555555555556, | |
| "grad_norm": 6.386800765991211, | |
| "learning_rate": 4.1402658729625596e-05, | |
| "loss": 1.3026, | |
| "step": 12788 | |
| }, | |
| { | |
| "epoch": 56.937777777777775, | |
| "grad_norm": 5.86681604385376, | |
| "learning_rate": 4.1239635222597494e-05, | |
| "loss": 1.3072, | |
| "step": 12811 | |
| }, | |
| { | |
| "epoch": 57.04, | |
| "grad_norm": 6.062530517578125, | |
| "learning_rate": 4.107670773711812e-05, | |
| "loss": 1.284, | |
| "step": 12834 | |
| }, | |
| { | |
| "epoch": 57.14222222222222, | |
| "grad_norm": 5.922295570373535, | |
| "learning_rate": 4.091387805902058e-05, | |
| "loss": 1.2621, | |
| "step": 12857 | |
| }, | |
| { | |
| "epoch": 57.24444444444445, | |
| "grad_norm": 5.438425064086914, | |
| "learning_rate": 4.075114797306589e-05, | |
| "loss": 1.264, | |
| "step": 12880 | |
| }, | |
| { | |
| "epoch": 57.346666666666664, | |
| "grad_norm": 7.964729309082031, | |
| "learning_rate": 4.058851926292353e-05, | |
| "loss": 1.2781, | |
| "step": 12903 | |
| }, | |
| { | |
| "epoch": 57.44888888888889, | |
| "grad_norm": 6.432003498077393, | |
| "learning_rate": 4.042599371115172e-05, | |
| "loss": 1.2787, | |
| "step": 12926 | |
| }, | |
| { | |
| "epoch": 57.55111111111111, | |
| "grad_norm": 5.485337257385254, | |
| "learning_rate": 4.026357309917806e-05, | |
| "loss": 1.2663, | |
| "step": 12949 | |
| }, | |
| { | |
| "epoch": 57.653333333333336, | |
| "grad_norm": 6.874802112579346, | |
| "learning_rate": 4.010125920727982e-05, | |
| "loss": 1.2733, | |
| "step": 12972 | |
| }, | |
| { | |
| "epoch": 57.75555555555555, | |
| "grad_norm": 5.767955303192139, | |
| "learning_rate": 3.993905381456462e-05, | |
| "loss": 1.2763, | |
| "step": 12995 | |
| }, | |
| { | |
| "epoch": 57.85777777777778, | |
| "grad_norm": 5.2443389892578125, | |
| "learning_rate": 3.977695869895073e-05, | |
| "loss": 1.273, | |
| "step": 13018 | |
| }, | |
| { | |
| "epoch": 57.96, | |
| "grad_norm": 7.763814926147461, | |
| "learning_rate": 3.961497563714774e-05, | |
| "loss": 1.2851, | |
| "step": 13041 | |
| }, | |
| { | |
| "epoch": 58.062222222222225, | |
| "grad_norm": 6.231062412261963, | |
| "learning_rate": 3.945310640463705e-05, | |
| "loss": 1.2581, | |
| "step": 13064 | |
| }, | |
| { | |
| "epoch": 58.16444444444444, | |
| "grad_norm": 5.801052093505859, | |
| "learning_rate": 3.9291352775652325e-05, | |
| "loss": 1.2376, | |
| "step": 13087 | |
| }, | |
| { | |
| "epoch": 58.266666666666666, | |
| "grad_norm": 8.022377967834473, | |
| "learning_rate": 3.9129716523160165e-05, | |
| "loss": 1.2403, | |
| "step": 13110 | |
| }, | |
| { | |
| "epoch": 58.36888888888889, | |
| "grad_norm": 6.449449062347412, | |
| "learning_rate": 3.8968199418840575e-05, | |
| "loss": 1.2353, | |
| "step": 13133 | |
| }, | |
| { | |
| "epoch": 58.471111111111114, | |
| "grad_norm": 5.934969902038574, | |
| "learning_rate": 3.880680323306765e-05, | |
| "loss": 1.2575, | |
| "step": 13156 | |
| }, | |
| { | |
| "epoch": 58.57333333333333, | |
| "grad_norm": 6.265482425689697, | |
| "learning_rate": 3.8645529734890014e-05, | |
| "loss": 1.247, | |
| "step": 13179 | |
| }, | |
| { | |
| "epoch": 58.675555555555555, | |
| "grad_norm": 5.975387096405029, | |
| "learning_rate": 3.8484380692011605e-05, | |
| "loss": 1.2634, | |
| "step": 13202 | |
| }, | |
| { | |
| "epoch": 58.77777777777778, | |
| "grad_norm": 6.401468753814697, | |
| "learning_rate": 3.83233578707722e-05, | |
| "loss": 1.244, | |
| "step": 13225 | |
| }, | |
| { | |
| "epoch": 58.88, | |
| "grad_norm": 5.331010341644287, | |
| "learning_rate": 3.816246303612802e-05, | |
| "loss": 1.2459, | |
| "step": 13248 | |
| }, | |
| { | |
| "epoch": 58.98222222222222, | |
| "grad_norm": 5.550204277038574, | |
| "learning_rate": 3.800169795163252e-05, | |
| "loss": 1.2541, | |
| "step": 13271 | |
| }, | |
| { | |
| "epoch": 59.08444444444444, | |
| "grad_norm": 5.241280555725098, | |
| "learning_rate": 3.7841064379416903e-05, | |
| "loss": 1.2155, | |
| "step": 13294 | |
| }, | |
| { | |
| "epoch": 59.18666666666667, | |
| "grad_norm": 6.312388896942139, | |
| "learning_rate": 3.768056408017094e-05, | |
| "loss": 1.2055, | |
| "step": 13317 | |
| }, | |
| { | |
| "epoch": 59.28888888888889, | |
| "grad_norm": 5.525976181030273, | |
| "learning_rate": 3.752019881312354e-05, | |
| "loss": 1.211, | |
| "step": 13340 | |
| }, | |
| { | |
| "epoch": 59.39111111111111, | |
| "grad_norm": 6.092748641967773, | |
| "learning_rate": 3.735997033602361e-05, | |
| "loss": 1.2133, | |
| "step": 13363 | |
| }, | |
| { | |
| "epoch": 59.49333333333333, | |
| "grad_norm": 5.471757888793945, | |
| "learning_rate": 3.719988040512067e-05, | |
| "loss": 1.2267, | |
| "step": 13386 | |
| }, | |
| { | |
| "epoch": 59.595555555555556, | |
| "grad_norm": 6.422407150268555, | |
| "learning_rate": 3.703993077514563e-05, | |
| "loss": 1.2223, | |
| "step": 13409 | |
| }, | |
| { | |
| "epoch": 59.69777777777778, | |
| "grad_norm": 5.488748073577881, | |
| "learning_rate": 3.6880123199291635e-05, | |
| "loss": 1.2217, | |
| "step": 13432 | |
| }, | |
| { | |
| "epoch": 59.8, | |
| "grad_norm": 5.826624393463135, | |
| "learning_rate": 3.672045942919474e-05, | |
| "loss": 1.2216, | |
| "step": 13455 | |
| }, | |
| { | |
| "epoch": 59.90222222222222, | |
| "grad_norm": 5.7313008308410645, | |
| "learning_rate": 3.656094121491479e-05, | |
| "loss": 1.2271, | |
| "step": 13478 | |
| }, | |
| { | |
| "epoch": 60.004444444444445, | |
| "grad_norm": 7.073070526123047, | |
| "learning_rate": 3.6401570304916166e-05, | |
| "loss": 1.222, | |
| "step": 13501 | |
| }, | |
| { | |
| "epoch": 60.10666666666667, | |
| "grad_norm": 5.049999713897705, | |
| "learning_rate": 3.624234844604869e-05, | |
| "loss": 1.1695, | |
| "step": 13524 | |
| }, | |
| { | |
| "epoch": 60.208888888888886, | |
| "grad_norm": 5.1560211181640625, | |
| "learning_rate": 3.6083277383528466e-05, | |
| "loss": 1.1792, | |
| "step": 13547 | |
| }, | |
| { | |
| "epoch": 60.31111111111111, | |
| "grad_norm": 5.553138256072998, | |
| "learning_rate": 3.592435886091867e-05, | |
| "loss": 1.1853, | |
| "step": 13570 | |
| }, | |
| { | |
| "epoch": 60.413333333333334, | |
| "grad_norm": 5.489965438842773, | |
| "learning_rate": 3.576559462011057e-05, | |
| "loss": 1.1918, | |
| "step": 13593 | |
| }, | |
| { | |
| "epoch": 60.51555555555556, | |
| "grad_norm": 6.636351108551025, | |
| "learning_rate": 3.5606986401304324e-05, | |
| "loss": 1.2002, | |
| "step": 13616 | |
| }, | |
| { | |
| "epoch": 60.617777777777775, | |
| "grad_norm": 8.49821662902832, | |
| "learning_rate": 3.544853594298997e-05, | |
| "loss": 1.2062, | |
| "step": 13639 | |
| }, | |
| { | |
| "epoch": 60.72, | |
| "grad_norm": 5.866752624511719, | |
| "learning_rate": 3.529024498192832e-05, | |
| "loss": 1.205, | |
| "step": 13662 | |
| }, | |
| { | |
| "epoch": 60.82222222222222, | |
| "grad_norm": 12.07309627532959, | |
| "learning_rate": 3.5132115253132005e-05, | |
| "loss": 1.2112, | |
| "step": 13685 | |
| }, | |
| { | |
| "epoch": 60.92444444444445, | |
| "grad_norm": 7.421104431152344, | |
| "learning_rate": 3.4974148489846315e-05, | |
| "loss": 1.2229, | |
| "step": 13708 | |
| }, | |
| { | |
| "epoch": 61.026666666666664, | |
| "grad_norm": 5.546532154083252, | |
| "learning_rate": 3.4816346423530385e-05, | |
| "loss": 1.1952, | |
| "step": 13731 | |
| }, | |
| { | |
| "epoch": 61.12888888888889, | |
| "grad_norm": 5.055679798126221, | |
| "learning_rate": 3.465871078383809e-05, | |
| "loss": 1.1628, | |
| "step": 13754 | |
| }, | |
| { | |
| "epoch": 61.23111111111111, | |
| "grad_norm": 6.14479923248291, | |
| "learning_rate": 3.4501243298599055e-05, | |
| "loss": 1.1767, | |
| "step": 13777 | |
| }, | |
| { | |
| "epoch": 61.333333333333336, | |
| "grad_norm": 5.632229328155518, | |
| "learning_rate": 3.434394569379988e-05, | |
| "loss": 1.179, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 61.43555555555555, | |
| "grad_norm": 5.1467671394348145, | |
| "learning_rate": 3.4186819693565046e-05, | |
| "loss": 1.1745, | |
| "step": 13823 | |
| }, | |
| { | |
| "epoch": 61.53777777777778, | |
| "grad_norm": 5.162554740905762, | |
| "learning_rate": 3.4029867020138155e-05, | |
| "loss": 1.1672, | |
| "step": 13846 | |
| }, | |
| { | |
| "epoch": 61.64, | |
| "grad_norm": 5.325419902801514, | |
| "learning_rate": 3.387308939386291e-05, | |
| "loss": 1.1793, | |
| "step": 13869 | |
| }, | |
| { | |
| "epoch": 61.742222222222225, | |
| "grad_norm": 5.7772626876831055, | |
| "learning_rate": 3.371648853316442e-05, | |
| "loss": 1.1706, | |
| "step": 13892 | |
| }, | |
| { | |
| "epoch": 61.84444444444444, | |
| "grad_norm": 7.251054763793945, | |
| "learning_rate": 3.356006615453025e-05, | |
| "loss": 1.1572, | |
| "step": 13915 | |
| }, | |
| { | |
| "epoch": 61.946666666666665, | |
| "grad_norm": 6.169683933258057, | |
| "learning_rate": 3.340382397249159e-05, | |
| "loss": 1.1553, | |
| "step": 13938 | |
| }, | |
| { | |
| "epoch": 62.04888888888889, | |
| "grad_norm": 6.773545742034912, | |
| "learning_rate": 3.324776369960461e-05, | |
| "loss": 1.1603, | |
| "step": 13961 | |
| }, | |
| { | |
| "epoch": 62.15111111111111, | |
| "grad_norm": 6.104127407073975, | |
| "learning_rate": 3.309188704643149e-05, | |
| "loss": 1.1209, | |
| "step": 13984 | |
| }, | |
| { | |
| "epoch": 62.25333333333333, | |
| "grad_norm": 5.433740615844727, | |
| "learning_rate": 3.2936195721521866e-05, | |
| "loss": 1.1373, | |
| "step": 14007 | |
| }, | |
| { | |
| "epoch": 62.355555555555554, | |
| "grad_norm": 5.472240924835205, | |
| "learning_rate": 3.2780691431393926e-05, | |
| "loss": 1.143, | |
| "step": 14030 | |
| }, | |
| { | |
| "epoch": 62.45777777777778, | |
| "grad_norm": 5.382284164428711, | |
| "learning_rate": 3.2625375880515854e-05, | |
| "loss": 1.1471, | |
| "step": 14053 | |
| }, | |
| { | |
| "epoch": 62.56, | |
| "grad_norm": 5.667013168334961, | |
| "learning_rate": 3.2470250771287036e-05, | |
| "loss": 1.1391, | |
| "step": 14076 | |
| }, | |
| { | |
| "epoch": 62.66222222222222, | |
| "grad_norm": 5.519725322723389, | |
| "learning_rate": 3.231531780401943e-05, | |
| "loss": 1.1335, | |
| "step": 14099 | |
| }, | |
| { | |
| "epoch": 62.76444444444444, | |
| "grad_norm": 5.530640125274658, | |
| "learning_rate": 3.2160578676919016e-05, | |
| "loss": 1.1386, | |
| "step": 14122 | |
| }, | |
| { | |
| "epoch": 62.86666666666667, | |
| "grad_norm": 6.683435440063477, | |
| "learning_rate": 3.200603508606703e-05, | |
| "loss": 1.1362, | |
| "step": 14145 | |
| }, | |
| { | |
| "epoch": 62.96888888888889, | |
| "grad_norm": 5.929420471191406, | |
| "learning_rate": 3.185168872540153e-05, | |
| "loss": 1.1455, | |
| "step": 14168 | |
| }, | |
| { | |
| "epoch": 63.07111111111111, | |
| "grad_norm": 6.305390357971191, | |
| "learning_rate": 3.169754128669866e-05, | |
| "loss": 1.1242, | |
| "step": 14191 | |
| }, | |
| { | |
| "epoch": 63.17333333333333, | |
| "grad_norm": 6.4048542976379395, | |
| "learning_rate": 3.154359445955429e-05, | |
| "loss": 1.1263, | |
| "step": 14214 | |
| }, | |
| { | |
| "epoch": 63.275555555555556, | |
| "grad_norm": 5.409482002258301, | |
| "learning_rate": 3.138984993136535e-05, | |
| "loss": 1.1052, | |
| "step": 14237 | |
| }, | |
| { | |
| "epoch": 63.37777777777778, | |
| "grad_norm": 5.47636079788208, | |
| "learning_rate": 3.12363093873114e-05, | |
| "loss": 1.1196, | |
| "step": 14260 | |
| }, | |
| { | |
| "epoch": 63.48, | |
| "grad_norm": 5.092154026031494, | |
| "learning_rate": 3.108297451033616e-05, | |
| "loss": 1.1193, | |
| "step": 14283 | |
| }, | |
| { | |
| "epoch": 63.58222222222222, | |
| "grad_norm": 5.453930377960205, | |
| "learning_rate": 3.092984698112904e-05, | |
| "loss": 1.1182, | |
| "step": 14306 | |
| }, | |
| { | |
| "epoch": 63.684444444444445, | |
| "grad_norm": 6.511165618896484, | |
| "learning_rate": 3.0776928478106754e-05, | |
| "loss": 1.1295, | |
| "step": 14329 | |
| }, | |
| { | |
| "epoch": 63.78666666666667, | |
| "grad_norm": 5.347112655639648, | |
| "learning_rate": 3.062422067739485e-05, | |
| "loss": 1.1239, | |
| "step": 14352 | |
| }, | |
| { | |
| "epoch": 63.888888888888886, | |
| "grad_norm": 5.500729084014893, | |
| "learning_rate": 3.0471725252809458e-05, | |
| "loss": 1.1227, | |
| "step": 14375 | |
| }, | |
| { | |
| "epoch": 63.99111111111111, | |
| "grad_norm": 5.913949489593506, | |
| "learning_rate": 3.0319443875838794e-05, | |
| "loss": 1.1306, | |
| "step": 14398 | |
| }, | |
| { | |
| "epoch": 64.09333333333333, | |
| "grad_norm": 5.112490177154541, | |
| "learning_rate": 3.0167378215624974e-05, | |
| "loss": 1.0993, | |
| "step": 14421 | |
| }, | |
| { | |
| "epoch": 64.19555555555556, | |
| "grad_norm": 5.541341304779053, | |
| "learning_rate": 3.0015529938945668e-05, | |
| "loss": 1.0976, | |
| "step": 14444 | |
| }, | |
| { | |
| "epoch": 64.29777777777778, | |
| "grad_norm": 5.937663555145264, | |
| "learning_rate": 2.9863900710195758e-05, | |
| "loss": 1.0953, | |
| "step": 14467 | |
| }, | |
| { | |
| "epoch": 64.4, | |
| "grad_norm": 5.4565558433532715, | |
| "learning_rate": 2.9712492191369244e-05, | |
| "loss": 1.0998, | |
| "step": 14490 | |
| }, | |
| { | |
| "epoch": 64.50222222222222, | |
| "grad_norm": 6.276011943817139, | |
| "learning_rate": 2.956130604204089e-05, | |
| "loss": 1.1113, | |
| "step": 14513 | |
| }, | |
| { | |
| "epoch": 64.60444444444444, | |
| "grad_norm": 5.444122791290283, | |
| "learning_rate": 2.9410343919348127e-05, | |
| "loss": 1.108, | |
| "step": 14536 | |
| }, | |
| { | |
| "epoch": 64.70666666666666, | |
| "grad_norm": 5.791774749755859, | |
| "learning_rate": 2.9259607477972794e-05, | |
| "loss": 1.1149, | |
| "step": 14559 | |
| }, | |
| { | |
| "epoch": 64.80888888888889, | |
| "grad_norm": 6.028242588043213, | |
| "learning_rate": 2.9109098370123132e-05, | |
| "loss": 1.1236, | |
| "step": 14582 | |
| }, | |
| { | |
| "epoch": 64.91111111111111, | |
| "grad_norm": 6.835079193115234, | |
| "learning_rate": 2.8958818245515533e-05, | |
| "loss": 1.1148, | |
| "step": 14605 | |
| }, | |
| { | |
| "epoch": 65.01333333333334, | |
| "grad_norm": 5.5959792137146, | |
| "learning_rate": 2.8808768751356564e-05, | |
| "loss": 1.1054, | |
| "step": 14628 | |
| }, | |
| { | |
| "epoch": 65.11555555555556, | |
| "grad_norm": 5.705920219421387, | |
| "learning_rate": 2.865895153232489e-05, | |
| "loss": 1.0824, | |
| "step": 14651 | |
| }, | |
| { | |
| "epoch": 65.21777777777778, | |
| "grad_norm": 4.9849934577941895, | |
| "learning_rate": 2.8509368230553157e-05, | |
| "loss": 1.077, | |
| "step": 14674 | |
| }, | |
| { | |
| "epoch": 65.32, | |
| "grad_norm": 5.702665328979492, | |
| "learning_rate": 2.8360020485610163e-05, | |
| "loss": 1.0514, | |
| "step": 14697 | |
| }, | |
| { | |
| "epoch": 65.42222222222222, | |
| "grad_norm": 5.4493207931518555, | |
| "learning_rate": 2.8210909934482678e-05, | |
| "loss": 1.0653, | |
| "step": 14720 | |
| }, | |
| { | |
| "epoch": 65.52444444444444, | |
| "grad_norm": 5.684943199157715, | |
| "learning_rate": 2.8062038211557728e-05, | |
| "loss": 1.0641, | |
| "step": 14743 | |
| }, | |
| { | |
| "epoch": 65.62666666666667, | |
| "grad_norm": 5.757254123687744, | |
| "learning_rate": 2.791340694860446e-05, | |
| "loss": 1.0754, | |
| "step": 14766 | |
| }, | |
| { | |
| "epoch": 65.72888888888889, | |
| "grad_norm": 5.588274955749512, | |
| "learning_rate": 2.776501777475644e-05, | |
| "loss": 1.0768, | |
| "step": 14789 | |
| }, | |
| { | |
| "epoch": 65.83111111111111, | |
| "grad_norm": 5.547431945800781, | |
| "learning_rate": 2.7616872316493708e-05, | |
| "loss": 1.078, | |
| "step": 14812 | |
| }, | |
| { | |
| "epoch": 65.93333333333334, | |
| "grad_norm": 5.201080322265625, | |
| "learning_rate": 2.7468972197624897e-05, | |
| "loss": 1.0824, | |
| "step": 14835 | |
| }, | |
| { | |
| "epoch": 66.03555555555556, | |
| "grad_norm": 6.8083271980285645, | |
| "learning_rate": 2.7321319039269576e-05, | |
| "loss": 1.07, | |
| "step": 14858 | |
| }, | |
| { | |
| "epoch": 66.13777777777777, | |
| "grad_norm": 6.262781620025635, | |
| "learning_rate": 2.7173914459840342e-05, | |
| "loss": 1.0395, | |
| "step": 14881 | |
| }, | |
| { | |
| "epoch": 66.24, | |
| "grad_norm": 5.109470844268799, | |
| "learning_rate": 2.7026760075025192e-05, | |
| "loss": 1.0467, | |
| "step": 14904 | |
| }, | |
| { | |
| "epoch": 66.34222222222222, | |
| "grad_norm": 5.397584915161133, | |
| "learning_rate": 2.6879857497769712e-05, | |
| "loss": 1.0531, | |
| "step": 14927 | |
| }, | |
| { | |
| "epoch": 66.44444444444444, | |
| "grad_norm": 5.602553844451904, | |
| "learning_rate": 2.6733208338259486e-05, | |
| "loss": 1.045, | |
| "step": 14950 | |
| }, | |
| { | |
| "epoch": 66.54666666666667, | |
| "grad_norm": 5.551428318023682, | |
| "learning_rate": 2.6586814203902422e-05, | |
| "loss": 1.042, | |
| "step": 14973 | |
| }, | |
| { | |
| "epoch": 66.64888888888889, | |
| "grad_norm": 5.80933952331543, | |
| "learning_rate": 2.6440676699311062e-05, | |
| "loss": 1.0555, | |
| "step": 14996 | |
| }, | |
| { | |
| "epoch": 66.75111111111111, | |
| "grad_norm": 5.058752536773682, | |
| "learning_rate": 2.6294797426285112e-05, | |
| "loss": 1.0507, | |
| "step": 15019 | |
| }, | |
| { | |
| "epoch": 66.85333333333334, | |
| "grad_norm": 7.067930221557617, | |
| "learning_rate": 2.6149177983793783e-05, | |
| "loss": 1.0599, | |
| "step": 15042 | |
| }, | |
| { | |
| "epoch": 66.95555555555555, | |
| "grad_norm": 5.901451587677002, | |
| "learning_rate": 2.6003819967958344e-05, | |
| "loss": 1.0527, | |
| "step": 15065 | |
| }, | |
| { | |
| "epoch": 67.05777777777777, | |
| "grad_norm": 5.727104663848877, | |
| "learning_rate": 2.5858724972034555e-05, | |
| "loss": 1.0395, | |
| "step": 15088 | |
| }, | |
| { | |
| "epoch": 67.16, | |
| "grad_norm": 7.644411563873291, | |
| "learning_rate": 2.5713894586395283e-05, | |
| "loss": 1.0326, | |
| "step": 15111 | |
| }, | |
| { | |
| "epoch": 67.26222222222222, | |
| "grad_norm": 4.788581848144531, | |
| "learning_rate": 2.5569330398512957e-05, | |
| "loss": 1.0388, | |
| "step": 15134 | |
| }, | |
| { | |
| "epoch": 67.36444444444444, | |
| "grad_norm": 4.921880722045898, | |
| "learning_rate": 2.5425033992942316e-05, | |
| "loss": 1.0413, | |
| "step": 15157 | |
| }, | |
| { | |
| "epoch": 67.46666666666667, | |
| "grad_norm": 5.7385735511779785, | |
| "learning_rate": 2.5281006951302934e-05, | |
| "loss": 1.0328, | |
| "step": 15180 | |
| }, | |
| { | |
| "epoch": 67.56888888888889, | |
| "grad_norm": 5.9198689460754395, | |
| "learning_rate": 2.5137250852261862e-05, | |
| "loss": 1.0416, | |
| "step": 15203 | |
| }, | |
| { | |
| "epoch": 67.67111111111112, | |
| "grad_norm": 5.01896858215332, | |
| "learning_rate": 2.499376727151646e-05, | |
| "loss": 1.0455, | |
| "step": 15226 | |
| }, | |
| { | |
| "epoch": 67.77333333333333, | |
| "grad_norm": 5.580973148345947, | |
| "learning_rate": 2.485055778177696e-05, | |
| "loss": 1.0487, | |
| "step": 15249 | |
| }, | |
| { | |
| "epoch": 67.87555555555555, | |
| "grad_norm": 4.777526378631592, | |
| "learning_rate": 2.470762395274938e-05, | |
| "loss": 1.0434, | |
| "step": 15272 | |
| }, | |
| { | |
| "epoch": 67.97777777777777, | |
| "grad_norm": 7.526794910430908, | |
| "learning_rate": 2.4564967351118175e-05, | |
| "loss": 1.0477, | |
| "step": 15295 | |
| }, | |
| { | |
| "epoch": 68.08, | |
| "grad_norm": 6.90614128112793, | |
| "learning_rate": 2.4422589540529185e-05, | |
| "loss": 1.0341, | |
| "step": 15318 | |
| }, | |
| { | |
| "epoch": 68.18222222222222, | |
| "grad_norm": 6.120336532592773, | |
| "learning_rate": 2.4280492081572455e-05, | |
| "loss": 1.0169, | |
| "step": 15341 | |
| }, | |
| { | |
| "epoch": 68.28444444444445, | |
| "grad_norm": 5.239770889282227, | |
| "learning_rate": 2.413867653176506e-05, | |
| "loss": 1.0155, | |
| "step": 15364 | |
| }, | |
| { | |
| "epoch": 68.38666666666667, | |
| "grad_norm": 5.342464923858643, | |
| "learning_rate": 2.3997144445534175e-05, | |
| "loss": 1.0343, | |
| "step": 15387 | |
| }, | |
| { | |
| "epoch": 68.4888888888889, | |
| "grad_norm": 6.170787811279297, | |
| "learning_rate": 2.3855897374199883e-05, | |
| "loss": 1.0101, | |
| "step": 15410 | |
| }, | |
| { | |
| "epoch": 68.5911111111111, | |
| "grad_norm": 7.313038349151611, | |
| "learning_rate": 2.371493686595831e-05, | |
| "loss": 1.0369, | |
| "step": 15433 | |
| }, | |
| { | |
| "epoch": 68.69333333333333, | |
| "grad_norm": 5.434996604919434, | |
| "learning_rate": 2.3574264465864527e-05, | |
| "loss": 1.0345, | |
| "step": 15456 | |
| }, | |
| { | |
| "epoch": 68.79555555555555, | |
| "grad_norm": 6.723358631134033, | |
| "learning_rate": 2.343388171581573e-05, | |
| "loss": 1.0309, | |
| "step": 15479 | |
| }, | |
| { | |
| "epoch": 68.89777777777778, | |
| "grad_norm": 5.317188262939453, | |
| "learning_rate": 2.3293790154534283e-05, | |
| "loss": 1.0314, | |
| "step": 15502 | |
| }, | |
| { | |
| "epoch": 69.0, | |
| "grad_norm": 6.149099349975586, | |
| "learning_rate": 2.315399131755081e-05, | |
| "loss": 1.0313, | |
| "step": 15525 | |
| }, | |
| { | |
| "epoch": 69.10222222222222, | |
| "grad_norm": 5.885578632354736, | |
| "learning_rate": 2.3014486737187475e-05, | |
| "loss": 1.0127, | |
| "step": 15548 | |
| }, | |
| { | |
| "epoch": 69.20444444444445, | |
| "grad_norm": 5.442347049713135, | |
| "learning_rate": 2.2875277942541057e-05, | |
| "loss": 1.0002, | |
| "step": 15571 | |
| }, | |
| { | |
| "epoch": 69.30666666666667, | |
| "grad_norm": 5.002798080444336, | |
| "learning_rate": 2.2736366459466326e-05, | |
| "loss": 1.0208, | |
| "step": 15594 | |
| }, | |
| { | |
| "epoch": 69.4088888888889, | |
| "grad_norm": 4.764693737030029, | |
| "learning_rate": 2.259775381055917e-05, | |
| "loss": 1.0147, | |
| "step": 15617 | |
| }, | |
| { | |
| "epoch": 69.5111111111111, | |
| "grad_norm": 5.556255340576172, | |
| "learning_rate": 2.2459441515140044e-05, | |
| "loss": 0.9888, | |
| "step": 15640 | |
| }, | |
| { | |
| "epoch": 69.61333333333333, | |
| "grad_norm": 5.241755485534668, | |
| "learning_rate": 2.2321431089237256e-05, | |
| "loss": 0.9846, | |
| "step": 15663 | |
| }, | |
| { | |
| "epoch": 69.71555555555555, | |
| "grad_norm": 5.701202869415283, | |
| "learning_rate": 2.2183724045570286e-05, | |
| "loss": 0.9872, | |
| "step": 15686 | |
| }, | |
| { | |
| "epoch": 69.81777777777778, | |
| "grad_norm": 8.224358558654785, | |
| "learning_rate": 2.2046321893533362e-05, | |
| "loss": 0.9898, | |
| "step": 15709 | |
| }, | |
| { | |
| "epoch": 69.92, | |
| "grad_norm": 5.965829849243164, | |
| "learning_rate": 2.1909226139178723e-05, | |
| "loss": 0.9831, | |
| "step": 15732 | |
| }, | |
| { | |
| "epoch": 70.02222222222223, | |
| "grad_norm": 5.391206741333008, | |
| "learning_rate": 2.1772438285200312e-05, | |
| "loss": 0.9954, | |
| "step": 15755 | |
| }, | |
| { | |
| "epoch": 70.12444444444445, | |
| "grad_norm": 6.74372673034668, | |
| "learning_rate": 2.1635959830917107e-05, | |
| "loss": 0.9651, | |
| "step": 15778 | |
| }, | |
| { | |
| "epoch": 70.22666666666667, | |
| "grad_norm": 5.2756123542785645, | |
| "learning_rate": 2.149979227225688e-05, | |
| "loss": 0.9698, | |
| "step": 15801 | |
| }, | |
| { | |
| "epoch": 70.32888888888888, | |
| "grad_norm": 6.822518825531006, | |
| "learning_rate": 2.1363937101739613e-05, | |
| "loss": 0.9771, | |
| "step": 15824 | |
| }, | |
| { | |
| "epoch": 70.43111111111111, | |
| "grad_norm": 5.256137847900391, | |
| "learning_rate": 2.1228395808461294e-05, | |
| "loss": 0.9962, | |
| "step": 15847 | |
| }, | |
| { | |
| "epoch": 70.53333333333333, | |
| "grad_norm": 4.483437538146973, | |
| "learning_rate": 2.1093169878077533e-05, | |
| "loss": 0.9735, | |
| "step": 15870 | |
| }, | |
| { | |
| "epoch": 70.63555555555556, | |
| "grad_norm": 6.114633083343506, | |
| "learning_rate": 2.0958260792787215e-05, | |
| "loss": 0.9839, | |
| "step": 15893 | |
| }, | |
| { | |
| "epoch": 70.73777777777778, | |
| "grad_norm": 5.309250831604004, | |
| "learning_rate": 2.08236700313164e-05, | |
| "loss": 0.9745, | |
| "step": 15916 | |
| }, | |
| { | |
| "epoch": 70.84, | |
| "grad_norm": 5.820844650268555, | |
| "learning_rate": 2.068939906890194e-05, | |
| "loss": 0.9786, | |
| "step": 15939 | |
| }, | |
| { | |
| "epoch": 70.94222222222223, | |
| "grad_norm": 5.038022041320801, | |
| "learning_rate": 2.055544937727549e-05, | |
| "loss": 0.9912, | |
| "step": 15962 | |
| }, | |
| { | |
| "epoch": 71.04444444444445, | |
| "grad_norm": 5.100025177001953, | |
| "learning_rate": 2.042182242464719e-05, | |
| "loss": 0.9748, | |
| "step": 15985 | |
| }, | |
| { | |
| "epoch": 71.14666666666666, | |
| "grad_norm": 5.8269829750061035, | |
| "learning_rate": 2.0288519675689755e-05, | |
| "loss": 0.9614, | |
| "step": 16008 | |
| }, | |
| { | |
| "epoch": 71.24888888888889, | |
| "grad_norm": 5.484350681304932, | |
| "learning_rate": 2.0155542591522303e-05, | |
| "loss": 0.9655, | |
| "step": 16031 | |
| }, | |
| { | |
| "epoch": 71.35111111111111, | |
| "grad_norm": 5.463179111480713, | |
| "learning_rate": 2.0022892629694335e-05, | |
| "loss": 0.9633, | |
| "step": 16054 | |
| }, | |
| { | |
| "epoch": 71.45333333333333, | |
| "grad_norm": 6.4749579429626465, | |
| "learning_rate": 1.9890571244169854e-05, | |
| "loss": 0.9643, | |
| "step": 16077 | |
| }, | |
| { | |
| "epoch": 71.55555555555556, | |
| "grad_norm": 5.12134313583374, | |
| "learning_rate": 1.97585798853113e-05, | |
| "loss": 0.9771, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 71.65777777777778, | |
| "grad_norm": 5.494293212890625, | |
| "learning_rate": 1.9626919999863802e-05, | |
| "loss": 0.9833, | |
| "step": 16123 | |
| }, | |
| { | |
| "epoch": 71.76, | |
| "grad_norm": 6.645090579986572, | |
| "learning_rate": 1.9495593030939157e-05, | |
| "loss": 0.966, | |
| "step": 16146 | |
| }, | |
| { | |
| "epoch": 71.86222222222223, | |
| "grad_norm": 5.469064235687256, | |
| "learning_rate": 1.9364600418000156e-05, | |
| "loss": 0.9752, | |
| "step": 16169 | |
| }, | |
| { | |
| "epoch": 71.96444444444444, | |
| "grad_norm": 7.400743007659912, | |
| "learning_rate": 1.9233943596844734e-05, | |
| "loss": 0.9729, | |
| "step": 16192 | |
| }, | |
| { | |
| "epoch": 72.06666666666666, | |
| "grad_norm": 5.228180408477783, | |
| "learning_rate": 1.9103623999590202e-05, | |
| "loss": 0.9706, | |
| "step": 16215 | |
| }, | |
| { | |
| "epoch": 72.16888888888889, | |
| "grad_norm": 5.571268081665039, | |
| "learning_rate": 1.897364305465766e-05, | |
| "loss": 0.9544, | |
| "step": 16238 | |
| }, | |
| { | |
| "epoch": 72.27111111111111, | |
| "grad_norm": 5.692650318145752, | |
| "learning_rate": 1.884400218675619e-05, | |
| "loss": 0.9577, | |
| "step": 16261 | |
| }, | |
| { | |
| "epoch": 72.37333333333333, | |
| "grad_norm": 5.098461151123047, | |
| "learning_rate": 1.87147028168674e-05, | |
| "loss": 0.952, | |
| "step": 16284 | |
| }, | |
| { | |
| "epoch": 72.47555555555556, | |
| "grad_norm": 5.3133745193481445, | |
| "learning_rate": 1.8585746362229706e-05, | |
| "loss": 0.9623, | |
| "step": 16307 | |
| }, | |
| { | |
| "epoch": 72.57777777777778, | |
| "grad_norm": 5.299659729003906, | |
| "learning_rate": 1.8457134236322903e-05, | |
| "loss": 0.9505, | |
| "step": 16330 | |
| }, | |
| { | |
| "epoch": 72.68, | |
| "grad_norm": 6.57431173324585, | |
| "learning_rate": 1.832886784885263e-05, | |
| "loss": 0.9665, | |
| "step": 16353 | |
| }, | |
| { | |
| "epoch": 72.78222222222222, | |
| "grad_norm": 5.018616199493408, | |
| "learning_rate": 1.820094860573488e-05, | |
| "loss": 0.9565, | |
| "step": 16376 | |
| }, | |
| { | |
| "epoch": 72.88444444444444, | |
| "grad_norm": 5.487111568450928, | |
| "learning_rate": 1.8073377909080685e-05, | |
| "loss": 0.9551, | |
| "step": 16399 | |
| }, | |
| { | |
| "epoch": 72.98666666666666, | |
| "grad_norm": 6.0984086990356445, | |
| "learning_rate": 1.7946157157180628e-05, | |
| "loss": 0.9743, | |
| "step": 16422 | |
| }, | |
| { | |
| "epoch": 73.08888888888889, | |
| "grad_norm": 5.412441730499268, | |
| "learning_rate": 1.7819287744489636e-05, | |
| "loss": 0.9316, | |
| "step": 16445 | |
| }, | |
| { | |
| "epoch": 73.19111111111111, | |
| "grad_norm": 5.8434929847717285, | |
| "learning_rate": 1.7692771061611603e-05, | |
| "loss": 0.947, | |
| "step": 16468 | |
| }, | |
| { | |
| "epoch": 73.29333333333334, | |
| "grad_norm": 5.178957462310791, | |
| "learning_rate": 1.756660849528422e-05, | |
| "loss": 0.9455, | |
| "step": 16491 | |
| }, | |
| { | |
| "epoch": 73.39555555555556, | |
| "grad_norm": 6.5831499099731445, | |
| "learning_rate": 1.7440801428363677e-05, | |
| "loss": 0.9469, | |
| "step": 16514 | |
| }, | |
| { | |
| "epoch": 73.49777777777778, | |
| "grad_norm": 5.628024101257324, | |
| "learning_rate": 1.731535123980964e-05, | |
| "loss": 0.961, | |
| "step": 16537 | |
| }, | |
| { | |
| "epoch": 73.6, | |
| "grad_norm": 4.770416736602783, | |
| "learning_rate": 1.7190259304670038e-05, | |
| "loss": 0.9489, | |
| "step": 16560 | |
| }, | |
| { | |
| "epoch": 73.70222222222222, | |
| "grad_norm": 5.419926166534424, | |
| "learning_rate": 1.7065526994065973e-05, | |
| "loss": 0.9384, | |
| "step": 16583 | |
| }, | |
| { | |
| "epoch": 73.80444444444444, | |
| "grad_norm": 5.695985794067383, | |
| "learning_rate": 1.6941155675176823e-05, | |
| "loss": 0.9386, | |
| "step": 16606 | |
| }, | |
| { | |
| "epoch": 73.90666666666667, | |
| "grad_norm": 5.251271724700928, | |
| "learning_rate": 1.6817146711225073e-05, | |
| "loss": 0.9577, | |
| "step": 16629 | |
| }, | |
| { | |
| "epoch": 74.00888888888889, | |
| "grad_norm": 5.220533847808838, | |
| "learning_rate": 1.669350146146156e-05, | |
| "loss": 0.9513, | |
| "step": 16652 | |
| }, | |
| { | |
| "epoch": 74.11111111111111, | |
| "grad_norm": 5.326650142669678, | |
| "learning_rate": 1.65702212811504e-05, | |
| "loss": 0.9399, | |
| "step": 16675 | |
| }, | |
| { | |
| "epoch": 74.21333333333334, | |
| "grad_norm": 5.140909194946289, | |
| "learning_rate": 1.6447307521554273e-05, | |
| "loss": 0.9273, | |
| "step": 16698 | |
| }, | |
| { | |
| "epoch": 74.31555555555556, | |
| "grad_norm": 5.344797611236572, | |
| "learning_rate": 1.6324761529919556e-05, | |
| "loss": 0.942, | |
| "step": 16721 | |
| }, | |
| { | |
| "epoch": 74.41777777777777, | |
| "grad_norm": 5.0787835121154785, | |
| "learning_rate": 1.6202584649461505e-05, | |
| "loss": 0.9358, | |
| "step": 16744 | |
| }, | |
| { | |
| "epoch": 74.52, | |
| "grad_norm": 4.678197383880615, | |
| "learning_rate": 1.608077821934965e-05, | |
| "loss": 0.9313, | |
| "step": 16767 | |
| }, | |
| { | |
| "epoch": 74.62222222222222, | |
| "grad_norm": 5.813838005065918, | |
| "learning_rate": 1.5959343574692982e-05, | |
| "loss": 0.9375, | |
| "step": 16790 | |
| }, | |
| { | |
| "epoch": 74.72444444444444, | |
| "grad_norm": 7.276843070983887, | |
| "learning_rate": 1.5838282046525444e-05, | |
| "loss": 0.9359, | |
| "step": 16813 | |
| }, | |
| { | |
| "epoch": 74.82666666666667, | |
| "grad_norm": 5.635644435882568, | |
| "learning_rate": 1.571759496179123e-05, | |
| "loss": 0.9444, | |
| "step": 16836 | |
| }, | |
| { | |
| "epoch": 74.92888888888889, | |
| "grad_norm": 5.5287556648254395, | |
| "learning_rate": 1.5597283643330347e-05, | |
| "loss": 0.9345, | |
| "step": 16859 | |
| }, | |
| { | |
| "epoch": 75.03111111111112, | |
| "grad_norm": 5.956721782684326, | |
| "learning_rate": 1.547734940986404e-05, | |
| "loss": 0.9618, | |
| "step": 16882 | |
| }, | |
| { | |
| "epoch": 75.13333333333334, | |
| "grad_norm": 6.450102806091309, | |
| "learning_rate": 1.535779357598033e-05, | |
| "loss": 0.9266, | |
| "step": 16905 | |
| }, | |
| { | |
| "epoch": 75.23555555555555, | |
| "grad_norm": 5.966337203979492, | |
| "learning_rate": 1.5238617452119697e-05, | |
| "loss": 0.9089, | |
| "step": 16928 | |
| }, | |
| { | |
| "epoch": 75.33777777777777, | |
| "grad_norm": 5.400455474853516, | |
| "learning_rate": 1.5119822344560591e-05, | |
| "loss": 0.8967, | |
| "step": 16951 | |
| }, | |
| { | |
| "epoch": 75.44, | |
| "grad_norm": 5.6878180503845215, | |
| "learning_rate": 1.5001409555405238e-05, | |
| "loss": 0.9058, | |
| "step": 16974 | |
| }, | |
| { | |
| "epoch": 75.54222222222222, | |
| "grad_norm": 5.092850685119629, | |
| "learning_rate": 1.4883380382565244e-05, | |
| "loss": 0.9037, | |
| "step": 16997 | |
| }, | |
| { | |
| "epoch": 75.64444444444445, | |
| "grad_norm": 7.444413185119629, | |
| "learning_rate": 1.4765736119747475e-05, | |
| "loss": 0.9191, | |
| "step": 17020 | |
| }, | |
| { | |
| "epoch": 75.74666666666667, | |
| "grad_norm": 5.114320755004883, | |
| "learning_rate": 1.4648478056439847e-05, | |
| "loss": 0.9132, | |
| "step": 17043 | |
| }, | |
| { | |
| "epoch": 75.8488888888889, | |
| "grad_norm": 5.615855693817139, | |
| "learning_rate": 1.453160747789712e-05, | |
| "loss": 0.9064, | |
| "step": 17066 | |
| }, | |
| { | |
| "epoch": 75.95111111111112, | |
| "grad_norm": 5.120584964752197, | |
| "learning_rate": 1.4415125665126933e-05, | |
| "loss": 0.9149, | |
| "step": 17089 | |
| }, | |
| { | |
| "epoch": 76.05333333333333, | |
| "grad_norm": 5.242557048797607, | |
| "learning_rate": 1.4299033894875647e-05, | |
| "loss": 0.8938, | |
| "step": 17112 | |
| }, | |
| { | |
| "epoch": 76.15555555555555, | |
| "grad_norm": 5.4338297843933105, | |
| "learning_rate": 1.4183333439614449e-05, | |
| "loss": 0.8982, | |
| "step": 17135 | |
| }, | |
| { | |
| "epoch": 76.25777777777778, | |
| "grad_norm": 4.58558988571167, | |
| "learning_rate": 1.4068025567525317e-05, | |
| "loss": 0.8992, | |
| "step": 17158 | |
| }, | |
| { | |
| "epoch": 76.36, | |
| "grad_norm": 5.754461765289307, | |
| "learning_rate": 1.3953111542487202e-05, | |
| "loss": 0.91, | |
| "step": 17181 | |
| }, | |
| { | |
| "epoch": 76.46222222222222, | |
| "grad_norm": 4.953834533691406, | |
| "learning_rate": 1.383859262406208e-05, | |
| "loss": 0.9014, | |
| "step": 17204 | |
| }, | |
| { | |
| "epoch": 76.56444444444445, | |
| "grad_norm": 5.375875473022461, | |
| "learning_rate": 1.3724470067481255e-05, | |
| "loss": 0.9027, | |
| "step": 17227 | |
| }, | |
| { | |
| "epoch": 76.66666666666667, | |
| "grad_norm": 5.019064426422119, | |
| "learning_rate": 1.3610745123631535e-05, | |
| "loss": 0.8902, | |
| "step": 17250 | |
| }, | |
| { | |
| "epoch": 76.7688888888889, | |
| "grad_norm": 7.214736461639404, | |
| "learning_rate": 1.3497419039041488e-05, | |
| "loss": 0.9004, | |
| "step": 17273 | |
| }, | |
| { | |
| "epoch": 76.8711111111111, | |
| "grad_norm": 5.181694507598877, | |
| "learning_rate": 1.3384493055867885e-05, | |
| "loss": 0.8949, | |
| "step": 17296 | |
| }, | |
| { | |
| "epoch": 76.97333333333333, | |
| "grad_norm": 5.116537094116211, | |
| "learning_rate": 1.3271968411881963e-05, | |
| "loss": 0.8958, | |
| "step": 17319 | |
| }, | |
| { | |
| "epoch": 77.07555555555555, | |
| "grad_norm": 4.765411853790283, | |
| "learning_rate": 1.3159846340455967e-05, | |
| "loss": 0.8901, | |
| "step": 17342 | |
| }, | |
| { | |
| "epoch": 77.17777777777778, | |
| "grad_norm": 4.765920639038086, | |
| "learning_rate": 1.3048128070549543e-05, | |
| "loss": 0.8875, | |
| "step": 17365 | |
| }, | |
| { | |
| "epoch": 77.28, | |
| "grad_norm": 4.69777250289917, | |
| "learning_rate": 1.2936814826696324e-05, | |
| "loss": 0.881, | |
| "step": 17388 | |
| }, | |
| { | |
| "epoch": 77.38222222222223, | |
| "grad_norm": 4.7684550285339355, | |
| "learning_rate": 1.2825907828990518e-05, | |
| "loss": 0.8835, | |
| "step": 17411 | |
| }, | |
| { | |
| "epoch": 77.48444444444445, | |
| "grad_norm": 4.776817321777344, | |
| "learning_rate": 1.271540829307344e-05, | |
| "loss": 0.8896, | |
| "step": 17434 | |
| }, | |
| { | |
| "epoch": 77.58666666666667, | |
| "grad_norm": 4.983736038208008, | |
| "learning_rate": 1.2605317430120311e-05, | |
| "loss": 0.8845, | |
| "step": 17457 | |
| }, | |
| { | |
| "epoch": 77.68888888888888, | |
| "grad_norm": 5.313802719116211, | |
| "learning_rate": 1.2495636446826891e-05, | |
| "loss": 0.8922, | |
| "step": 17480 | |
| }, | |
| { | |
| "epoch": 77.7911111111111, | |
| "grad_norm": 4.997971534729004, | |
| "learning_rate": 1.2386366545396328e-05, | |
| "loss": 0.8856, | |
| "step": 17503 | |
| }, | |
| { | |
| "epoch": 77.89333333333333, | |
| "grad_norm": 5.876720905303955, | |
| "learning_rate": 1.2277508923525876e-05, | |
| "loss": 0.8838, | |
| "step": 17526 | |
| }, | |
| { | |
| "epoch": 77.99555555555555, | |
| "grad_norm": 4.762071132659912, | |
| "learning_rate": 1.216906477439389e-05, | |
| "loss": 0.8814, | |
| "step": 17549 | |
| }, | |
| { | |
| "epoch": 78.09777777777778, | |
| "grad_norm": 4.621342658996582, | |
| "learning_rate": 1.2061035286646677e-05, | |
| "loss": 0.8764, | |
| "step": 17572 | |
| }, | |
| { | |
| "epoch": 78.2, | |
| "grad_norm": 5.084928035736084, | |
| "learning_rate": 1.1953421644385443e-05, | |
| "loss": 0.8747, | |
| "step": 17595 | |
| }, | |
| { | |
| "epoch": 78.30222222222223, | |
| "grad_norm": 4.952382564544678, | |
| "learning_rate": 1.1846225027153401e-05, | |
| "loss": 0.8886, | |
| "step": 17618 | |
| }, | |
| { | |
| "epoch": 78.40444444444445, | |
| "grad_norm": 4.579256534576416, | |
| "learning_rate": 1.1739446609922739e-05, | |
| "loss": 0.8729, | |
| "step": 17641 | |
| }, | |
| { | |
| "epoch": 78.50666666666666, | |
| "grad_norm": 5.518742561340332, | |
| "learning_rate": 1.1633087563081847e-05, | |
| "loss": 0.8863, | |
| "step": 17664 | |
| }, | |
| { | |
| "epoch": 78.60888888888888, | |
| "grad_norm": 4.966059684753418, | |
| "learning_rate": 1.1527149052422382e-05, | |
| "loss": 0.8839, | |
| "step": 17687 | |
| }, | |
| { | |
| "epoch": 78.71111111111111, | |
| "grad_norm": 5.001364707946777, | |
| "learning_rate": 1.1421632239126578e-05, | |
| "loss": 0.8893, | |
| "step": 17710 | |
| }, | |
| { | |
| "epoch": 78.81333333333333, | |
| "grad_norm": 4.7873854637146, | |
| "learning_rate": 1.131653827975449e-05, | |
| "loss": 0.8695, | |
| "step": 17733 | |
| }, | |
| { | |
| "epoch": 78.91555555555556, | |
| "grad_norm": 5.2424516677856445, | |
| "learning_rate": 1.1211868326231273e-05, | |
| "loss": 0.8857, | |
| "step": 17756 | |
| }, | |
| { | |
| "epoch": 79.01777777777778, | |
| "grad_norm": 4.72099494934082, | |
| "learning_rate": 1.1107623525834631e-05, | |
| "loss": 0.8844, | |
| "step": 17779 | |
| }, | |
| { | |
| "epoch": 79.12, | |
| "grad_norm": 5.387650489807129, | |
| "learning_rate": 1.1003805021182168e-05, | |
| "loss": 0.8672, | |
| "step": 17802 | |
| }, | |
| { | |
| "epoch": 79.22222222222223, | |
| "grad_norm": 6.549093246459961, | |
| "learning_rate": 1.0900413950218947e-05, | |
| "loss": 0.8639, | |
| "step": 17825 | |
| }, | |
| { | |
| "epoch": 79.32444444444444, | |
| "grad_norm": 5.805511951446533, | |
| "learning_rate": 1.0797451446204904e-05, | |
| "loss": 0.8738, | |
| "step": 17848 | |
| }, | |
| { | |
| "epoch": 79.42666666666666, | |
| "grad_norm": 5.417078018188477, | |
| "learning_rate": 1.0694918637702562e-05, | |
| "loss": 0.8815, | |
| "step": 17871 | |
| }, | |
| { | |
| "epoch": 79.52888888888889, | |
| "grad_norm": 4.696217060089111, | |
| "learning_rate": 1.0592816648564535e-05, | |
| "loss": 0.8824, | |
| "step": 17894 | |
| }, | |
| { | |
| "epoch": 79.63111111111111, | |
| "grad_norm": 4.98297119140625, | |
| "learning_rate": 1.0491146597921309e-05, | |
| "loss": 0.8617, | |
| "step": 17917 | |
| }, | |
| { | |
| "epoch": 79.73333333333333, | |
| "grad_norm": 4.85457181930542, | |
| "learning_rate": 1.0389909600168911e-05, | |
| "loss": 0.8715, | |
| "step": 17940 | |
| }, | |
| { | |
| "epoch": 79.83555555555556, | |
| "grad_norm": 5.266817092895508, | |
| "learning_rate": 1.0289106764956702e-05, | |
| "loss": 0.8754, | |
| "step": 17963 | |
| }, | |
| { | |
| "epoch": 79.93777777777778, | |
| "grad_norm": 5.948962688446045, | |
| "learning_rate": 1.0188739197175268e-05, | |
| "loss": 0.8806, | |
| "step": 17986 | |
| }, | |
| { | |
| "epoch": 80.04, | |
| "grad_norm": 6.155448913574219, | |
| "learning_rate": 1.0088807996944211e-05, | |
| "loss": 0.8767, | |
| "step": 18009 | |
| }, | |
| { | |
| "epoch": 80.14222222222222, | |
| "grad_norm": 4.785376071929932, | |
| "learning_rate": 9.989314259600219e-06, | |
| "loss": 0.8719, | |
| "step": 18032 | |
| }, | |
| { | |
| "epoch": 80.24444444444444, | |
| "grad_norm": 4.980493545532227, | |
| "learning_rate": 9.890259075684915e-06, | |
| "loss": 0.866, | |
| "step": 18055 | |
| }, | |
| { | |
| "epoch": 80.34666666666666, | |
| "grad_norm": 6.7485032081604, | |
| "learning_rate": 9.791643530933032e-06, | |
| "loss": 0.8639, | |
| "step": 18078 | |
| }, | |
| { | |
| "epoch": 80.44888888888889, | |
| "grad_norm": 5.030679225921631, | |
| "learning_rate": 9.693468706260456e-06, | |
| "loss": 0.8707, | |
| "step": 18101 | |
| }, | |
| { | |
| "epoch": 80.55111111111111, | |
| "grad_norm": 5.043888568878174, | |
| "learning_rate": 9.595735677752343e-06, | |
| "loss": 0.8603, | |
| "step": 18124 | |
| }, | |
| { | |
| "epoch": 80.65333333333334, | |
| "grad_norm": 5.022198677062988, | |
| "learning_rate": 9.49844551665141e-06, | |
| "loss": 0.8598, | |
| "step": 18147 | |
| }, | |
| { | |
| "epoch": 80.75555555555556, | |
| "grad_norm": 6.346147060394287, | |
| "learning_rate": 9.401599289346091e-06, | |
| "loss": 0.8663, | |
| "step": 18170 | |
| }, | |
| { | |
| "epoch": 80.85777777777778, | |
| "grad_norm": 5.1296610832214355, | |
| "learning_rate": 9.305198057358972e-06, | |
| "loss": 0.8703, | |
| "step": 18193 | |
| }, | |
| { | |
| "epoch": 80.96, | |
| "grad_norm": 5.117784023284912, | |
| "learning_rate": 9.209242877335005e-06, | |
| "loss": 0.8624, | |
| "step": 18216 | |
| }, | |
| { | |
| "epoch": 81.06222222222222, | |
| "grad_norm": 4.949360370635986, | |
| "learning_rate": 9.113734801030076e-06, | |
| "loss": 0.8559, | |
| "step": 18239 | |
| }, | |
| { | |
| "epoch": 81.16444444444444, | |
| "grad_norm": 4.507094860076904, | |
| "learning_rate": 9.018674875299393e-06, | |
| "loss": 0.861, | |
| "step": 18262 | |
| }, | |
| { | |
| "epoch": 81.26666666666667, | |
| "grad_norm": 5.280154705047607, | |
| "learning_rate": 8.924064142085985e-06, | |
| "loss": 0.8558, | |
| "step": 18285 | |
| }, | |
| { | |
| "epoch": 81.36888888888889, | |
| "grad_norm": 4.777374267578125, | |
| "learning_rate": 8.829903638409388e-06, | |
| "loss": 0.8598, | |
| "step": 18308 | |
| }, | |
| { | |
| "epoch": 81.47111111111111, | |
| "grad_norm": 5.726168632507324, | |
| "learning_rate": 8.736194396354153e-06, | |
| "loss": 0.8649, | |
| "step": 18331 | |
| }, | |
| { | |
| "epoch": 81.57333333333334, | |
| "grad_norm": 5.1066484451293945, | |
| "learning_rate": 8.642937443058646e-06, | |
| "loss": 0.8558, | |
| "step": 18354 | |
| }, | |
| { | |
| "epoch": 81.67555555555556, | |
| "grad_norm": 5.291098117828369, | |
| "learning_rate": 8.550133800703686e-06, | |
| "loss": 0.8572, | |
| "step": 18377 | |
| }, | |
| { | |
| "epoch": 81.77777777777777, | |
| "grad_norm": 4.3951334953308105, | |
| "learning_rate": 8.457784486501452e-06, | |
| "loss": 0.8713, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 81.88, | |
| "grad_norm": 4.807311058044434, | |
| "learning_rate": 8.36589051268421e-06, | |
| "loss": 0.8704, | |
| "step": 18423 | |
| }, | |
| { | |
| "epoch": 81.98222222222222, | |
| "grad_norm": 6.832765579223633, | |
| "learning_rate": 8.274452886493333e-06, | |
| "loss": 0.862, | |
| "step": 18446 | |
| }, | |
| { | |
| "epoch": 82.08444444444444, | |
| "grad_norm": 4.566845417022705, | |
| "learning_rate": 8.183472610168197e-06, | |
| "loss": 0.8604, | |
| "step": 18469 | |
| }, | |
| { | |
| "epoch": 82.18666666666667, | |
| "grad_norm": 4.8708648681640625, | |
| "learning_rate": 8.092950680935185e-06, | |
| "loss": 0.8589, | |
| "step": 18492 | |
| }, | |
| { | |
| "epoch": 82.28888888888889, | |
| "grad_norm": 5.396876335144043, | |
| "learning_rate": 8.002888090996814e-06, | |
| "loss": 0.8608, | |
| "step": 18515 | |
| }, | |
| { | |
| "epoch": 82.39111111111112, | |
| "grad_norm": 4.885883808135986, | |
| "learning_rate": 7.913285827520794e-06, | |
| "loss": 0.8484, | |
| "step": 18538 | |
| }, | |
| { | |
| "epoch": 82.49333333333334, | |
| "grad_norm": 4.598787307739258, | |
| "learning_rate": 7.824144872629269e-06, | |
| "loss": 0.8576, | |
| "step": 18561 | |
| }, | |
| { | |
| "epoch": 82.59555555555555, | |
| "grad_norm": 4.590323448181152, | |
| "learning_rate": 7.735466203387992e-06, | |
| "loss": 0.8554, | |
| "step": 18584 | |
| }, | |
| { | |
| "epoch": 82.69777777777777, | |
| "grad_norm": 5.497690200805664, | |
| "learning_rate": 7.647250791795668e-06, | |
| "loss": 0.855, | |
| "step": 18607 | |
| }, | |
| { | |
| "epoch": 82.8, | |
| "grad_norm": 4.905009746551514, | |
| "learning_rate": 7.559499604773279e-06, | |
| "loss": 0.8563, | |
| "step": 18630 | |
| }, | |
| { | |
| "epoch": 82.90222222222222, | |
| "grad_norm": 4.675111770629883, | |
| "learning_rate": 7.47221360415346e-06, | |
| "loss": 0.8597, | |
| "step": 18653 | |
| }, | |
| { | |
| "epoch": 83.00444444444445, | |
| "grad_norm": 5.6808576583862305, | |
| "learning_rate": 7.385393746670022e-06, | |
| "loss": 0.8566, | |
| "step": 18676 | |
| }, | |
| { | |
| "epoch": 83.10666666666667, | |
| "grad_norm": 6.699379920959473, | |
| "learning_rate": 7.299040983947369e-06, | |
| "loss": 0.856, | |
| "step": 18699 | |
| }, | |
| { | |
| "epoch": 83.2088888888889, | |
| "grad_norm": 5.053982257843018, | |
| "learning_rate": 7.213156262490173e-06, | |
| "loss": 0.8481, | |
| "step": 18722 | |
| }, | |
| { | |
| "epoch": 83.31111111111112, | |
| "grad_norm": 5.297053337097168, | |
| "learning_rate": 7.127740523672915e-06, | |
| "loss": 0.85, | |
| "step": 18745 | |
| }, | |
| { | |
| "epoch": 83.41333333333333, | |
| "grad_norm": 5.744291305541992, | |
| "learning_rate": 7.042794703729622e-06, | |
| "loss": 0.8618, | |
| "step": 18768 | |
| }, | |
| { | |
| "epoch": 83.51555555555555, | |
| "grad_norm": 4.679412364959717, | |
| "learning_rate": 6.95831973374359e-06, | |
| "loss": 0.8403, | |
| "step": 18791 | |
| }, | |
| { | |
| "epoch": 83.61777777777777, | |
| "grad_norm": 4.38852596282959, | |
| "learning_rate": 6.874316539637127e-06, | |
| "loss": 0.8464, | |
| "step": 18814 | |
| }, | |
| { | |
| "epoch": 83.72, | |
| "grad_norm": 4.899384021759033, | |
| "learning_rate": 6.7907860421615066e-06, | |
| "loss": 0.8523, | |
| "step": 18837 | |
| }, | |
| { | |
| "epoch": 83.82222222222222, | |
| "grad_norm": 5.16193962097168, | |
| "learning_rate": 6.707729156886777e-06, | |
| "loss": 0.8502, | |
| "step": 18860 | |
| }, | |
| { | |
| "epoch": 83.92444444444445, | |
| "grad_norm": 4.833446979522705, | |
| "learning_rate": 6.625146794191794e-06, | |
| "loss": 0.8551, | |
| "step": 18883 | |
| }, | |
| { | |
| "epoch": 84.02666666666667, | |
| "grad_norm": 4.920324325561523, | |
| "learning_rate": 6.543039859254185e-06, | |
| "loss": 0.8525, | |
| "step": 18906 | |
| }, | |
| { | |
| "epoch": 84.1288888888889, | |
| "grad_norm": 5.322509765625, | |
| "learning_rate": 6.4614092520404905e-06, | |
| "loss": 0.8534, | |
| "step": 18929 | |
| }, | |
| { | |
| "epoch": 84.2311111111111, | |
| "grad_norm": 5.062963485717773, | |
| "learning_rate": 6.380255867296253e-06, | |
| "loss": 0.8519, | |
| "step": 18952 | |
| }, | |
| { | |
| "epoch": 84.33333333333333, | |
| "grad_norm": 5.186446666717529, | |
| "learning_rate": 6.299580594536214e-06, | |
| "loss": 0.8445, | |
| "step": 18975 | |
| }, | |
| { | |
| "epoch": 84.43555555555555, | |
| "grad_norm": 5.609063148498535, | |
| "learning_rate": 6.219384318034588e-06, | |
| "loss": 0.8432, | |
| "step": 18998 | |
| }, | |
| { | |
| "epoch": 84.53777777777778, | |
| "grad_norm": 4.684319972991943, | |
| "learning_rate": 6.1396679168153445e-06, | |
| "loss": 0.8434, | |
| "step": 19021 | |
| }, | |
| { | |
| "epoch": 84.64, | |
| "grad_norm": 4.717188835144043, | |
| "learning_rate": 6.060432264642601e-06, | |
| "loss": 0.8451, | |
| "step": 19044 | |
| }, | |
| { | |
| "epoch": 84.74222222222222, | |
| "grad_norm": 6.810020446777344, | |
| "learning_rate": 5.981678230011006e-06, | |
| "loss": 0.8425, | |
| "step": 19067 | |
| }, | |
| { | |
| "epoch": 84.84444444444445, | |
| "grad_norm": 4.562713146209717, | |
| "learning_rate": 5.903406676136264e-06, | |
| "loss": 0.8468, | |
| "step": 19090 | |
| }, | |
| { | |
| "epoch": 84.94666666666667, | |
| "grad_norm": 5.388665199279785, | |
| "learning_rate": 5.825618460945636e-06, | |
| "loss": 0.8418, | |
| "step": 19113 | |
| }, | |
| { | |
| "epoch": 85.04888888888888, | |
| "grad_norm": 5.054759979248047, | |
| "learning_rate": 5.748314437068558e-06, | |
| "loss": 0.8417, | |
| "step": 19136 | |
| }, | |
| { | |
| "epoch": 85.1511111111111, | |
| "grad_norm": 4.943572521209717, | |
| "learning_rate": 5.671495451827308e-06, | |
| "loss": 0.8444, | |
| "step": 19159 | |
| }, | |
| { | |
| "epoch": 85.25333333333333, | |
| "grad_norm": 4.801841735839844, | |
| "learning_rate": 5.595162347227661e-06, | |
| "loss": 0.8407, | |
| "step": 19182 | |
| }, | |
| { | |
| "epoch": 85.35555555555555, | |
| "grad_norm": 4.94541072845459, | |
| "learning_rate": 5.519315959949745e-06, | |
| "loss": 0.8413, | |
| "step": 19205 | |
| }, | |
| { | |
| "epoch": 85.45777777777778, | |
| "grad_norm": 5.529304027557373, | |
| "learning_rate": 5.443957121338777e-06, | |
| "loss": 0.8462, | |
| "step": 19228 | |
| }, | |
| { | |
| "epoch": 85.56, | |
| "grad_norm": 4.735396385192871, | |
| "learning_rate": 5.36908665739605e-06, | |
| "loss": 0.8491, | |
| "step": 19251 | |
| }, | |
| { | |
| "epoch": 85.66222222222223, | |
| "grad_norm": 5.091115474700928, | |
| "learning_rate": 5.294705388769772e-06, | |
| "loss": 0.8444, | |
| "step": 19274 | |
| }, | |
| { | |
| "epoch": 85.76444444444445, | |
| "grad_norm": 4.820996284484863, | |
| "learning_rate": 5.220814130746165e-06, | |
| "loss": 0.8509, | |
| "step": 19297 | |
| }, | |
| { | |
| "epoch": 85.86666666666666, | |
| "grad_norm": 4.448352336883545, | |
| "learning_rate": 5.1474136932404935e-06, | |
| "loss": 0.8339, | |
| "step": 19320 | |
| }, | |
| { | |
| "epoch": 85.96888888888888, | |
| "grad_norm": 4.6064019203186035, | |
| "learning_rate": 5.07450488078815e-06, | |
| "loss": 0.8115, | |
| "step": 19343 | |
| }, | |
| { | |
| "epoch": 86.07111111111111, | |
| "grad_norm": 6.598939895629883, | |
| "learning_rate": 5.002088492535906e-06, | |
| "loss": 0.818, | |
| "step": 19366 | |
| }, | |
| { | |
| "epoch": 86.17333333333333, | |
| "grad_norm": 4.426856994628906, | |
| "learning_rate": 4.930165322233082e-06, | |
| "loss": 0.8147, | |
| "step": 19389 | |
| }, | |
| { | |
| "epoch": 86.27555555555556, | |
| "grad_norm": 4.873010635375977, | |
| "learning_rate": 4.858736158222921e-06, | |
| "loss": 0.8146, | |
| "step": 19412 | |
| }, | |
| { | |
| "epoch": 86.37777777777778, | |
| "grad_norm": 4.8856520652771, | |
| "learning_rate": 4.787801783433871e-06, | |
| "loss": 0.8158, | |
| "step": 19435 | |
| }, | |
| { | |
| "epoch": 86.48, | |
| "grad_norm": 5.177906513214111, | |
| "learning_rate": 4.717362975371059e-06, | |
| "loss": 0.8187, | |
| "step": 19458 | |
| }, | |
| { | |
| "epoch": 86.58222222222223, | |
| "grad_norm": 4.954709529876709, | |
| "learning_rate": 4.647420506107775e-06, | |
| "loss": 0.8131, | |
| "step": 19481 | |
| }, | |
| { | |
| "epoch": 86.68444444444444, | |
| "grad_norm": 4.427014350891113, | |
| "learning_rate": 4.577975142276925e-06, | |
| "loss": 0.8263, | |
| "step": 19504 | |
| }, | |
| { | |
| "epoch": 86.78666666666666, | |
| "grad_norm": 5.581162929534912, | |
| "learning_rate": 4.509027645062758e-06, | |
| "loss": 0.8201, | |
| "step": 19527 | |
| }, | |
| { | |
| "epoch": 86.88888888888889, | |
| "grad_norm": 4.889328479766846, | |
| "learning_rate": 4.4405787701923885e-06, | |
| "loss": 0.8239, | |
| "step": 19550 | |
| }, | |
| { | |
| "epoch": 86.99111111111111, | |
| "grad_norm": 4.658565998077393, | |
| "learning_rate": 4.3726292679276305e-06, | |
| "loss": 0.8211, | |
| "step": 19573 | |
| }, | |
| { | |
| "epoch": 87.09333333333333, | |
| "grad_norm": 5.102555751800537, | |
| "learning_rate": 4.305179883056687e-06, | |
| "loss": 0.8154, | |
| "step": 19596 | |
| }, | |
| { | |
| "epoch": 87.19555555555556, | |
| "grad_norm": 4.951329231262207, | |
| "learning_rate": 4.23823135488603e-06, | |
| "loss": 0.8182, | |
| "step": 19619 | |
| }, | |
| { | |
| "epoch": 87.29777777777778, | |
| "grad_norm": 5.642242908477783, | |
| "learning_rate": 4.171784417232305e-06, | |
| "loss": 0.8076, | |
| "step": 19642 | |
| }, | |
| { | |
| "epoch": 87.4, | |
| "grad_norm": 5.003154277801514, | |
| "learning_rate": 4.10583979841424e-06, | |
| "loss": 0.8129, | |
| "step": 19665 | |
| }, | |
| { | |
| "epoch": 87.50222222222222, | |
| "grad_norm": 5.778168678283691, | |
| "learning_rate": 4.040398221244718e-06, | |
| "loss": 0.8123, | |
| "step": 19688 | |
| }, | |
| { | |
| "epoch": 87.60444444444444, | |
| "grad_norm": 5.08914852142334, | |
| "learning_rate": 3.975460403022801e-06, | |
| "loss": 0.8149, | |
| "step": 19711 | |
| }, | |
| { | |
| "epoch": 87.70666666666666, | |
| "grad_norm": 4.585403919219971, | |
| "learning_rate": 3.9110270555259345e-06, | |
| "loss": 0.8197, | |
| "step": 19734 | |
| }, | |
| { | |
| "epoch": 87.80888888888889, | |
| "grad_norm": 4.91745138168335, | |
| "learning_rate": 3.84709888500207e-06, | |
| "loss": 0.8175, | |
| "step": 19757 | |
| }, | |
| { | |
| "epoch": 87.91111111111111, | |
| "grad_norm": 5.540400981903076, | |
| "learning_rate": 3.7836765921619888e-06, | |
| "loss": 0.8115, | |
| "step": 19780 | |
| }, | |
| { | |
| "epoch": 88.01333333333334, | |
| "grad_norm": 4.485517501831055, | |
| "learning_rate": 3.720760872171569e-06, | |
| "loss": 0.8122, | |
| "step": 19803 | |
| }, | |
| { | |
| "epoch": 88.11555555555556, | |
| "grad_norm": 4.355061054229736, | |
| "learning_rate": 3.658352414644206e-06, | |
| "loss": 0.8105, | |
| "step": 19826 | |
| }, | |
| { | |
| "epoch": 88.21777777777778, | |
| "grad_norm": 5.2161784172058105, | |
| "learning_rate": 3.596451903633247e-06, | |
| "loss": 0.8115, | |
| "step": 19849 | |
| }, | |
| { | |
| "epoch": 88.32, | |
| "grad_norm": 4.382901191711426, | |
| "learning_rate": 3.535060017624453e-06, | |
| "loss": 0.8118, | |
| "step": 19872 | |
| }, | |
| { | |
| "epoch": 88.42222222222222, | |
| "grad_norm": 5.805255889892578, | |
| "learning_rate": 3.47417742952863e-06, | |
| "loss": 0.8046, | |
| "step": 19895 | |
| }, | |
| { | |
| "epoch": 88.52444444444444, | |
| "grad_norm": 4.063962936401367, | |
| "learning_rate": 3.4138048066741867e-06, | |
| "loss": 0.8136, | |
| "step": 19918 | |
| }, | |
| { | |
| "epoch": 88.62666666666667, | |
| "grad_norm": 5.049718379974365, | |
| "learning_rate": 3.3539428107998814e-06, | |
| "loss": 0.8071, | |
| "step": 19941 | |
| }, | |
| { | |
| "epoch": 88.72888888888889, | |
| "grad_norm": 4.287143230438232, | |
| "learning_rate": 3.294592098047494e-06, | |
| "loss": 0.8064, | |
| "step": 19964 | |
| }, | |
| { | |
| "epoch": 88.83111111111111, | |
| "grad_norm": 5.841145992279053, | |
| "learning_rate": 3.2357533189547098e-06, | |
| "loss": 0.8188, | |
| "step": 19987 | |
| }, | |
| { | |
| "epoch": 88.93333333333334, | |
| "grad_norm": 6.014995098114014, | |
| "learning_rate": 3.1774271184479675e-06, | |
| "loss": 0.8114, | |
| "step": 20010 | |
| }, | |
| { | |
| "epoch": 89.03555555555556, | |
| "grad_norm": 4.5376386642456055, | |
| "learning_rate": 3.1196141358353357e-06, | |
| "loss": 0.8135, | |
| "step": 20033 | |
| }, | |
| { | |
| "epoch": 89.13777777777777, | |
| "grad_norm": 4.438096523284912, | |
| "learning_rate": 3.0623150047995873e-06, | |
| "loss": 0.8091, | |
| "step": 20056 | |
| }, | |
| { | |
| "epoch": 89.24, | |
| "grad_norm": 4.940515518188477, | |
| "learning_rate": 3.005530353391195e-06, | |
| "loss": 0.812, | |
| "step": 20079 | |
| }, | |
| { | |
| "epoch": 89.34222222222222, | |
| "grad_norm": 4.826828479766846, | |
| "learning_rate": 2.9492608040214862e-06, | |
| "loss": 0.8123, | |
| "step": 20102 | |
| }, | |
| { | |
| "epoch": 89.44444444444444, | |
| "grad_norm": 4.983479976654053, | |
| "learning_rate": 2.893506973455773e-06, | |
| "loss": 0.8081, | |
| "step": 20125 | |
| }, | |
| { | |
| "epoch": 89.54666666666667, | |
| "grad_norm": 6.005835056304932, | |
| "learning_rate": 2.838269472806654e-06, | |
| "loss": 0.8095, | |
| "step": 20148 | |
| }, | |
| { | |
| "epoch": 89.64888888888889, | |
| "grad_norm": 4.9561662673950195, | |
| "learning_rate": 2.7835489075272727e-06, | |
| "loss": 0.8061, | |
| "step": 20171 | |
| }, | |
| { | |
| "epoch": 89.75111111111111, | |
| "grad_norm": 5.078367233276367, | |
| "learning_rate": 2.729345877404671e-06, | |
| "loss": 0.7997, | |
| "step": 20194 | |
| }, | |
| { | |
| "epoch": 89.85333333333334, | |
| "grad_norm": 4.345983505249023, | |
| "learning_rate": 2.675660976553268e-06, | |
| "loss": 0.8101, | |
| "step": 20217 | |
| }, | |
| { | |
| "epoch": 89.95555555555555, | |
| "grad_norm": 4.390908241271973, | |
| "learning_rate": 2.6224947934082923e-06, | |
| "loss": 0.8016, | |
| "step": 20240 | |
| }, | |
| { | |
| "epoch": 90.05777777777777, | |
| "grad_norm": 4.5562028884887695, | |
| "learning_rate": 2.5698479107193697e-06, | |
| "loss": 0.8039, | |
| "step": 20263 | |
| }, | |
| { | |
| "epoch": 90.16, | |
| "grad_norm": 4.685390472412109, | |
| "learning_rate": 2.517720905544102e-06, | |
| "loss": 0.7952, | |
| "step": 20286 | |
| }, | |
| { | |
| "epoch": 90.26222222222222, | |
| "grad_norm": 4.973295211791992, | |
| "learning_rate": 2.466114349241794e-06, | |
| "loss": 0.809, | |
| "step": 20309 | |
| }, | |
| { | |
| "epoch": 90.36444444444444, | |
| "grad_norm": 5.430562496185303, | |
| "learning_rate": 2.4150288074671346e-06, | |
| "loss": 0.8088, | |
| "step": 20332 | |
| }, | |
| { | |
| "epoch": 90.46666666666667, | |
| "grad_norm": 4.49529504776001, | |
| "learning_rate": 2.3644648401640156e-06, | |
| "loss": 0.8057, | |
| "step": 20355 | |
| }, | |
| { | |
| "epoch": 90.56888888888889, | |
| "grad_norm": 5.173520565032959, | |
| "learning_rate": 2.314423001559424e-06, | |
| "loss": 0.8205, | |
| "step": 20378 | |
| }, | |
| { | |
| "epoch": 90.67111111111112, | |
| "grad_norm": 5.084122657775879, | |
| "learning_rate": 2.264903840157312e-06, | |
| "loss": 0.8096, | |
| "step": 20401 | |
| }, | |
| { | |
| "epoch": 90.77333333333333, | |
| "grad_norm": 4.675368309020996, | |
| "learning_rate": 2.2159078987326554e-06, | |
| "loss": 0.8109, | |
| "step": 20424 | |
| }, | |
| { | |
| "epoch": 90.87555555555555, | |
| "grad_norm": 4.598373889923096, | |
| "learning_rate": 2.167435714325411e-06, | |
| "loss": 0.7989, | |
| "step": 20447 | |
| }, | |
| { | |
| "epoch": 90.97777777777777, | |
| "grad_norm": 4.149188995361328, | |
| "learning_rate": 2.1194878182347334e-06, | |
| "loss": 0.8142, | |
| "step": 20470 | |
| }, | |
| { | |
| "epoch": 91.08, | |
| "grad_norm": 5.164962291717529, | |
| "learning_rate": 2.0720647360130685e-06, | |
| "loss": 0.8096, | |
| "step": 20493 | |
| }, | |
| { | |
| "epoch": 91.18222222222222, | |
| "grad_norm": 5.351869106292725, | |
| "learning_rate": 2.0251669874604474e-06, | |
| "loss": 0.8036, | |
| "step": 20516 | |
| }, | |
| { | |
| "epoch": 91.28444444444445, | |
| "grad_norm": 5.2852935791015625, | |
| "learning_rate": 1.9787950866187565e-06, | |
| "loss": 0.8057, | |
| "step": 20539 | |
| }, | |
| { | |
| "epoch": 91.38666666666667, | |
| "grad_norm": 6.784205436706543, | |
| "learning_rate": 1.9329495417661046e-06, | |
| "loss": 0.8031, | |
| "step": 20562 | |
| }, | |
| { | |
| "epoch": 91.4888888888889, | |
| "grad_norm": 4.940450668334961, | |
| "learning_rate": 1.887630855411282e-06, | |
| "loss": 0.8066, | |
| "step": 20585 | |
| }, | |
| { | |
| "epoch": 91.5911111111111, | |
| "grad_norm": 4.77994441986084, | |
| "learning_rate": 1.84283952428822e-06, | |
| "loss": 0.8038, | |
| "step": 20608 | |
| }, | |
| { | |
| "epoch": 91.69333333333333, | |
| "grad_norm": 4.902866840362549, | |
| "learning_rate": 1.798576039350558e-06, | |
| "loss": 0.8043, | |
| "step": 20631 | |
| }, | |
| { | |
| "epoch": 91.79555555555555, | |
| "grad_norm": 5.100454330444336, | |
| "learning_rate": 1.7548408857662623e-06, | |
| "loss": 0.8008, | |
| "step": 20654 | |
| }, | |
| { | |
| "epoch": 91.89777777777778, | |
| "grad_norm": 4.9377264976501465, | |
| "learning_rate": 1.7116345429123104e-06, | |
| "loss": 0.8098, | |
| "step": 20677 | |
| }, | |
| { | |
| "epoch": 92.0, | |
| "grad_norm": 5.0082292556762695, | |
| "learning_rate": 1.6689574843694433e-06, | |
| "loss": 0.7992, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 92.10222222222222, | |
| "grad_norm": 4.688179016113281, | |
| "learning_rate": 1.6268101779169375e-06, | |
| "loss": 0.7928, | |
| "step": 20723 | |
| }, | |
| { | |
| "epoch": 92.20444444444445, | |
| "grad_norm": 4.243449687957764, | |
| "learning_rate": 1.5851930855275365e-06, | |
| "loss": 0.7957, | |
| "step": 20746 | |
| }, | |
| { | |
| "epoch": 92.30666666666667, | |
| "grad_norm": 4.956583499908447, | |
| "learning_rate": 1.544106663362338e-06, | |
| "loss": 0.8073, | |
| "step": 20769 | |
| }, | |
| { | |
| "epoch": 92.4088888888889, | |
| "grad_norm": 4.556548118591309, | |
| "learning_rate": 1.503551361765826e-06, | |
| "loss": 0.8019, | |
| "step": 20792 | |
| }, | |
| { | |
| "epoch": 92.5111111111111, | |
| "grad_norm": 6.762635707855225, | |
| "learning_rate": 1.4635276252608965e-06, | |
| "loss": 0.8084, | |
| "step": 20815 | |
| }, | |
| { | |
| "epoch": 92.61333333333333, | |
| "grad_norm": 5.724966049194336, | |
| "learning_rate": 1.4240358925440457e-06, | |
| "loss": 0.8008, | |
| "step": 20838 | |
| }, | |
| { | |
| "epoch": 92.71555555555555, | |
| "grad_norm": 5.445995330810547, | |
| "learning_rate": 1.3850765964805e-06, | |
| "loss": 0.802, | |
| "step": 20861 | |
| }, | |
| { | |
| "epoch": 92.81777777777778, | |
| "grad_norm": 4.807301044464111, | |
| "learning_rate": 1.3466501640994944e-06, | |
| "loss": 0.8038, | |
| "step": 20884 | |
| }, | |
| { | |
| "epoch": 92.92, | |
| "grad_norm": 5.612717151641846, | |
| "learning_rate": 1.308757016589618e-06, | |
| "loss": 0.7996, | |
| "step": 20907 | |
| }, | |
| { | |
| "epoch": 93.02222222222223, | |
| "grad_norm": 4.5359296798706055, | |
| "learning_rate": 1.2713975692941415e-06, | |
| "loss": 0.801, | |
| "step": 20930 | |
| }, | |
| { | |
| "epoch": 93.12444444444445, | |
| "grad_norm": 4.222482681274414, | |
| "learning_rate": 1.2345722317065267e-06, | |
| "loss": 0.7996, | |
| "step": 20953 | |
| }, | |
| { | |
| "epoch": 93.22666666666667, | |
| "grad_norm": 4.250333786010742, | |
| "learning_rate": 1.19828140746589e-06, | |
| "loss": 0.8072, | |
| "step": 20976 | |
| }, | |
| { | |
| "epoch": 93.32888888888888, | |
| "grad_norm": 4.197777271270752, | |
| "learning_rate": 1.1625254943526065e-06, | |
| "loss": 0.795, | |
| "step": 20999 | |
| }, | |
| { | |
| "epoch": 93.43111111111111, | |
| "grad_norm": 5.79392671585083, | |
| "learning_rate": 1.1273048842839307e-06, | |
| "loss": 0.8076, | |
| "step": 21022 | |
| }, | |
| { | |
| "epoch": 93.53333333333333, | |
| "grad_norm": 4.919564723968506, | |
| "learning_rate": 1.0926199633097157e-06, | |
| "loss": 0.802, | |
| "step": 21045 | |
| }, | |
| { | |
| "epoch": 93.63555555555556, | |
| "grad_norm": 5.422025203704834, | |
| "learning_rate": 1.0584711116081837e-06, | |
| "loss": 0.8141, | |
| "step": 21068 | |
| }, | |
| { | |
| "epoch": 93.73777777777778, | |
| "grad_norm": 4.949449062347412, | |
| "learning_rate": 1.0248587034817237e-06, | |
| "loss": 0.8001, | |
| "step": 21091 | |
| }, | |
| { | |
| "epoch": 93.84, | |
| "grad_norm": 4.578461647033691, | |
| "learning_rate": 9.917831073528504e-07, | |
| "loss": 0.7959, | |
| "step": 21114 | |
| }, | |
| { | |
| "epoch": 93.94222222222223, | |
| "grad_norm": 4.7736592292785645, | |
| "learning_rate": 9.59244685760108e-07, | |
| "loss": 0.8007, | |
| "step": 21137 | |
| }, | |
| { | |
| "epoch": 94.04444444444445, | |
| "grad_norm": 4.64253044128418, | |
| "learning_rate": 9.27243795354138e-07, | |
| "loss": 0.8042, | |
| "step": 21160 | |
| }, | |
| { | |
| "epoch": 94.14666666666666, | |
| "grad_norm": 5.671309471130371, | |
| "learning_rate": 8.957807868937296e-07, | |
| "loss": 0.7971, | |
| "step": 21183 | |
| }, | |
| { | |
| "epoch": 94.24888888888889, | |
| "grad_norm": 4.637156963348389, | |
| "learning_rate": 8.648560052420151e-07, | |
| "loss": 0.8008, | |
| "step": 21206 | |
| }, | |
| { | |
| "epoch": 94.35111111111111, | |
| "grad_norm": 4.140064239501953, | |
| "learning_rate": 8.344697893626741e-07, | |
| "loss": 0.7955, | |
| "step": 21229 | |
| }, | |
| { | |
| "epoch": 94.45333333333333, | |
| "grad_norm": 4.615813732147217, | |
| "learning_rate": 8.046224723162077e-07, | |
| "loss": 0.7998, | |
| "step": 21252 | |
| }, | |
| { | |
| "epoch": 94.55555555555556, | |
| "grad_norm": 5.006037712097168, | |
| "learning_rate": 7.75314381256298e-07, | |
| "loss": 0.7944, | |
| "step": 21275 | |
| }, | |
| { | |
| "epoch": 94.65777777777778, | |
| "grad_norm": 4.940041542053223, | |
| "learning_rate": 7.465458374262213e-07, | |
| "loss": 0.7944, | |
| "step": 21298 | |
| }, | |
| { | |
| "epoch": 94.76, | |
| "grad_norm": 4.452148914337158, | |
| "learning_rate": 7.183171561553348e-07, | |
| "loss": 0.8021, | |
| "step": 21321 | |
| }, | |
| { | |
| "epoch": 94.86222222222223, | |
| "grad_norm": 4.3342509269714355, | |
| "learning_rate": 6.906286468555955e-07, | |
| "loss": 0.8016, | |
| "step": 21344 | |
| }, | |
| { | |
| "epoch": 94.96444444444444, | |
| "grad_norm": 5.098360538482666, | |
| "learning_rate": 6.634806130182025e-07, | |
| "loss": 0.7997, | |
| "step": 21367 | |
| }, | |
| { | |
| "epoch": 95.06666666666666, | |
| "grad_norm": 4.704761028289795, | |
| "learning_rate": 6.368733522102432e-07, | |
| "loss": 0.8007, | |
| "step": 21390 | |
| }, | |
| { | |
| "epoch": 95.16888888888889, | |
| "grad_norm": 4.529531002044678, | |
| "learning_rate": 6.108071560714413e-07, | |
| "loss": 0.7976, | |
| "step": 21413 | |
| }, | |
| { | |
| "epoch": 95.27111111111111, | |
| "grad_norm": 4.470498561859131, | |
| "learning_rate": 5.852823103109639e-07, | |
| "loss": 0.7871, | |
| "step": 21436 | |
| }, | |
| { | |
| "epoch": 95.37333333333333, | |
| "grad_norm": 4.434628486633301, | |
| "learning_rate": 5.602990947042919e-07, | |
| "loss": 0.8027, | |
| "step": 21459 | |
| }, | |
| { | |
| "epoch": 95.47555555555556, | |
| "grad_norm": 4.518807411193848, | |
| "learning_rate": 5.358577830901435e-07, | |
| "loss": 0.7986, | |
| "step": 21482 | |
| }, | |
| { | |
| "epoch": 95.57777777777778, | |
| "grad_norm": 4.176888942718506, | |
| "learning_rate": 5.119586433674661e-07, | |
| "loss": 0.7951, | |
| "step": 21505 | |
| }, | |
| { | |
| "epoch": 95.68, | |
| "grad_norm": 4.806949138641357, | |
| "learning_rate": 4.886019374925333e-07, | |
| "loss": 0.7995, | |
| "step": 21528 | |
| }, | |
| { | |
| "epoch": 95.78222222222222, | |
| "grad_norm": 4.371096611022949, | |
| "learning_rate": 4.657879214760297e-07, | |
| "loss": 0.7991, | |
| "step": 21551 | |
| }, | |
| { | |
| "epoch": 95.88444444444444, | |
| "grad_norm": 4.214781761169434, | |
| "learning_rate": 4.435168453802874e-07, | |
| "loss": 0.7912, | |
| "step": 21574 | |
| }, | |
| { | |
| "epoch": 95.98666666666666, | |
| "grad_norm": 4.71865177154541, | |
| "learning_rate": 4.2178895331650427e-07, | |
| "loss": 0.804, | |
| "step": 21597 | |
| }, | |
| { | |
| "epoch": 96.08888888888889, | |
| "grad_norm": 4.573912143707275, | |
| "learning_rate": 4.0060448344209634e-07, | |
| "loss": 0.7969, | |
| "step": 21620 | |
| }, | |
| { | |
| "epoch": 96.19111111111111, | |
| "grad_norm": 5.047268390655518, | |
| "learning_rate": 3.799636679580887e-07, | |
| "loss": 0.7964, | |
| "step": 21643 | |
| }, | |
| { | |
| "epoch": 96.29333333333334, | |
| "grad_norm": 4.307917594909668, | |
| "learning_rate": 3.598667331065397e-07, | |
| "loss": 0.7957, | |
| "step": 21666 | |
| }, | |
| { | |
| "epoch": 96.39555555555556, | |
| "grad_norm": 4.763662815093994, | |
| "learning_rate": 3.403138991681043e-07, | |
| "loss": 0.7958, | |
| "step": 21689 | |
| }, | |
| { | |
| "epoch": 96.49777777777778, | |
| "grad_norm": 4.808367729187012, | |
| "learning_rate": 3.213053804595911e-07, | |
| "loss": 0.809, | |
| "step": 21712 | |
| }, | |
| { | |
| "epoch": 96.6, | |
| "grad_norm": 5.026544570922852, | |
| "learning_rate": 3.0284138533160924e-07, | |
| "loss": 0.8024, | |
| "step": 21735 | |
| }, | |
| { | |
| "epoch": 96.70222222222222, | |
| "grad_norm": 6.12026834487915, | |
| "learning_rate": 2.849221161663085e-07, | |
| "loss": 0.8041, | |
| "step": 21758 | |
| }, | |
| { | |
| "epoch": 96.80444444444444, | |
| "grad_norm": 4.895252227783203, | |
| "learning_rate": 2.6754776937513717e-07, | |
| "loss": 0.7966, | |
| "step": 21781 | |
| }, | |
| { | |
| "epoch": 96.90666666666667, | |
| "grad_norm": 4.611559867858887, | |
| "learning_rate": 2.507185353967101e-07, | |
| "loss": 0.8041, | |
| "step": 21804 | |
| }, | |
| { | |
| "epoch": 97.00888888888889, | |
| "grad_norm": 4.198352813720703, | |
| "learning_rate": 2.344345986946994e-07, | |
| "loss": 0.8013, | |
| "step": 21827 | |
| }, | |
| { | |
| "epoch": 97.11111111111111, | |
| "grad_norm": 4.63875675201416, | |
| "learning_rate": 2.186961377558361e-07, | |
| "loss": 0.8015, | |
| "step": 21850 | |
| }, | |
| { | |
| "epoch": 97.21333333333334, | |
| "grad_norm": 4.243088245391846, | |
| "learning_rate": 2.0350332508793367e-07, | |
| "loss": 0.7829, | |
| "step": 21873 | |
| }, | |
| { | |
| "epoch": 97.31555555555556, | |
| "grad_norm": 4.228803634643555, | |
| "learning_rate": 1.8885632721800106e-07, | |
| "loss": 0.7999, | |
| "step": 21896 | |
| }, | |
| { | |
| "epoch": 97.41777777777777, | |
| "grad_norm": 5.103250980377197, | |
| "learning_rate": 1.7475530469044376e-07, | |
| "loss": 0.7979, | |
| "step": 21919 | |
| }, | |
| { | |
| "epoch": 97.52, | |
| "grad_norm": 4.691418170928955, | |
| "learning_rate": 1.6120041206524883e-07, | |
| "loss": 0.7972, | |
| "step": 21942 | |
| }, | |
| { | |
| "epoch": 97.62222222222222, | |
| "grad_norm": 4.644149303436279, | |
| "learning_rate": 1.481917979163583e-07, | |
| "loss": 0.7897, | |
| "step": 21965 | |
| }, | |
| { | |
| "epoch": 97.72444444444444, | |
| "grad_norm": 4.451114654541016, | |
| "learning_rate": 1.357296048299761e-07, | |
| "loss": 0.8001, | |
| "step": 21988 | |
| }, | |
| { | |
| "epoch": 97.82666666666667, | |
| "grad_norm": 4.836966037750244, | |
| "learning_rate": 1.2381396940305824e-07, | |
| "loss": 0.7994, | |
| "step": 22011 | |
| }, | |
| { | |
| "epoch": 97.92888888888889, | |
| "grad_norm": 4.453198432922363, | |
| "learning_rate": 1.12445022241775e-07, | |
| "loss": 0.7969, | |
| "step": 22034 | |
| }, | |
| { | |
| "epoch": 98.03111111111112, | |
| "grad_norm": 5.4233903884887695, | |
| "learning_rate": 1.0162288796011221e-07, | |
| "loss": 0.8006, | |
| "step": 22057 | |
| }, | |
| { | |
| "epoch": 98.13333333333334, | |
| "grad_norm": 4.528837203979492, | |
| "learning_rate": 9.134768517848336e-08, | |
| "loss": 0.8031, | |
| "step": 22080 | |
| }, | |
| { | |
| "epoch": 98.23555555555555, | |
| "grad_norm": 5.245551586151123, | |
| "learning_rate": 8.161952652243621e-08, | |
| "loss": 0.8005, | |
| "step": 22103 | |
| }, | |
| { | |
| "epoch": 98.33777777777777, | |
| "grad_norm": 4.625002861022949, | |
| "learning_rate": 7.243851862141492e-08, | |
| "loss": 0.8075, | |
| "step": 22126 | |
| }, | |
| { | |
| "epoch": 98.44, | |
| "grad_norm": 4.824587345123291, | |
| "learning_rate": 6.38047621075999e-08, | |
| "loss": 0.7925, | |
| "step": 22149 | |
| }, | |
| { | |
| "epoch": 98.54222222222222, | |
| "grad_norm": 4.704883098602295, | |
| "learning_rate": 5.5718351614797437e-08, | |
| "loss": 0.7953, | |
| "step": 22172 | |
| }, | |
| { | |
| "epoch": 98.64444444444445, | |
| "grad_norm": 4.561920642852783, | |
| "learning_rate": 4.817937577741294e-08, | |
| "loss": 0.7976, | |
| "step": 22195 | |
| }, | |
| { | |
| "epoch": 98.74666666666667, | |
| "grad_norm": 4.796523094177246, | |
| "learning_rate": 4.118791722945159e-08, | |
| "loss": 0.8026, | |
| "step": 22218 | |
| }, | |
| { | |
| "epoch": 98.8488888888889, | |
| "grad_norm": 4.576013565063477, | |
| "learning_rate": 3.474405260365798e-08, | |
| "loss": 0.794, | |
| "step": 22241 | |
| }, | |
| { | |
| "epoch": 98.95111111111112, | |
| "grad_norm": 5.13820743560791, | |
| "learning_rate": 2.8847852530622387e-08, | |
| "loss": 0.7895, | |
| "step": 22264 | |
| }, | |
| { | |
| "epoch": 99.05333333333333, | |
| "grad_norm": 4.2987060546875, | |
| "learning_rate": 2.3499381638064645e-08, | |
| "loss": 0.7919, | |
| "step": 22287 | |
| }, | |
| { | |
| "epoch": 99.15555555555555, | |
| "grad_norm": 4.3480305671691895, | |
| "learning_rate": 1.8698698550068117e-08, | |
| "loss": 0.798, | |
| "step": 22310 | |
| }, | |
| { | |
| "epoch": 99.25777777777778, | |
| "grad_norm": 5.037069797515869, | |
| "learning_rate": 1.4445855886480176e-08, | |
| "loss": 0.8026, | |
| "step": 22333 | |
| }, | |
| { | |
| "epoch": 99.36, | |
| "grad_norm": 4.374788284301758, | |
| "learning_rate": 1.074090026231267e-08, | |
| "loss": 0.7926, | |
| "step": 22356 | |
| }, | |
| { | |
| "epoch": 99.46222222222222, | |
| "grad_norm": 4.93529748916626, | |
| "learning_rate": 7.583872287253436e-09, | |
| "loss": 0.8044, | |
| "step": 22379 | |
| }, | |
| { | |
| "epoch": 99.56444444444445, | |
| "grad_norm": 4.404996395111084, | |
| "learning_rate": 4.974806565177792e-09, | |
| "loss": 0.802, | |
| "step": 22402 | |
| }, | |
| { | |
| "epoch": 99.66666666666667, | |
| "grad_norm": 4.556636333465576, | |
| "learning_rate": 2.9137316938265825e-09, | |
| "loss": 0.793, | |
| "step": 22425 | |
| }, | |
| { | |
| "epoch": 99.7688888888889, | |
| "grad_norm": 4.4638190269470215, | |
| "learning_rate": 1.4006702644453474e-09, | |
| "loss": 0.7999, | |
| "step": 22448 | |
| }, | |
| { | |
| "epoch": 99.8711111111111, | |
| "grad_norm": 4.293120861053467, | |
| "learning_rate": 4.3563886156228196e-10, | |
| "loss": 0.8048, | |
| "step": 22471 | |
| }, | |
| { | |
| "epoch": 99.97333333333333, | |
| "grad_norm": 4.100605010986328, | |
| "learning_rate": 1.8648062799497822e-11, | |
| "loss": 0.7996, | |
| "step": 22494 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "step": 22500, | |
| "total_flos": 2.1925440120390943e+18, | |
| "train_loss": 2.6133422136730617, | |
| "train_runtime": 133573.7106, | |
| "train_samples_per_second": 86.157, | |
| "train_steps_per_second": 0.168 | |
| } | |
| ], | |
| "logging_steps": 23, | |
| "max_steps": 22500, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 100, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.1925440120390943e+18, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |