| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 4705.895522388059, |
| "eval_steps": 1000.0, |
| "global_step": 80000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 11.776119402985074, |
| "grad_norm": 2824.542724609375, |
| "learning_rate": 6.333333333333334e-07, |
| "loss": 4464.7338, |
| "step": 200 |
| }, |
| { |
| "epoch": 23.53731343283582, |
| "grad_norm": 4310.4130859375, |
| "learning_rate": 1.3e-06, |
| "loss": 4264.3419, |
| "step": 400 |
| }, |
| { |
| "epoch": 35.298507462686565, |
| "grad_norm": 4456.80908203125, |
| "learning_rate": 1.9666666666666668e-06, |
| "loss": 3192.1663, |
| "step": 600 |
| }, |
| { |
| "epoch": 47.059701492537314, |
| "grad_norm": 3549.643310546875, |
| "learning_rate": 2.6333333333333332e-06, |
| "loss": 1894.5492, |
| "step": 800 |
| }, |
| { |
| "epoch": 58.83582089552239, |
| "grad_norm": 2974.13720703125, |
| "learning_rate": 3.3000000000000006e-06, |
| "loss": 1464.722, |
| "step": 1000 |
| }, |
| { |
| "epoch": 70.59701492537313, |
| "grad_norm": 2222.13427734375, |
| "learning_rate": 3.966666666666667e-06, |
| "loss": 1245.5541, |
| "step": 1200 |
| }, |
| { |
| "epoch": 82.35820895522389, |
| "grad_norm": 1658.634033203125, |
| "learning_rate": 4.633333333333334e-06, |
| "loss": 1112.0441, |
| "step": 1400 |
| }, |
| { |
| "epoch": 94.11940298507463, |
| "grad_norm": 800.1384887695312, |
| "learning_rate": 5.300000000000001e-06, |
| "loss": 1024.3591, |
| "step": 1600 |
| }, |
| { |
| "epoch": 105.8955223880597, |
| "grad_norm": 472.2015686035156, |
| "learning_rate": 5.966666666666667e-06, |
| "loss": 959.4488, |
| "step": 1800 |
| }, |
| { |
| "epoch": 117.65671641791045, |
| "grad_norm": 395.40087890625, |
| "learning_rate": 6.633333333333334e-06, |
| "loss": 921.3352, |
| "step": 2000 |
| }, |
| { |
| "epoch": 129.4179104477612, |
| "grad_norm": 214.59837341308594, |
| "learning_rate": 7.3e-06, |
| "loss": 892.8059, |
| "step": 2200 |
| }, |
| { |
| "epoch": 141.17910447761193, |
| "grad_norm": 69.00880432128906, |
| "learning_rate": 7.966666666666668e-06, |
| "loss": 879.1008, |
| "step": 2400 |
| }, |
| { |
| "epoch": 152.955223880597, |
| "grad_norm": 168.0197296142578, |
| "learning_rate": 8.633333333333334e-06, |
| "loss": 871.8709, |
| "step": 2600 |
| }, |
| { |
| "epoch": 164.71641791044777, |
| "grad_norm": 300.1562194824219, |
| "learning_rate": 9.3e-06, |
| "loss": 865.7897, |
| "step": 2800 |
| }, |
| { |
| "epoch": 176.47761194029852, |
| "grad_norm": 96.16655731201172, |
| "learning_rate": 9.966666666666667e-06, |
| "loss": 858.404, |
| "step": 3000 |
| }, |
| { |
| "epoch": 188.23880597014926, |
| "grad_norm": 158.73681640625, |
| "learning_rate": 9.975324675324676e-06, |
| "loss": 829.5743, |
| "step": 3200 |
| }, |
| { |
| "epoch": 200.0, |
| "grad_norm": 175.4840087890625, |
| "learning_rate": 9.94935064935065e-06, |
| "loss": 763.1604, |
| "step": 3400 |
| }, |
| { |
| "epoch": 211.77611940298507, |
| "grad_norm": 258.41546630859375, |
| "learning_rate": 9.923376623376623e-06, |
| "loss": 667.2331, |
| "step": 3600 |
| }, |
| { |
| "epoch": 223.53731343283582, |
| "grad_norm": 301.9793701171875, |
| "learning_rate": 9.897402597402598e-06, |
| "loss": 580.1272, |
| "step": 3800 |
| }, |
| { |
| "epoch": 235.29850746268656, |
| "grad_norm": 365.6127014160156, |
| "learning_rate": 9.871428571428572e-06, |
| "loss": 509.478, |
| "step": 4000 |
| }, |
| { |
| "epoch": 247.0597014925373, |
| "grad_norm": 580.1858520507812, |
| "learning_rate": 9.845454545454546e-06, |
| "loss": 456.7661, |
| "step": 4200 |
| }, |
| { |
| "epoch": 258.8358208955224, |
| "grad_norm": 405.80657958984375, |
| "learning_rate": 9.81948051948052e-06, |
| "loss": 413.5111, |
| "step": 4400 |
| }, |
| { |
| "epoch": 270.5970149253731, |
| "grad_norm": 558.102783203125, |
| "learning_rate": 9.793506493506494e-06, |
| "loss": 381.9194, |
| "step": 4600 |
| }, |
| { |
| "epoch": 282.35820895522386, |
| "grad_norm": 537.9163208007812, |
| "learning_rate": 9.767532467532468e-06, |
| "loss": 352.0065, |
| "step": 4800 |
| }, |
| { |
| "epoch": 294.1194029850746, |
| "grad_norm": 477.7449951171875, |
| "learning_rate": 9.741558441558442e-06, |
| "loss": 318.2346, |
| "step": 5000 |
| }, |
| { |
| "epoch": 305.8955223880597, |
| "grad_norm": 515.5437622070312, |
| "learning_rate": 9.715584415584415e-06, |
| "loss": 295.5646, |
| "step": 5200 |
| }, |
| { |
| "epoch": 317.65671641791045, |
| "grad_norm": 390.6947021484375, |
| "learning_rate": 9.68961038961039e-06, |
| "loss": 274.0082, |
| "step": 5400 |
| }, |
| { |
| "epoch": 329.4179104477612, |
| "grad_norm": 398.3788146972656, |
| "learning_rate": 9.663636363636364e-06, |
| "loss": 256.2253, |
| "step": 5600 |
| }, |
| { |
| "epoch": 341.17910447761193, |
| "grad_norm": 522.6358642578125, |
| "learning_rate": 9.637662337662338e-06, |
| "loss": 241.4042, |
| "step": 5800 |
| }, |
| { |
| "epoch": 352.95522388059703, |
| "grad_norm": 464.41546630859375, |
| "learning_rate": 9.611688311688311e-06, |
| "loss": 225.5165, |
| "step": 6000 |
| }, |
| { |
| "epoch": 364.7164179104478, |
| "grad_norm": 859.1548461914062, |
| "learning_rate": 9.585714285714286e-06, |
| "loss": 208.6093, |
| "step": 6200 |
| }, |
| { |
| "epoch": 376.4776119402985, |
| "grad_norm": 590.9322509765625, |
| "learning_rate": 9.55974025974026e-06, |
| "loss": 197.202, |
| "step": 6400 |
| }, |
| { |
| "epoch": 388.23880597014926, |
| "grad_norm": 498.85870361328125, |
| "learning_rate": 9.533766233766234e-06, |
| "loss": 182.2606, |
| "step": 6600 |
| }, |
| { |
| "epoch": 400.0, |
| "grad_norm": 310.8315124511719, |
| "learning_rate": 9.507792207792209e-06, |
| "loss": 177.0946, |
| "step": 6800 |
| }, |
| { |
| "epoch": 411.7761194029851, |
| "grad_norm": 503.26812744140625, |
| "learning_rate": 9.481818181818182e-06, |
| "loss": 167.5973, |
| "step": 7000 |
| }, |
| { |
| "epoch": 423.53731343283584, |
| "grad_norm": 564.5276489257812, |
| "learning_rate": 9.455844155844158e-06, |
| "loss": 157.7437, |
| "step": 7200 |
| }, |
| { |
| "epoch": 435.2985074626866, |
| "grad_norm": 325.49566650390625, |
| "learning_rate": 9.429870129870131e-06, |
| "loss": 146.1547, |
| "step": 7400 |
| }, |
| { |
| "epoch": 447.05970149253733, |
| "grad_norm": 446.79388427734375, |
| "learning_rate": 9.403896103896105e-06, |
| "loss": 138.5613, |
| "step": 7600 |
| }, |
| { |
| "epoch": 458.8358208955224, |
| "grad_norm": 582.2409057617188, |
| "learning_rate": 9.37792207792208e-06, |
| "loss": 133.6703, |
| "step": 7800 |
| }, |
| { |
| "epoch": 470.5970149253731, |
| "grad_norm": 503.8074951171875, |
| "learning_rate": 9.351948051948054e-06, |
| "loss": 125.3042, |
| "step": 8000 |
| }, |
| { |
| "epoch": 482.35820895522386, |
| "grad_norm": 523.983642578125, |
| "learning_rate": 9.326103896103897e-06, |
| "loss": 119.7444, |
| "step": 8200 |
| }, |
| { |
| "epoch": 494.1194029850746, |
| "grad_norm": 797.6224975585938, |
| "learning_rate": 9.30012987012987e-06, |
| "loss": 112.4131, |
| "step": 8400 |
| }, |
| { |
| "epoch": 505.8955223880597, |
| "grad_norm": 491.4894714355469, |
| "learning_rate": 9.274155844155846e-06, |
| "loss": 112.5327, |
| "step": 8600 |
| }, |
| { |
| "epoch": 517.6567164179105, |
| "grad_norm": 503.34698486328125, |
| "learning_rate": 9.24818181818182e-06, |
| "loss": 101.2935, |
| "step": 8800 |
| }, |
| { |
| "epoch": 529.4179104477612, |
| "grad_norm": 418.223876953125, |
| "learning_rate": 9.222207792207793e-06, |
| "loss": 97.0487, |
| "step": 9000 |
| }, |
| { |
| "epoch": 541.179104477612, |
| "grad_norm": 475.534423828125, |
| "learning_rate": 9.196233766233767e-06, |
| "loss": 95.3661, |
| "step": 9200 |
| }, |
| { |
| "epoch": 552.955223880597, |
| "grad_norm": 406.5447082519531, |
| "learning_rate": 9.170259740259742e-06, |
| "loss": 91.4412, |
| "step": 9400 |
| }, |
| { |
| "epoch": 564.7164179104477, |
| "grad_norm": 394.3955078125, |
| "learning_rate": 9.144285714285716e-06, |
| "loss": 87.2209, |
| "step": 9600 |
| }, |
| { |
| "epoch": 576.4776119402985, |
| "grad_norm": 426.0124206542969, |
| "learning_rate": 9.118311688311689e-06, |
| "loss": 81.9725, |
| "step": 9800 |
| }, |
| { |
| "epoch": 588.2388059701492, |
| "grad_norm": 499.22869873046875, |
| "learning_rate": 9.092337662337664e-06, |
| "loss": 82.7026, |
| "step": 10000 |
| }, |
| { |
| "epoch": 600.0, |
| "grad_norm": 378.9816589355469, |
| "learning_rate": 9.066493506493508e-06, |
| "loss": 78.9563, |
| "step": 10200 |
| }, |
| { |
| "epoch": 611.776119402985, |
| "grad_norm": 440.2572021484375, |
| "learning_rate": 9.040519480519482e-06, |
| "loss": 74.7577, |
| "step": 10400 |
| }, |
| { |
| "epoch": 623.5373134328358, |
| "grad_norm": 492.1644592285156, |
| "learning_rate": 9.014545454545455e-06, |
| "loss": 72.7476, |
| "step": 10600 |
| }, |
| { |
| "epoch": 635.2985074626865, |
| "grad_norm": 412.267822265625, |
| "learning_rate": 8.98857142857143e-06, |
| "loss": 70.3234, |
| "step": 10800 |
| }, |
| { |
| "epoch": 647.0597014925373, |
| "grad_norm": 439.10736083984375, |
| "learning_rate": 8.962597402597404e-06, |
| "loss": 69.0891, |
| "step": 11000 |
| }, |
| { |
| "epoch": 658.8358208955224, |
| "grad_norm": 349.216064453125, |
| "learning_rate": 8.936623376623378e-06, |
| "loss": 63.9953, |
| "step": 11200 |
| }, |
| { |
| "epoch": 670.5970149253732, |
| "grad_norm": 414.66741943359375, |
| "learning_rate": 8.910649350649351e-06, |
| "loss": 65.5338, |
| "step": 11400 |
| }, |
| { |
| "epoch": 682.3582089552239, |
| "grad_norm": 379.8702392578125, |
| "learning_rate": 8.884675324675326e-06, |
| "loss": 62.1837, |
| "step": 11600 |
| }, |
| { |
| "epoch": 694.1194029850747, |
| "grad_norm": 338.27685546875, |
| "learning_rate": 8.858831168831168e-06, |
| "loss": 61.4667, |
| "step": 11800 |
| }, |
| { |
| "epoch": 705.8955223880597, |
| "grad_norm": 375.4732360839844, |
| "learning_rate": 8.832857142857143e-06, |
| "loss": 59.5297, |
| "step": 12000 |
| }, |
| { |
| "epoch": 717.6567164179105, |
| "grad_norm": 441.6203308105469, |
| "learning_rate": 8.806883116883119e-06, |
| "loss": 55.1731, |
| "step": 12200 |
| }, |
| { |
| "epoch": 729.4179104477612, |
| "grad_norm": 393.1416320800781, |
| "learning_rate": 8.780909090909092e-06, |
| "loss": 57.3119, |
| "step": 12400 |
| }, |
| { |
| "epoch": 741.179104477612, |
| "grad_norm": 281.7967224121094, |
| "learning_rate": 8.754935064935066e-06, |
| "loss": 53.6714, |
| "step": 12600 |
| }, |
| { |
| "epoch": 752.955223880597, |
| "grad_norm": 441.4906921386719, |
| "learning_rate": 8.72896103896104e-06, |
| "loss": 52.6136, |
| "step": 12800 |
| }, |
| { |
| "epoch": 764.7164179104477, |
| "grad_norm": 514.225830078125, |
| "learning_rate": 8.702987012987015e-06, |
| "loss": 51.2136, |
| "step": 13000 |
| }, |
| { |
| "epoch": 776.4776119402985, |
| "grad_norm": 358.94439697265625, |
| "learning_rate": 8.677012987012988e-06, |
| "loss": 50.1962, |
| "step": 13200 |
| }, |
| { |
| "epoch": 788.2388059701492, |
| "grad_norm": 403.89691162109375, |
| "learning_rate": 8.651038961038962e-06, |
| "loss": 47.0859, |
| "step": 13400 |
| }, |
| { |
| "epoch": 800.0, |
| "grad_norm": 324.1061096191406, |
| "learning_rate": 8.625064935064935e-06, |
| "loss": 46.5822, |
| "step": 13600 |
| }, |
| { |
| "epoch": 811.776119402985, |
| "grad_norm": 297.4976501464844, |
| "learning_rate": 8.59909090909091e-06, |
| "loss": 47.6056, |
| "step": 13800 |
| }, |
| { |
| "epoch": 823.5373134328358, |
| "grad_norm": 350.8284912109375, |
| "learning_rate": 8.573116883116884e-06, |
| "loss": 42.6472, |
| "step": 14000 |
| }, |
| { |
| "epoch": 835.2985074626865, |
| "grad_norm": 291.62103271484375, |
| "learning_rate": 8.547142857142858e-06, |
| "loss": 43.605, |
| "step": 14200 |
| }, |
| { |
| "epoch": 847.0597014925373, |
| "grad_norm": 450.9121398925781, |
| "learning_rate": 8.521168831168833e-06, |
| "loss": 42.5453, |
| "step": 14400 |
| }, |
| { |
| "epoch": 858.8358208955224, |
| "grad_norm": 312.8192138671875, |
| "learning_rate": 8.495324675324677e-06, |
| "loss": 41.1642, |
| "step": 14600 |
| }, |
| { |
| "epoch": 870.5970149253732, |
| "grad_norm": 346.3138427734375, |
| "learning_rate": 8.46935064935065e-06, |
| "loss": 39.1892, |
| "step": 14800 |
| }, |
| { |
| "epoch": 882.3582089552239, |
| "grad_norm": 365.8443908691406, |
| "learning_rate": 8.443376623376624e-06, |
| "loss": 43.7121, |
| "step": 15000 |
| }, |
| { |
| "epoch": 894.1194029850747, |
| "grad_norm": 335.4299011230469, |
| "learning_rate": 8.417402597402599e-06, |
| "loss": 38.9868, |
| "step": 15200 |
| }, |
| { |
| "epoch": 905.8955223880597, |
| "grad_norm": 406.51416015625, |
| "learning_rate": 8.391428571428573e-06, |
| "loss": 38.384, |
| "step": 15400 |
| }, |
| { |
| "epoch": 917.6567164179105, |
| "grad_norm": 242.07505798339844, |
| "learning_rate": 8.365454545454546e-06, |
| "loss": 35.6221, |
| "step": 15600 |
| }, |
| { |
| "epoch": 929.4179104477612, |
| "grad_norm": 450.2956237792969, |
| "learning_rate": 8.33948051948052e-06, |
| "loss": 38.2171, |
| "step": 15800 |
| }, |
| { |
| "epoch": 941.179104477612, |
| "grad_norm": 367.16973876953125, |
| "learning_rate": 8.313636363636365e-06, |
| "loss": 35.7045, |
| "step": 16000 |
| }, |
| { |
| "epoch": 952.955223880597, |
| "grad_norm": 263.6134948730469, |
| "learning_rate": 8.287662337662339e-06, |
| "loss": 34.7425, |
| "step": 16200 |
| }, |
| { |
| "epoch": 964.7164179104477, |
| "grad_norm": 249.75967407226562, |
| "learning_rate": 8.261688311688312e-06, |
| "loss": 32.8498, |
| "step": 16400 |
| }, |
| { |
| "epoch": 976.4776119402985, |
| "grad_norm": 447.4524841308594, |
| "learning_rate": 8.235714285714287e-06, |
| "loss": 32.5018, |
| "step": 16600 |
| }, |
| { |
| "epoch": 988.2388059701492, |
| "grad_norm": 375.5469055175781, |
| "learning_rate": 8.209740259740261e-06, |
| "loss": 33.4849, |
| "step": 16800 |
| }, |
| { |
| "epoch": 1000.0, |
| "grad_norm": 235.9437713623047, |
| "learning_rate": 8.183896103896105e-06, |
| "loss": 34.2183, |
| "step": 17000 |
| }, |
| { |
| "epoch": 1011.776119402985, |
| "grad_norm": 388.0195617675781, |
| "learning_rate": 8.15792207792208e-06, |
| "loss": 31.4981, |
| "step": 17200 |
| }, |
| { |
| "epoch": 1023.5373134328358, |
| "grad_norm": 252.05894470214844, |
| "learning_rate": 8.131948051948053e-06, |
| "loss": 31.8987, |
| "step": 17400 |
| }, |
| { |
| "epoch": 1035.2985074626865, |
| "grad_norm": 244.4244842529297, |
| "learning_rate": 8.105974025974027e-06, |
| "loss": 33.8581, |
| "step": 17600 |
| }, |
| { |
| "epoch": 1047.0597014925372, |
| "grad_norm": 397.1546936035156, |
| "learning_rate": 8.08e-06, |
| "loss": 30.2649, |
| "step": 17800 |
| }, |
| { |
| "epoch": 1058.8358208955224, |
| "grad_norm": 338.2776184082031, |
| "learning_rate": 8.054025974025976e-06, |
| "loss": 30.8272, |
| "step": 18000 |
| }, |
| { |
| "epoch": 1070.597014925373, |
| "grad_norm": 653.6060791015625, |
| "learning_rate": 8.02805194805195e-06, |
| "loss": 28.4248, |
| "step": 18200 |
| }, |
| { |
| "epoch": 1082.358208955224, |
| "grad_norm": 317.8432922363281, |
| "learning_rate": 8.002077922077923e-06, |
| "loss": 29.007, |
| "step": 18400 |
| }, |
| { |
| "epoch": 1094.1194029850747, |
| "grad_norm": 362.0513916015625, |
| "learning_rate": 7.976103896103897e-06, |
| "loss": 29.2207, |
| "step": 18600 |
| }, |
| { |
| "epoch": 1105.8955223880596, |
| "grad_norm": 377.53857421875, |
| "learning_rate": 7.950129870129872e-06, |
| "loss": 27.3833, |
| "step": 18800 |
| }, |
| { |
| "epoch": 1117.6567164179105, |
| "grad_norm": 298.9675598144531, |
| "learning_rate": 7.924155844155845e-06, |
| "loss": 29.8298, |
| "step": 19000 |
| }, |
| { |
| "epoch": 1129.4179104477612, |
| "grad_norm": 333.05206298828125, |
| "learning_rate": 7.898181818181819e-06, |
| "loss": 27.2648, |
| "step": 19200 |
| }, |
| { |
| "epoch": 1141.1791044776119, |
| "grad_norm": 227.57530212402344, |
| "learning_rate": 7.872207792207793e-06, |
| "loss": 25.75, |
| "step": 19400 |
| }, |
| { |
| "epoch": 1152.955223880597, |
| "grad_norm": 345.0716552734375, |
| "learning_rate": 7.846233766233768e-06, |
| "loss": 25.266, |
| "step": 19600 |
| }, |
| { |
| "epoch": 1164.7164179104477, |
| "grad_norm": 302.9230651855469, |
| "learning_rate": 7.820259740259741e-06, |
| "loss": 25.8148, |
| "step": 19800 |
| }, |
| { |
| "epoch": 1176.4776119402984, |
| "grad_norm": 346.07598876953125, |
| "learning_rate": 7.794285714285715e-06, |
| "loss": 25.3416, |
| "step": 20000 |
| }, |
| { |
| "epoch": 1188.2388059701493, |
| "grad_norm": 354.4360046386719, |
| "learning_rate": 7.76831168831169e-06, |
| "loss": 24.6638, |
| "step": 20200 |
| }, |
| { |
| "epoch": 1200.0, |
| "grad_norm": 368.4764709472656, |
| "learning_rate": 7.742337662337664e-06, |
| "loss": 24.0062, |
| "step": 20400 |
| }, |
| { |
| "epoch": 1211.7761194029852, |
| "grad_norm": 248.7994384765625, |
| "learning_rate": 7.716363636363637e-06, |
| "loss": 23.8738, |
| "step": 20600 |
| }, |
| { |
| "epoch": 1223.5373134328358, |
| "grad_norm": 283.0020751953125, |
| "learning_rate": 7.690389610389611e-06, |
| "loss": 24.369, |
| "step": 20800 |
| }, |
| { |
| "epoch": 1235.2985074626865, |
| "grad_norm": 490.0590515136719, |
| "learning_rate": 7.664415584415586e-06, |
| "loss": 22.5971, |
| "step": 21000 |
| }, |
| { |
| "epoch": 1247.0597014925372, |
| "grad_norm": 338.49102783203125, |
| "learning_rate": 7.63844155844156e-06, |
| "loss": 23.1806, |
| "step": 21200 |
| }, |
| { |
| "epoch": 1258.8358208955224, |
| "grad_norm": 201.65057373046875, |
| "learning_rate": 7.612467532467533e-06, |
| "loss": 21.7898, |
| "step": 21400 |
| }, |
| { |
| "epoch": 1270.597014925373, |
| "grad_norm": 176.30361938476562, |
| "learning_rate": 7.586493506493508e-06, |
| "loss": 22.0611, |
| "step": 21600 |
| }, |
| { |
| "epoch": 1282.358208955224, |
| "grad_norm": 145.49542236328125, |
| "learning_rate": 7.560519480519481e-06, |
| "loss": 21.875, |
| "step": 21800 |
| }, |
| { |
| "epoch": 1294.1194029850747, |
| "grad_norm": 271.5858154296875, |
| "learning_rate": 7.534545454545456e-06, |
| "loss": 21.199, |
| "step": 22000 |
| }, |
| { |
| "epoch": 1305.8955223880596, |
| "grad_norm": 682.5701904296875, |
| "learning_rate": 7.508571428571429e-06, |
| "loss": 22.5543, |
| "step": 22200 |
| }, |
| { |
| "epoch": 1317.6567164179105, |
| "grad_norm": 629.8733520507812, |
| "learning_rate": 7.482597402597404e-06, |
| "loss": 21.7533, |
| "step": 22400 |
| }, |
| { |
| "epoch": 1329.4179104477612, |
| "grad_norm": 447.5409240722656, |
| "learning_rate": 7.456623376623377e-06, |
| "loss": 20.4621, |
| "step": 22600 |
| }, |
| { |
| "epoch": 1341.1791044776119, |
| "grad_norm": 190.6186065673828, |
| "learning_rate": 7.430649350649352e-06, |
| "loss": 20.3926, |
| "step": 22800 |
| }, |
| { |
| "epoch": 1352.955223880597, |
| "grad_norm": 480.3921813964844, |
| "learning_rate": 7.404675324675325e-06, |
| "loss": 21.3989, |
| "step": 23000 |
| }, |
| { |
| "epoch": 1364.7164179104477, |
| "grad_norm": 432.7933044433594, |
| "learning_rate": 7.3787012987013e-06, |
| "loss": 20.2977, |
| "step": 23200 |
| }, |
| { |
| "epoch": 1376.4776119402984, |
| "grad_norm": 263.83331298828125, |
| "learning_rate": 7.352727272727273e-06, |
| "loss": 21.8715, |
| "step": 23400 |
| }, |
| { |
| "epoch": 1388.2388059701493, |
| "grad_norm": 225.55624389648438, |
| "learning_rate": 7.326753246753248e-06, |
| "loss": 19.0286, |
| "step": 23600 |
| }, |
| { |
| "epoch": 1400.0, |
| "grad_norm": 271.21759033203125, |
| "learning_rate": 7.300779220779221e-06, |
| "loss": 20.0349, |
| "step": 23800 |
| }, |
| { |
| "epoch": 1411.7761194029852, |
| "grad_norm": 273.9780578613281, |
| "learning_rate": 7.274805194805196e-06, |
| "loss": 17.947, |
| "step": 24000 |
| }, |
| { |
| "epoch": 1423.5373134328358, |
| "grad_norm": 464.6848449707031, |
| "learning_rate": 7.24883116883117e-06, |
| "loss": 19.3846, |
| "step": 24200 |
| }, |
| { |
| "epoch": 1435.2985074626865, |
| "grad_norm": 376.6822814941406, |
| "learning_rate": 7.222857142857144e-06, |
| "loss": 17.7826, |
| "step": 24400 |
| }, |
| { |
| "epoch": 1447.0597014925372, |
| "grad_norm": 236.9201202392578, |
| "learning_rate": 7.196883116883118e-06, |
| "loss": 18.7341, |
| "step": 24600 |
| }, |
| { |
| "epoch": 1458.8358208955224, |
| "grad_norm": 269.235595703125, |
| "learning_rate": 7.171038961038962e-06, |
| "loss": 16.5044, |
| "step": 24800 |
| }, |
| { |
| "epoch": 1470.597014925373, |
| "grad_norm": 224.2851104736328, |
| "learning_rate": 7.145064935064936e-06, |
| "loss": 17.9869, |
| "step": 25000 |
| }, |
| { |
| "epoch": 1482.358208955224, |
| "grad_norm": 424.98834228515625, |
| "learning_rate": 7.11909090909091e-06, |
| "loss": 17.7338, |
| "step": 25200 |
| }, |
| { |
| "epoch": 1494.1194029850747, |
| "grad_norm": 244.76785278320312, |
| "learning_rate": 7.093116883116884e-06, |
| "loss": 15.3534, |
| "step": 25400 |
| }, |
| { |
| "epoch": 1505.8955223880596, |
| "grad_norm": 89.87413787841797, |
| "learning_rate": 7.067142857142858e-06, |
| "loss": 17.2717, |
| "step": 25600 |
| }, |
| { |
| "epoch": 1517.6567164179105, |
| "grad_norm": 278.63165283203125, |
| "learning_rate": 7.041168831168832e-06, |
| "loss": 17.7088, |
| "step": 25800 |
| }, |
| { |
| "epoch": 1529.4179104477612, |
| "grad_norm": 257.7251281738281, |
| "learning_rate": 7.015194805194806e-06, |
| "loss": 16.9006, |
| "step": 26000 |
| }, |
| { |
| "epoch": 1541.1791044776119, |
| "grad_norm": 352.7020263671875, |
| "learning_rate": 6.98935064935065e-06, |
| "loss": 17.136, |
| "step": 26200 |
| }, |
| { |
| "epoch": 1552.955223880597, |
| "grad_norm": 124.77161407470703, |
| "learning_rate": 6.9633766233766244e-06, |
| "loss": 17.4147, |
| "step": 26400 |
| }, |
| { |
| "epoch": 1564.7164179104477, |
| "grad_norm": 321.99993896484375, |
| "learning_rate": 6.937402597402598e-06, |
| "loss": 15.1883, |
| "step": 26600 |
| }, |
| { |
| "epoch": 1576.4776119402984, |
| "grad_norm": 211.46424865722656, |
| "learning_rate": 6.911428571428572e-06, |
| "loss": 14.7355, |
| "step": 26800 |
| }, |
| { |
| "epoch": 1588.2388059701493, |
| "grad_norm": 147.95535278320312, |
| "learning_rate": 6.885454545454546e-06, |
| "loss": 15.0797, |
| "step": 27000 |
| }, |
| { |
| "epoch": 1600.0, |
| "grad_norm": 359.3253173828125, |
| "learning_rate": 6.85948051948052e-06, |
| "loss": 15.7509, |
| "step": 27200 |
| }, |
| { |
| "epoch": 1611.7761194029852, |
| "grad_norm": 360.4601135253906, |
| "learning_rate": 6.833506493506494e-06, |
| "loss": 16.636, |
| "step": 27400 |
| }, |
| { |
| "epoch": 1623.5373134328358, |
| "grad_norm": 217.2965545654297, |
| "learning_rate": 6.807532467532468e-06, |
| "loss": 15.6866, |
| "step": 27600 |
| }, |
| { |
| "epoch": 1635.2985074626865, |
| "grad_norm": 220.64784240722656, |
| "learning_rate": 6.781558441558442e-06, |
| "loss": 15.5503, |
| "step": 27800 |
| }, |
| { |
| "epoch": 1647.0597014925372, |
| "grad_norm": 258.07672119140625, |
| "learning_rate": 6.755584415584416e-06, |
| "loss": 14.64, |
| "step": 28000 |
| }, |
| { |
| "epoch": 1658.8358208955224, |
| "grad_norm": 255.86795043945312, |
| "learning_rate": 6.72961038961039e-06, |
| "loss": 14.2442, |
| "step": 28200 |
| }, |
| { |
| "epoch": 1670.597014925373, |
| "grad_norm": 183.0586395263672, |
| "learning_rate": 6.703636363636364e-06, |
| "loss": 14.277, |
| "step": 28400 |
| }, |
| { |
| "epoch": 1682.358208955224, |
| "grad_norm": 133.0940399169922, |
| "learning_rate": 6.677662337662339e-06, |
| "loss": 14.0652, |
| "step": 28600 |
| }, |
| { |
| "epoch": 1694.1194029850747, |
| "grad_norm": 292.6916198730469, |
| "learning_rate": 6.651688311688312e-06, |
| "loss": 13.645, |
| "step": 28800 |
| }, |
| { |
| "epoch": 1705.8955223880596, |
| "grad_norm": 305.353515625, |
| "learning_rate": 6.625714285714287e-06, |
| "loss": 14.4846, |
| "step": 29000 |
| }, |
| { |
| "epoch": 1717.6567164179105, |
| "grad_norm": 242.2213592529297, |
| "learning_rate": 6.59974025974026e-06, |
| "loss": 14.0155, |
| "step": 29200 |
| }, |
| { |
| "epoch": 1729.4179104477612, |
| "grad_norm": 309.6208801269531, |
| "learning_rate": 6.573766233766235e-06, |
| "loss": 13.813, |
| "step": 29400 |
| }, |
| { |
| "epoch": 1741.1791044776119, |
| "grad_norm": 295.8955078125, |
| "learning_rate": 6.547792207792208e-06, |
| "loss": 14.3597, |
| "step": 29600 |
| }, |
| { |
| "epoch": 1752.955223880597, |
| "grad_norm": 221.8771514892578, |
| "learning_rate": 6.521818181818183e-06, |
| "loss": 14.1165, |
| "step": 29800 |
| }, |
| { |
| "epoch": 1764.7164179104477, |
| "grad_norm": 276.3559875488281, |
| "learning_rate": 6.495844155844156e-06, |
| "loss": 12.6467, |
| "step": 30000 |
| }, |
| { |
| "epoch": 1776.4776119402984, |
| "grad_norm": 242.14724731445312, |
| "learning_rate": 6.469870129870131e-06, |
| "loss": 13.3587, |
| "step": 30200 |
| }, |
| { |
| "epoch": 1788.2388059701493, |
| "grad_norm": 170.22076416015625, |
| "learning_rate": 6.443896103896104e-06, |
| "loss": 13.2378, |
| "step": 30400 |
| }, |
| { |
| "epoch": 1800.0, |
| "grad_norm": 164.98995971679688, |
| "learning_rate": 6.417922077922079e-06, |
| "loss": 12.556, |
| "step": 30600 |
| }, |
| { |
| "epoch": 1811.7761194029852, |
| "grad_norm": 355.3738098144531, |
| "learning_rate": 6.391948051948052e-06, |
| "loss": 11.3157, |
| "step": 30800 |
| }, |
| { |
| "epoch": 1823.5373134328358, |
| "grad_norm": 220.3183135986328, |
| "learning_rate": 6.365974025974027e-06, |
| "loss": 13.424, |
| "step": 31000 |
| }, |
| { |
| "epoch": 1835.2985074626865, |
| "grad_norm": 261.7161865234375, |
| "learning_rate": 6.34e-06, |
| "loss": 11.4347, |
| "step": 31200 |
| }, |
| { |
| "epoch": 1847.0597014925372, |
| "grad_norm": 155.91165161132812, |
| "learning_rate": 6.314025974025975e-06, |
| "loss": 10.8361, |
| "step": 31400 |
| }, |
| { |
| "epoch": 1858.8358208955224, |
| "grad_norm": 93.3238754272461, |
| "learning_rate": 6.288051948051948e-06, |
| "loss": 12.3992, |
| "step": 31600 |
| }, |
| { |
| "epoch": 1870.597014925373, |
| "grad_norm": 249.3748016357422, |
| "learning_rate": 6.262077922077923e-06, |
| "loss": 11.965, |
| "step": 31800 |
| }, |
| { |
| "epoch": 1882.358208955224, |
| "grad_norm": 242.0140838623047, |
| "learning_rate": 6.236103896103896e-06, |
| "loss": 11.4908, |
| "step": 32000 |
| }, |
| { |
| "epoch": 1894.1194029850747, |
| "grad_norm": 463.1481018066406, |
| "learning_rate": 6.210129870129871e-06, |
| "loss": 12.7511, |
| "step": 32200 |
| }, |
| { |
| "epoch": 1905.8955223880596, |
| "grad_norm": 265.88739013671875, |
| "learning_rate": 6.184155844155845e-06, |
| "loss": 11.0652, |
| "step": 32400 |
| }, |
| { |
| "epoch": 1917.6567164179105, |
| "grad_norm": 167.36598205566406, |
| "learning_rate": 6.1584415584415595e-06, |
| "loss": 10.4137, |
| "step": 32600 |
| }, |
| { |
| "epoch": 1929.4179104477612, |
| "grad_norm": 173.92787170410156, |
| "learning_rate": 6.132467532467533e-06, |
| "loss": 10.7277, |
| "step": 32800 |
| }, |
| { |
| "epoch": 1941.1791044776119, |
| "grad_norm": 294.4163818359375, |
| "learning_rate": 6.1064935064935075e-06, |
| "loss": 11.8032, |
| "step": 33000 |
| }, |
| { |
| "epoch": 1952.955223880597, |
| "grad_norm": 243.70274353027344, |
| "learning_rate": 6.080519480519481e-06, |
| "loss": 10.3227, |
| "step": 33200 |
| }, |
| { |
| "epoch": 1964.7164179104477, |
| "grad_norm": 120.62760162353516, |
| "learning_rate": 6.0545454545454555e-06, |
| "loss": 10.37, |
| "step": 33400 |
| }, |
| { |
| "epoch": 1976.4776119402984, |
| "grad_norm": 130.5196075439453, |
| "learning_rate": 6.028571428571429e-06, |
| "loss": 11.8498, |
| "step": 33600 |
| }, |
| { |
| "epoch": 1988.2388059701493, |
| "grad_norm": 254.48614501953125, |
| "learning_rate": 6.0025974025974035e-06, |
| "loss": 10.7649, |
| "step": 33800 |
| }, |
| { |
| "epoch": 2000.0, |
| "grad_norm": 292.97216796875, |
| "learning_rate": 5.976623376623377e-06, |
| "loss": 11.4211, |
| "step": 34000 |
| }, |
| { |
| "epoch": 2011.7761194029852, |
| "grad_norm": 497.4891052246094, |
| "learning_rate": 5.9506493506493515e-06, |
| "loss": 11.9636, |
| "step": 34200 |
| }, |
| { |
| "epoch": 2023.5373134328358, |
| "grad_norm": 316.7022705078125, |
| "learning_rate": 5.924675324675325e-06, |
| "loss": 10.3258, |
| "step": 34400 |
| }, |
| { |
| "epoch": 2035.2985074626865, |
| "grad_norm": 263.2421569824219, |
| "learning_rate": 5.8987012987012994e-06, |
| "loss": 11.0163, |
| "step": 34600 |
| }, |
| { |
| "epoch": 2047.0597014925372, |
| "grad_norm": 149.36387634277344, |
| "learning_rate": 5.872727272727273e-06, |
| "loss": 9.7479, |
| "step": 34800 |
| }, |
| { |
| "epoch": 2058.8358208955224, |
| "grad_norm": 294.4871826171875, |
| "learning_rate": 5.8467532467532474e-06, |
| "loss": 10.7836, |
| "step": 35000 |
| }, |
| { |
| "epoch": 2070.597014925373, |
| "grad_norm": 311.5248107910156, |
| "learning_rate": 5.820779220779221e-06, |
| "loss": 10.6797, |
| "step": 35200 |
| }, |
| { |
| "epoch": 2082.3582089552237, |
| "grad_norm": 136.69500732421875, |
| "learning_rate": 5.7948051948051954e-06, |
| "loss": 10.9252, |
| "step": 35400 |
| }, |
| { |
| "epoch": 2094.1194029850744, |
| "grad_norm": 293.40399169921875, |
| "learning_rate": 5.768831168831169e-06, |
| "loss": 9.608, |
| "step": 35600 |
| }, |
| { |
| "epoch": 2105.89552238806, |
| "grad_norm": 232.2513427734375, |
| "learning_rate": 5.742987012987013e-06, |
| "loss": 10.0404, |
| "step": 35800 |
| }, |
| { |
| "epoch": 2117.6567164179105, |
| "grad_norm": 178.5306396484375, |
| "learning_rate": 5.717012987012988e-06, |
| "loss": 9.612, |
| "step": 36000 |
| }, |
| { |
| "epoch": 2129.417910447761, |
| "grad_norm": 323.44146728515625, |
| "learning_rate": 5.691168831168831e-06, |
| "loss": 8.6739, |
| "step": 36200 |
| }, |
| { |
| "epoch": 2141.179104477612, |
| "grad_norm": 101.05858612060547, |
| "learning_rate": 5.665194805194806e-06, |
| "loss": 9.2302, |
| "step": 36400 |
| }, |
| { |
| "epoch": 2152.955223880597, |
| "grad_norm": 164.43310546875, |
| "learning_rate": 5.63922077922078e-06, |
| "loss": 9.1987, |
| "step": 36600 |
| }, |
| { |
| "epoch": 2164.716417910448, |
| "grad_norm": 182.63916015625, |
| "learning_rate": 5.613246753246754e-06, |
| "loss": 9.7936, |
| "step": 36800 |
| }, |
| { |
| "epoch": 2176.4776119402986, |
| "grad_norm": 201.4703369140625, |
| "learning_rate": 5.587272727272728e-06, |
| "loss": 9.6401, |
| "step": 37000 |
| }, |
| { |
| "epoch": 2188.2388059701493, |
| "grad_norm": 151.9721221923828, |
| "learning_rate": 5.561298701298702e-06, |
| "loss": 10.4346, |
| "step": 37200 |
| }, |
| { |
| "epoch": 2200.0, |
| "grad_norm": 244.78732299804688, |
| "learning_rate": 5.535324675324676e-06, |
| "loss": 10.2022, |
| "step": 37400 |
| }, |
| { |
| "epoch": 2211.776119402985, |
| "grad_norm": 233.7654266357422, |
| "learning_rate": 5.50935064935065e-06, |
| "loss": 10.1548, |
| "step": 37600 |
| }, |
| { |
| "epoch": 2223.5373134328356, |
| "grad_norm": 188.21788024902344, |
| "learning_rate": 5.483376623376624e-06, |
| "loss": 9.9679, |
| "step": 37800 |
| }, |
| { |
| "epoch": 2235.2985074626868, |
| "grad_norm": 285.9569091796875, |
| "learning_rate": 5.457402597402598e-06, |
| "loss": 9.8509, |
| "step": 38000 |
| }, |
| { |
| "epoch": 2247.0597014925374, |
| "grad_norm": 264.8778991699219, |
| "learning_rate": 5.431428571428572e-06, |
| "loss": 9.9817, |
| "step": 38200 |
| }, |
| { |
| "epoch": 2258.8358208955224, |
| "grad_norm": 182.24191284179688, |
| "learning_rate": 5.405454545454546e-06, |
| "loss": 9.4971, |
| "step": 38400 |
| }, |
| { |
| "epoch": 2270.597014925373, |
| "grad_norm": 219.8632354736328, |
| "learning_rate": 5.37948051948052e-06, |
| "loss": 8.164, |
| "step": 38600 |
| }, |
| { |
| "epoch": 2282.3582089552237, |
| "grad_norm": 292.8862609863281, |
| "learning_rate": 5.353506493506494e-06, |
| "loss": 8.4305, |
| "step": 38800 |
| }, |
| { |
| "epoch": 2294.1194029850744, |
| "grad_norm": 243.6376190185547, |
| "learning_rate": 5.327532467532468e-06, |
| "loss": 10.2729, |
| "step": 39000 |
| }, |
| { |
| "epoch": 2305.89552238806, |
| "grad_norm": 120.5704574584961, |
| "learning_rate": 5.301558441558442e-06, |
| "loss": 8.4079, |
| "step": 39200 |
| }, |
| { |
| "epoch": 2317.6567164179105, |
| "grad_norm": 333.58612060546875, |
| "learning_rate": 5.275584415584416e-06, |
| "loss": 8.1809, |
| "step": 39400 |
| }, |
| { |
| "epoch": 2329.417910447761, |
| "grad_norm": 77.8395004272461, |
| "learning_rate": 5.24961038961039e-06, |
| "loss": 9.0445, |
| "step": 39600 |
| }, |
| { |
| "epoch": 2341.179104477612, |
| "grad_norm": 172.45359802246094, |
| "learning_rate": 5.223636363636364e-06, |
| "loss": 9.6053, |
| "step": 39800 |
| }, |
| { |
| "epoch": 2352.955223880597, |
| "grad_norm": 195.2128143310547, |
| "learning_rate": 5.197662337662338e-06, |
| "loss": 8.1167, |
| "step": 40000 |
| }, |
| { |
| "epoch": 2364.716417910448, |
| "grad_norm": 154.22743225097656, |
| "learning_rate": 5.171688311688312e-06, |
| "loss": 7.8072, |
| "step": 40200 |
| }, |
| { |
| "epoch": 2376.4776119402986, |
| "grad_norm": 83.33777618408203, |
| "learning_rate": 5.145714285714286e-06, |
| "loss": 8.0217, |
| "step": 40400 |
| }, |
| { |
| "epoch": 2388.2388059701493, |
| "grad_norm": 102.02108764648438, |
| "learning_rate": 5.11974025974026e-06, |
| "loss": 8.8543, |
| "step": 40600 |
| }, |
| { |
| "epoch": 2400.0, |
| "grad_norm": 68.74557495117188, |
| "learning_rate": 5.0937662337662345e-06, |
| "loss": 8.7576, |
| "step": 40800 |
| }, |
| { |
| "epoch": 2411.776119402985, |
| "grad_norm": 251.68568420410156, |
| "learning_rate": 5.067792207792208e-06, |
| "loss": 8.4178, |
| "step": 41000 |
| }, |
| { |
| "epoch": 2423.5373134328356, |
| "grad_norm": 176.3686065673828, |
| "learning_rate": 5.0418181818181825e-06, |
| "loss": 7.5952, |
| "step": 41200 |
| }, |
| { |
| "epoch": 2435.2985074626868, |
| "grad_norm": 355.08428955078125, |
| "learning_rate": 5.015974025974026e-06, |
| "loss": 7.7425, |
| "step": 41400 |
| }, |
| { |
| "epoch": 2447.0597014925374, |
| "grad_norm": 114.10618591308594, |
| "learning_rate": 4.9900000000000005e-06, |
| "loss": 9.2298, |
| "step": 41600 |
| }, |
| { |
| "epoch": 2458.8358208955224, |
| "grad_norm": 85.14331817626953, |
| "learning_rate": 4.964025974025974e-06, |
| "loss": 8.2393, |
| "step": 41800 |
| }, |
| { |
| "epoch": 2470.597014925373, |
| "grad_norm": 223.14291381835938, |
| "learning_rate": 4.9380519480519485e-06, |
| "loss": 9.5785, |
| "step": 42000 |
| }, |
| { |
| "epoch": 2482.3582089552237, |
| "grad_norm": 156.1779022216797, |
| "learning_rate": 4.912077922077922e-06, |
| "loss": 8.3694, |
| "step": 42200 |
| }, |
| { |
| "epoch": 2494.1194029850744, |
| "grad_norm": 163.2755126953125, |
| "learning_rate": 4.8862337662337665e-06, |
| "loss": 8.2274, |
| "step": 42400 |
| }, |
| { |
| "epoch": 2505.89552238806, |
| "grad_norm": 448.78741455078125, |
| "learning_rate": 4.860259740259741e-06, |
| "loss": 7.7257, |
| "step": 42600 |
| }, |
| { |
| "epoch": 2517.6567164179105, |
| "grad_norm": 224.7237548828125, |
| "learning_rate": 4.8342857142857145e-06, |
| "loss": 7.8661, |
| "step": 42800 |
| }, |
| { |
| "epoch": 2529.417910447761, |
| "grad_norm": 218.48426818847656, |
| "learning_rate": 4.808311688311689e-06, |
| "loss": 8.1052, |
| "step": 43000 |
| }, |
| { |
| "epoch": 2541.179104477612, |
| "grad_norm": 285.8219909667969, |
| "learning_rate": 4.7823376623376625e-06, |
| "loss": 7.2237, |
| "step": 43200 |
| }, |
| { |
| "epoch": 2552.955223880597, |
| "grad_norm": 121.75675964355469, |
| "learning_rate": 4.756363636363637e-06, |
| "loss": 7.3067, |
| "step": 43400 |
| }, |
| { |
| "epoch": 2564.716417910448, |
| "grad_norm": 441.2803039550781, |
| "learning_rate": 4.7303896103896104e-06, |
| "loss": 7.4268, |
| "step": 43600 |
| }, |
| { |
| "epoch": 2576.4776119402986, |
| "grad_norm": 250.1401824951172, |
| "learning_rate": 4.704415584415585e-06, |
| "loss": 7.4162, |
| "step": 43800 |
| }, |
| { |
| "epoch": 2588.2388059701493, |
| "grad_norm": 225.75238037109375, |
| "learning_rate": 4.6784415584415584e-06, |
| "loss": 7.9644, |
| "step": 44000 |
| }, |
| { |
| "epoch": 2600.0, |
| "grad_norm": 199.13861083984375, |
| "learning_rate": 4.652597402597403e-06, |
| "loss": 8.048, |
| "step": 44200 |
| }, |
| { |
| "epoch": 2611.776119402985, |
| "grad_norm": 128.95506286621094, |
| "learning_rate": 4.626623376623377e-06, |
| "loss": 6.9158, |
| "step": 44400 |
| }, |
| { |
| "epoch": 2623.5373134328356, |
| "grad_norm": 365.9570007324219, |
| "learning_rate": 4.600649350649351e-06, |
| "loss": 6.4058, |
| "step": 44600 |
| }, |
| { |
| "epoch": 2635.2985074626868, |
| "grad_norm": 114.73458862304688, |
| "learning_rate": 4.574675324675325e-06, |
| "loss": 8.2035, |
| "step": 44800 |
| }, |
| { |
| "epoch": 2647.0597014925374, |
| "grad_norm": 158.98861694335938, |
| "learning_rate": 4.548701298701299e-06, |
| "loss": 6.3892, |
| "step": 45000 |
| }, |
| { |
| "epoch": 2658.8358208955224, |
| "grad_norm": 203.47166442871094, |
| "learning_rate": 4.522727272727273e-06, |
| "loss": 7.2696, |
| "step": 45200 |
| }, |
| { |
| "epoch": 2670.597014925373, |
| "grad_norm": 231.44822692871094, |
| "learning_rate": 4.496753246753247e-06, |
| "loss": 7.5676, |
| "step": 45400 |
| }, |
| { |
| "epoch": 2682.3582089552237, |
| "grad_norm": 247.14039611816406, |
| "learning_rate": 4.470779220779221e-06, |
| "loss": 6.668, |
| "step": 45600 |
| }, |
| { |
| "epoch": 2694.1194029850744, |
| "grad_norm": 170.46774291992188, |
| "learning_rate": 4.444805194805195e-06, |
| "loss": 6.9474, |
| "step": 45800 |
| }, |
| { |
| "epoch": 2705.89552238806, |
| "grad_norm": 191.91915893554688, |
| "learning_rate": 4.418831168831169e-06, |
| "loss": 6.3278, |
| "step": 46000 |
| }, |
| { |
| "epoch": 2717.6567164179105, |
| "grad_norm": 105.89759826660156, |
| "learning_rate": 4.392857142857143e-06, |
| "loss": 7.3746, |
| "step": 46200 |
| }, |
| { |
| "epoch": 2729.417910447761, |
| "grad_norm": 86.0598373413086, |
| "learning_rate": 4.366883116883117e-06, |
| "loss": 7.079, |
| "step": 46400 |
| }, |
| { |
| "epoch": 2741.179104477612, |
| "grad_norm": 272.9900207519531, |
| "learning_rate": 4.340909090909091e-06, |
| "loss": 6.9619, |
| "step": 46600 |
| }, |
| { |
| "epoch": 2752.955223880597, |
| "grad_norm": 113.84228515625, |
| "learning_rate": 4.314935064935065e-06, |
| "loss": 6.7595, |
| "step": 46800 |
| }, |
| { |
| "epoch": 2764.716417910448, |
| "grad_norm": 464.052734375, |
| "learning_rate": 4.28896103896104e-06, |
| "loss": 6.9025, |
| "step": 47000 |
| }, |
| { |
| "epoch": 2776.4776119402986, |
| "grad_norm": 73.1541519165039, |
| "learning_rate": 4.262987012987013e-06, |
| "loss": 6.753, |
| "step": 47200 |
| }, |
| { |
| "epoch": 2788.2388059701493, |
| "grad_norm": 42.5888786315918, |
| "learning_rate": 4.237012987012988e-06, |
| "loss": 6.2469, |
| "step": 47400 |
| }, |
| { |
| "epoch": 2800.0, |
| "grad_norm": 87.32052612304688, |
| "learning_rate": 4.211038961038961e-06, |
| "loss": 7.9924, |
| "step": 47600 |
| }, |
| { |
| "epoch": 2811.776119402985, |
| "grad_norm": 324.5746154785156, |
| "learning_rate": 4.185064935064936e-06, |
| "loss": 5.9469, |
| "step": 47800 |
| }, |
| { |
| "epoch": 2823.5373134328356, |
| "grad_norm": 51.54016876220703, |
| "learning_rate": 4.159090909090909e-06, |
| "loss": 7.4489, |
| "step": 48000 |
| }, |
| { |
| "epoch": 2835.2985074626868, |
| "grad_norm": 176.7570343017578, |
| "learning_rate": 4.1331168831168836e-06, |
| "loss": 6.7529, |
| "step": 48200 |
| }, |
| { |
| "epoch": 2847.0597014925374, |
| "grad_norm": 107.7917709350586, |
| "learning_rate": 4.107142857142857e-06, |
| "loss": 6.1709, |
| "step": 48400 |
| }, |
| { |
| "epoch": 2858.8358208955224, |
| "grad_norm": 218.55908203125, |
| "learning_rate": 4.0812987012987016e-06, |
| "loss": 6.4473, |
| "step": 48600 |
| }, |
| { |
| "epoch": 2870.597014925373, |
| "grad_norm": 317.35137939453125, |
| "learning_rate": 4.055324675324675e-06, |
| "loss": 7.2964, |
| "step": 48800 |
| }, |
| { |
| "epoch": 2882.3582089552237, |
| "grad_norm": 407.064697265625, |
| "learning_rate": 4.0293506493506495e-06, |
| "loss": 6.5469, |
| "step": 49000 |
| }, |
| { |
| "epoch": 2894.1194029850744, |
| "grad_norm": 89.26599884033203, |
| "learning_rate": 4.003376623376624e-06, |
| "loss": 6.4736, |
| "step": 49200 |
| }, |
| { |
| "epoch": 2905.89552238806, |
| "grad_norm": 25.349979400634766, |
| "learning_rate": 3.9774025974025975e-06, |
| "loss": 6.6709, |
| "step": 49400 |
| }, |
| { |
| "epoch": 2917.6567164179105, |
| "grad_norm": 152.76893615722656, |
| "learning_rate": 3.951428571428572e-06, |
| "loss": 6.769, |
| "step": 49600 |
| }, |
| { |
| "epoch": 2929.417910447761, |
| "grad_norm": 369.66729736328125, |
| "learning_rate": 3.9254545454545455e-06, |
| "loss": 6.8757, |
| "step": 49800 |
| }, |
| { |
| "epoch": 2941.179104477612, |
| "grad_norm": 163.01708984375, |
| "learning_rate": 3.89948051948052e-06, |
| "loss": 5.9855, |
| "step": 50000 |
| }, |
| { |
| "epoch": 2952.955223880597, |
| "grad_norm": 359.50164794921875, |
| "learning_rate": 3.8735064935064935e-06, |
| "loss": 6.7189, |
| "step": 50200 |
| }, |
| { |
| "epoch": 2964.716417910448, |
| "grad_norm": 135.31906127929688, |
| "learning_rate": 3.847532467532468e-06, |
| "loss": 6.1798, |
| "step": 50400 |
| }, |
| { |
| "epoch": 2976.4776119402986, |
| "grad_norm": 77.5290756225586, |
| "learning_rate": 3.8215584415584415e-06, |
| "loss": 5.9281, |
| "step": 50600 |
| }, |
| { |
| "epoch": 2988.2388059701493, |
| "grad_norm": 103.02841186523438, |
| "learning_rate": 3.7955844155844155e-06, |
| "loss": 6.068, |
| "step": 50800 |
| }, |
| { |
| "epoch": 3000.0, |
| "grad_norm": 348.920166015625, |
| "learning_rate": 3.76961038961039e-06, |
| "loss": 5.7779, |
| "step": 51000 |
| }, |
| { |
| "epoch": 3011.776119402985, |
| "grad_norm": 356.079345703125, |
| "learning_rate": 3.743636363636364e-06, |
| "loss": 6.8131, |
| "step": 51200 |
| }, |
| { |
| "epoch": 3023.5373134328356, |
| "grad_norm": 201.86148071289062, |
| "learning_rate": 3.717662337662338e-06, |
| "loss": 6.2735, |
| "step": 51400 |
| }, |
| { |
| "epoch": 3035.2985074626868, |
| "grad_norm": 181.3549041748047, |
| "learning_rate": 3.691688311688312e-06, |
| "loss": 6.2642, |
| "step": 51600 |
| }, |
| { |
| "epoch": 3047.0597014925374, |
| "grad_norm": 158.26319885253906, |
| "learning_rate": 3.665714285714286e-06, |
| "loss": 7.6419, |
| "step": 51800 |
| }, |
| { |
| "epoch": 3058.8358208955224, |
| "grad_norm": 417.2932434082031, |
| "learning_rate": 3.63974025974026e-06, |
| "loss": 5.9062, |
| "step": 52000 |
| }, |
| { |
| "epoch": 3070.597014925373, |
| "grad_norm": 123.00536346435547, |
| "learning_rate": 3.613766233766234e-06, |
| "loss": 6.0384, |
| "step": 52200 |
| }, |
| { |
| "epoch": 3082.3582089552237, |
| "grad_norm": 212.98793029785156, |
| "learning_rate": 3.587792207792208e-06, |
| "loss": 5.6856, |
| "step": 52400 |
| }, |
| { |
| "epoch": 3094.1194029850744, |
| "grad_norm": 232.1907501220703, |
| "learning_rate": 3.561818181818182e-06, |
| "loss": 6.196, |
| "step": 52600 |
| }, |
| { |
| "epoch": 3105.89552238806, |
| "grad_norm": 40.089229583740234, |
| "learning_rate": 3.535844155844156e-06, |
| "loss": 5.7956, |
| "step": 52800 |
| }, |
| { |
| "epoch": 3117.6567164179105, |
| "grad_norm": 286.09320068359375, |
| "learning_rate": 3.5100000000000003e-06, |
| "loss": 6.1736, |
| "step": 53000 |
| }, |
| { |
| "epoch": 3129.417910447761, |
| "grad_norm": 247.46432495117188, |
| "learning_rate": 3.4840259740259743e-06, |
| "loss": 5.9662, |
| "step": 53200 |
| }, |
| { |
| "epoch": 3141.179104477612, |
| "grad_norm": 175.70138549804688, |
| "learning_rate": 3.4580519480519483e-06, |
| "loss": 5.5742, |
| "step": 53400 |
| }, |
| { |
| "epoch": 3152.955223880597, |
| "grad_norm": 244.6918487548828, |
| "learning_rate": 3.4320779220779223e-06, |
| "loss": 5.4242, |
| "step": 53600 |
| }, |
| { |
| "epoch": 3164.716417910448, |
| "grad_norm": 290.1295166015625, |
| "learning_rate": 3.4061038961038963e-06, |
| "loss": 5.9908, |
| "step": 53800 |
| }, |
| { |
| "epoch": 3176.4776119402986, |
| "grad_norm": 191.1642303466797, |
| "learning_rate": 3.3801298701298702e-06, |
| "loss": 6.4687, |
| "step": 54000 |
| }, |
| { |
| "epoch": 3188.2388059701493, |
| "grad_norm": 230.18724060058594, |
| "learning_rate": 3.3541558441558442e-06, |
| "loss": 5.8824, |
| "step": 54200 |
| }, |
| { |
| "epoch": 3200.0, |
| "grad_norm": 189.81280517578125, |
| "learning_rate": 3.3281818181818182e-06, |
| "loss": 6.0779, |
| "step": 54400 |
| }, |
| { |
| "epoch": 3211.776119402985, |
| "grad_norm": 139.64651489257812, |
| "learning_rate": 3.3022077922077927e-06, |
| "loss": 6.1833, |
| "step": 54600 |
| }, |
| { |
| "epoch": 3223.5373134328356, |
| "grad_norm": 191.9344024658203, |
| "learning_rate": 3.2762337662337666e-06, |
| "loss": 5.7443, |
| "step": 54800 |
| }, |
| { |
| "epoch": 3235.2985074626868, |
| "grad_norm": 313.61773681640625, |
| "learning_rate": 3.2502597402597406e-06, |
| "loss": 5.4124, |
| "step": 55000 |
| }, |
| { |
| "epoch": 3247.0597014925374, |
| "grad_norm": 139.777099609375, |
| "learning_rate": 3.2242857142857146e-06, |
| "loss": 5.4102, |
| "step": 55200 |
| }, |
| { |
| "epoch": 3258.8358208955224, |
| "grad_norm": 142.17034912109375, |
| "learning_rate": 3.1983116883116886e-06, |
| "loss": 5.2255, |
| "step": 55400 |
| }, |
| { |
| "epoch": 3270.597014925373, |
| "grad_norm": 159.3903350830078, |
| "learning_rate": 3.1724675324675326e-06, |
| "loss": 5.1369, |
| "step": 55600 |
| }, |
| { |
| "epoch": 3282.3582089552237, |
| "grad_norm": 171.2684326171875, |
| "learning_rate": 3.1464935064935066e-06, |
| "loss": 5.5156, |
| "step": 55800 |
| }, |
| { |
| "epoch": 3294.1194029850744, |
| "grad_norm": 177.32545471191406, |
| "learning_rate": 3.1205194805194806e-06, |
| "loss": 6.1047, |
| "step": 56000 |
| }, |
| { |
| "epoch": 3305.89552238806, |
| "grad_norm": 127.48875427246094, |
| "learning_rate": 3.094545454545455e-06, |
| "loss": 5.5533, |
| "step": 56200 |
| }, |
| { |
| "epoch": 3317.6567164179105, |
| "grad_norm": 284.4790954589844, |
| "learning_rate": 3.068571428571429e-06, |
| "loss": 5.096, |
| "step": 56400 |
| }, |
| { |
| "epoch": 3329.417910447761, |
| "grad_norm": 148.5039520263672, |
| "learning_rate": 3.042597402597403e-06, |
| "loss": 5.6225, |
| "step": 56600 |
| }, |
| { |
| "epoch": 3341.179104477612, |
| "grad_norm": 414.6415710449219, |
| "learning_rate": 3.016623376623377e-06, |
| "loss": 6.35, |
| "step": 56800 |
| }, |
| { |
| "epoch": 3352.955223880597, |
| "grad_norm": 62.76802062988281, |
| "learning_rate": 2.990649350649351e-06, |
| "loss": 5.5579, |
| "step": 57000 |
| }, |
| { |
| "epoch": 3364.716417910448, |
| "grad_norm": 31.516719818115234, |
| "learning_rate": 2.964675324675325e-06, |
| "loss": 5.7893, |
| "step": 57200 |
| }, |
| { |
| "epoch": 3376.4776119402986, |
| "grad_norm": 151.5336456298828, |
| "learning_rate": 2.938701298701299e-06, |
| "loss": 6.144, |
| "step": 57400 |
| }, |
| { |
| "epoch": 3388.2388059701493, |
| "grad_norm": 457.56573486328125, |
| "learning_rate": 2.912727272727273e-06, |
| "loss": 5.7772, |
| "step": 57600 |
| }, |
| { |
| "epoch": 3400.0, |
| "grad_norm": 81.55277252197266, |
| "learning_rate": 2.8867532467532474e-06, |
| "loss": 5.0447, |
| "step": 57800 |
| }, |
| { |
| "epoch": 3411.776119402985, |
| "grad_norm": 22.124055862426758, |
| "learning_rate": 2.8607792207792214e-06, |
| "loss": 6.2737, |
| "step": 58000 |
| }, |
| { |
| "epoch": 3423.5373134328356, |
| "grad_norm": 284.54803466796875, |
| "learning_rate": 2.8348051948051954e-06, |
| "loss": 5.392, |
| "step": 58200 |
| }, |
| { |
| "epoch": 3435.2985074626868, |
| "grad_norm": 455.4921569824219, |
| "learning_rate": 2.8088311688311694e-06, |
| "loss": 5.9439, |
| "step": 58400 |
| }, |
| { |
| "epoch": 3447.0597014925374, |
| "grad_norm": 328.2806701660156, |
| "learning_rate": 2.7829870129870134e-06, |
| "loss": 5.8568, |
| "step": 58600 |
| }, |
| { |
| "epoch": 3458.8358208955224, |
| "grad_norm": 186.59173583984375, |
| "learning_rate": 2.7570129870129874e-06, |
| "loss": 5.8562, |
| "step": 58800 |
| }, |
| { |
| "epoch": 3470.597014925373, |
| "grad_norm": 225.7448272705078, |
| "learning_rate": 2.7310389610389614e-06, |
| "loss": 4.7843, |
| "step": 59000 |
| }, |
| { |
| "epoch": 3482.3582089552237, |
| "grad_norm": 453.7283935546875, |
| "learning_rate": 2.7050649350649354e-06, |
| "loss": 5.9007, |
| "step": 59200 |
| }, |
| { |
| "epoch": 3494.1194029850744, |
| "grad_norm": 185.39083862304688, |
| "learning_rate": 2.6790909090909094e-06, |
| "loss": 5.8159, |
| "step": 59400 |
| }, |
| { |
| "epoch": 3505.89552238806, |
| "grad_norm": 95.3510513305664, |
| "learning_rate": 2.6531168831168833e-06, |
| "loss": 5.2905, |
| "step": 59600 |
| }, |
| { |
| "epoch": 3517.6567164179105, |
| "grad_norm": 29.410858154296875, |
| "learning_rate": 2.6271428571428573e-06, |
| "loss": 4.9938, |
| "step": 59800 |
| }, |
| { |
| "epoch": 3529.417910447761, |
| "grad_norm": 87.61227416992188, |
| "learning_rate": 2.6011688311688318e-06, |
| "loss": 5.5285, |
| "step": 60000 |
| }, |
| { |
| "epoch": 3541.179104477612, |
| "grad_norm": 13.053986549377441, |
| "learning_rate": 2.5751948051948058e-06, |
| "loss": 5.7445, |
| "step": 60200 |
| }, |
| { |
| "epoch": 3552.955223880597, |
| "grad_norm": 104.69475555419922, |
| "learning_rate": 2.5493506493506497e-06, |
| "loss": 5.7425, |
| "step": 60400 |
| }, |
| { |
| "epoch": 3564.716417910448, |
| "grad_norm": 206.36949157714844, |
| "learning_rate": 2.5233766233766237e-06, |
| "loss": 5.2375, |
| "step": 60600 |
| }, |
| { |
| "epoch": 3576.4776119402986, |
| "grad_norm": 375.29266357421875, |
| "learning_rate": 2.4974025974025977e-06, |
| "loss": 4.803, |
| "step": 60800 |
| }, |
| { |
| "epoch": 3588.2388059701493, |
| "grad_norm": 55.10752868652344, |
| "learning_rate": 2.4714285714285717e-06, |
| "loss": 4.9683, |
| "step": 61000 |
| }, |
| { |
| "epoch": 3600.0, |
| "grad_norm": 129.48883056640625, |
| "learning_rate": 2.4454545454545457e-06, |
| "loss": 4.7734, |
| "step": 61200 |
| }, |
| { |
| "epoch": 3611.776119402985, |
| "grad_norm": 232.613525390625, |
| "learning_rate": 2.4194805194805197e-06, |
| "loss": 4.9529, |
| "step": 61400 |
| }, |
| { |
| "epoch": 3623.5373134328356, |
| "grad_norm": 119.18389892578125, |
| "learning_rate": 2.3935064935064937e-06, |
| "loss": 4.6153, |
| "step": 61600 |
| }, |
| { |
| "epoch": 3635.2985074626868, |
| "grad_norm": 163.57119750976562, |
| "learning_rate": 2.3675324675324677e-06, |
| "loss": 5.8858, |
| "step": 61800 |
| }, |
| { |
| "epoch": 3647.0597014925374, |
| "grad_norm": 179.8179473876953, |
| "learning_rate": 2.3415584415584417e-06, |
| "loss": 4.7752, |
| "step": 62000 |
| }, |
| { |
| "epoch": 3658.8358208955224, |
| "grad_norm": 476.3257751464844, |
| "learning_rate": 2.3155844155844157e-06, |
| "loss": 5.3414, |
| "step": 62200 |
| }, |
| { |
| "epoch": 3670.597014925373, |
| "grad_norm": 52.362213134765625, |
| "learning_rate": 2.2896103896103897e-06, |
| "loss": 5.1212, |
| "step": 62400 |
| }, |
| { |
| "epoch": 3682.3582089552237, |
| "grad_norm": 146.75779724121094, |
| "learning_rate": 2.2636363636363637e-06, |
| "loss": 5.0903, |
| "step": 62600 |
| }, |
| { |
| "epoch": 3694.1194029850744, |
| "grad_norm": 194.1846160888672, |
| "learning_rate": 2.237662337662338e-06, |
| "loss": 5.1435, |
| "step": 62800 |
| }, |
| { |
| "epoch": 3705.89552238806, |
| "grad_norm": 353.49639892578125, |
| "learning_rate": 2.211688311688312e-06, |
| "loss": 5.3565, |
| "step": 63000 |
| }, |
| { |
| "epoch": 3717.6567164179105, |
| "grad_norm": 170.0306396484375, |
| "learning_rate": 2.185844155844156e-06, |
| "loss": 5.4508, |
| "step": 63200 |
| }, |
| { |
| "epoch": 3729.417910447761, |
| "grad_norm": 105.09954071044922, |
| "learning_rate": 2.15987012987013e-06, |
| "loss": 4.764, |
| "step": 63400 |
| }, |
| { |
| "epoch": 3741.179104477612, |
| "grad_norm": 110.3379135131836, |
| "learning_rate": 2.133896103896104e-06, |
| "loss": 4.7869, |
| "step": 63600 |
| }, |
| { |
| "epoch": 3752.955223880597, |
| "grad_norm": 195.00747680664062, |
| "learning_rate": 2.107922077922078e-06, |
| "loss": 5.006, |
| "step": 63800 |
| }, |
| { |
| "epoch": 3764.716417910448, |
| "grad_norm": 384.640380859375, |
| "learning_rate": 2.081948051948052e-06, |
| "loss": 4.3629, |
| "step": 64000 |
| }, |
| { |
| "epoch": 3776.4776119402986, |
| "grad_norm": 151.75579833984375, |
| "learning_rate": 2.055974025974026e-06, |
| "loss": 5.1427, |
| "step": 64200 |
| }, |
| { |
| "epoch": 3788.2388059701493, |
| "grad_norm": 321.9924621582031, |
| "learning_rate": 2.0300000000000005e-06, |
| "loss": 4.5263, |
| "step": 64400 |
| }, |
| { |
| "epoch": 3800.0, |
| "grad_norm": 71.38465118408203, |
| "learning_rate": 2.0040259740259745e-06, |
| "loss": 5.9976, |
| "step": 64600 |
| }, |
| { |
| "epoch": 3811.776119402985, |
| "grad_norm": 142.11964416503906, |
| "learning_rate": 1.9780519480519485e-06, |
| "loss": 4.5058, |
| "step": 64800 |
| }, |
| { |
| "epoch": 3823.5373134328356, |
| "grad_norm": 255.7351531982422, |
| "learning_rate": 1.9520779220779225e-06, |
| "loss": 5.2054, |
| "step": 65000 |
| }, |
| { |
| "epoch": 3835.2985074626868, |
| "grad_norm": 139.64971923828125, |
| "learning_rate": 1.9261038961038964e-06, |
| "loss": 4.5154, |
| "step": 65200 |
| }, |
| { |
| "epoch": 3847.0597014925374, |
| "grad_norm": 269.8912658691406, |
| "learning_rate": 1.9001298701298704e-06, |
| "loss": 4.7901, |
| "step": 65400 |
| }, |
| { |
| "epoch": 3858.8358208955224, |
| "grad_norm": 143.27041625976562, |
| "learning_rate": 1.8741558441558444e-06, |
| "loss": 5.0364, |
| "step": 65600 |
| }, |
| { |
| "epoch": 3870.597014925373, |
| "grad_norm": 85.00979614257812, |
| "learning_rate": 1.8481818181818184e-06, |
| "loss": 5.7018, |
| "step": 65800 |
| }, |
| { |
| "epoch": 3882.3582089552237, |
| "grad_norm": 23.321130752563477, |
| "learning_rate": 1.8222077922077924e-06, |
| "loss": 5.2634, |
| "step": 66000 |
| }, |
| { |
| "epoch": 3894.1194029850744, |
| "grad_norm": 116.6059799194336, |
| "learning_rate": 1.7963636363636366e-06, |
| "loss": 5.1182, |
| "step": 66200 |
| }, |
| { |
| "epoch": 3905.89552238806, |
| "grad_norm": 250.52786254882812, |
| "learning_rate": 1.7703896103896106e-06, |
| "loss": 4.8144, |
| "step": 66400 |
| }, |
| { |
| "epoch": 3917.6567164179105, |
| "grad_norm": 153.89578247070312, |
| "learning_rate": 1.7444155844155846e-06, |
| "loss": 4.1859, |
| "step": 66600 |
| }, |
| { |
| "epoch": 3929.417910447761, |
| "grad_norm": 377.2282409667969, |
| "learning_rate": 1.7184415584415586e-06, |
| "loss": 4.8385, |
| "step": 66800 |
| }, |
| { |
| "epoch": 3941.179104477612, |
| "grad_norm": 122.95838928222656, |
| "learning_rate": 1.6924675324675326e-06, |
| "loss": 3.9718, |
| "step": 67000 |
| }, |
| { |
| "epoch": 3952.955223880597, |
| "grad_norm": 231.4821014404297, |
| "learning_rate": 1.6664935064935068e-06, |
| "loss": 4.6111, |
| "step": 67200 |
| }, |
| { |
| "epoch": 3964.716417910448, |
| "grad_norm": 237.96945190429688, |
| "learning_rate": 1.6405194805194808e-06, |
| "loss": 5.2436, |
| "step": 67400 |
| }, |
| { |
| "epoch": 3976.4776119402986, |
| "grad_norm": 110.74050903320312, |
| "learning_rate": 1.6145454545454548e-06, |
| "loss": 4.6773, |
| "step": 67600 |
| }, |
| { |
| "epoch": 3988.2388059701493, |
| "grad_norm": 68.52436828613281, |
| "learning_rate": 1.5885714285714288e-06, |
| "loss": 4.4819, |
| "step": 67800 |
| }, |
| { |
| "epoch": 4000.0, |
| "grad_norm": 87.57453155517578, |
| "learning_rate": 1.5625974025974028e-06, |
| "loss": 4.9292, |
| "step": 68000 |
| }, |
| { |
| "epoch": 4011.776119402985, |
| "grad_norm": 288.13714599609375, |
| "learning_rate": 1.5366233766233768e-06, |
| "loss": 5.3013, |
| "step": 68200 |
| }, |
| { |
| "epoch": 4023.5373134328356, |
| "grad_norm": 266.3171081542969, |
| "learning_rate": 1.5106493506493508e-06, |
| "loss": 4.7125, |
| "step": 68400 |
| }, |
| { |
| "epoch": 4035.2985074626868, |
| "grad_norm": 136.16607666015625, |
| "learning_rate": 1.4846753246753248e-06, |
| "loss": 4.4004, |
| "step": 68600 |
| }, |
| { |
| "epoch": 4047.0597014925374, |
| "grad_norm": 199.1755828857422, |
| "learning_rate": 1.4587012987012988e-06, |
| "loss": 5.0152, |
| "step": 68800 |
| }, |
| { |
| "epoch": 4058.8358208955224, |
| "grad_norm": 135.2139434814453, |
| "learning_rate": 1.4327272727272728e-06, |
| "loss": 4.592, |
| "step": 69000 |
| }, |
| { |
| "epoch": 4070.597014925373, |
| "grad_norm": 256.96954345703125, |
| "learning_rate": 1.406753246753247e-06, |
| "loss": 4.1364, |
| "step": 69200 |
| }, |
| { |
| "epoch": 4082.3582089552237, |
| "grad_norm": 151.81385803222656, |
| "learning_rate": 1.380779220779221e-06, |
| "loss": 4.638, |
| "step": 69400 |
| }, |
| { |
| "epoch": 4094.1194029850744, |
| "grad_norm": 140.18310546875, |
| "learning_rate": 1.354805194805195e-06, |
| "loss": 5.1307, |
| "step": 69600 |
| }, |
| { |
| "epoch": 4105.895522388059, |
| "grad_norm": 131.49514770507812, |
| "learning_rate": 1.328831168831169e-06, |
| "loss": 4.4532, |
| "step": 69800 |
| }, |
| { |
| "epoch": 4117.6567164179105, |
| "grad_norm": 86.51961517333984, |
| "learning_rate": 1.302857142857143e-06, |
| "loss": 4.4996, |
| "step": 70000 |
| }, |
| { |
| "epoch": 4129.417910447762, |
| "grad_norm": 232.16627502441406, |
| "learning_rate": 1.2770129870129871e-06, |
| "loss": 5.0817, |
| "step": 70200 |
| }, |
| { |
| "epoch": 4141.179104477612, |
| "grad_norm": 111.115478515625, |
| "learning_rate": 1.2511688311688313e-06, |
| "loss": 4.4479, |
| "step": 70400 |
| }, |
| { |
| "epoch": 4152.955223880597, |
| "grad_norm": 136.0243377685547, |
| "learning_rate": 1.2251948051948053e-06, |
| "loss": 4.9805, |
| "step": 70600 |
| }, |
| { |
| "epoch": 4164.7164179104475, |
| "grad_norm": 240.4090576171875, |
| "learning_rate": 1.1992207792207793e-06, |
| "loss": 5.1758, |
| "step": 70800 |
| }, |
| { |
| "epoch": 4176.477611940299, |
| "grad_norm": 162.47177124023438, |
| "learning_rate": 1.1732467532467533e-06, |
| "loss": 4.4003, |
| "step": 71000 |
| }, |
| { |
| "epoch": 4188.238805970149, |
| "grad_norm": 172.05398559570312, |
| "learning_rate": 1.1472727272727275e-06, |
| "loss": 4.5982, |
| "step": 71200 |
| }, |
| { |
| "epoch": 4200.0, |
| "grad_norm": 115.58950805664062, |
| "learning_rate": 1.1212987012987015e-06, |
| "loss": 4.9562, |
| "step": 71400 |
| }, |
| { |
| "epoch": 4211.776119402985, |
| "grad_norm": 612.0722045898438, |
| "learning_rate": 1.0954545454545455e-06, |
| "loss": 4.5789, |
| "step": 71600 |
| }, |
| { |
| "epoch": 4223.537313432836, |
| "grad_norm": 142.39805603027344, |
| "learning_rate": 1.0694805194805195e-06, |
| "loss": 5.3481, |
| "step": 71800 |
| }, |
| { |
| "epoch": 4235.298507462687, |
| "grad_norm": 151.8544158935547, |
| "learning_rate": 1.0435064935064935e-06, |
| "loss": 4.9694, |
| "step": 72000 |
| }, |
| { |
| "epoch": 4247.059701492537, |
| "grad_norm": 78.40233612060547, |
| "learning_rate": 1.0175324675324675e-06, |
| "loss": 4.6901, |
| "step": 72200 |
| }, |
| { |
| "epoch": 4258.835820895522, |
| "grad_norm": 128.51756286621094, |
| "learning_rate": 9.915584415584415e-07, |
| "loss": 4.44, |
| "step": 72400 |
| }, |
| { |
| "epoch": 4270.5970149253735, |
| "grad_norm": 98.647216796875, |
| "learning_rate": 9.655844155844157e-07, |
| "loss": 4.1811, |
| "step": 72600 |
| }, |
| { |
| "epoch": 4282.358208955224, |
| "grad_norm": 142.39537048339844, |
| "learning_rate": 9.396103896103898e-07, |
| "loss": 3.957, |
| "step": 72800 |
| }, |
| { |
| "epoch": 4294.119402985075, |
| "grad_norm": 172.83412170410156, |
| "learning_rate": 9.136363636363638e-07, |
| "loss": 5.0366, |
| "step": 73000 |
| }, |
| { |
| "epoch": 4305.895522388059, |
| "grad_norm": 323.51953125, |
| "learning_rate": 8.876623376623378e-07, |
| "loss": 4.8918, |
| "step": 73200 |
| }, |
| { |
| "epoch": 4317.6567164179105, |
| "grad_norm": 175.51437377929688, |
| "learning_rate": 8.616883116883118e-07, |
| "loss": 4.8388, |
| "step": 73400 |
| }, |
| { |
| "epoch": 4329.417910447762, |
| "grad_norm": 10.666017532348633, |
| "learning_rate": 8.357142857142858e-07, |
| "loss": 4.7345, |
| "step": 73600 |
| }, |
| { |
| "epoch": 4341.179104477612, |
| "grad_norm": 236.7921142578125, |
| "learning_rate": 8.097402597402599e-07, |
| "loss": 4.5804, |
| "step": 73800 |
| }, |
| { |
| "epoch": 4352.955223880597, |
| "grad_norm": 124.93741607666016, |
| "learning_rate": 7.837662337662339e-07, |
| "loss": 5.0424, |
| "step": 74000 |
| }, |
| { |
| "epoch": 4364.7164179104475, |
| "grad_norm": 321.4673156738281, |
| "learning_rate": 7.577922077922079e-07, |
| "loss": 4.7424, |
| "step": 74200 |
| }, |
| { |
| "epoch": 4376.477611940299, |
| "grad_norm": 566.9447021484375, |
| "learning_rate": 7.318181818181819e-07, |
| "loss": 4.2368, |
| "step": 74400 |
| }, |
| { |
| "epoch": 4388.238805970149, |
| "grad_norm": 170.28578186035156, |
| "learning_rate": 7.058441558441559e-07, |
| "loss": 4.5343, |
| "step": 74600 |
| }, |
| { |
| "epoch": 4400.0, |
| "grad_norm": 45.38258743286133, |
| "learning_rate": 6.7987012987013e-07, |
| "loss": 4.2724, |
| "step": 74800 |
| }, |
| { |
| "epoch": 4411.776119402985, |
| "grad_norm": 207.35653686523438, |
| "learning_rate": 6.538961038961039e-07, |
| "loss": 4.1466, |
| "step": 75000 |
| }, |
| { |
| "epoch": 4423.537313432836, |
| "grad_norm": 195.24972534179688, |
| "learning_rate": 6.279220779220779e-07, |
| "loss": 4.8058, |
| "step": 75200 |
| }, |
| { |
| "epoch": 4435.298507462687, |
| "grad_norm": 21.271228790283203, |
| "learning_rate": 6.01948051948052e-07, |
| "loss": 4.2456, |
| "step": 75400 |
| }, |
| { |
| "epoch": 4447.059701492537, |
| "grad_norm": 128.87979125976562, |
| "learning_rate": 5.75974025974026e-07, |
| "loss": 4.609, |
| "step": 75600 |
| }, |
| { |
| "epoch": 4458.835820895522, |
| "grad_norm": 125.3255844116211, |
| "learning_rate": 5.5e-07, |
| "loss": 3.8383, |
| "step": 75800 |
| }, |
| { |
| "epoch": 4470.5970149253735, |
| "grad_norm": 176.71116638183594, |
| "learning_rate": 5.24025974025974e-07, |
| "loss": 4.3682, |
| "step": 76000 |
| }, |
| { |
| "epoch": 4482.358208955224, |
| "grad_norm": 174.38621520996094, |
| "learning_rate": 4.980519480519481e-07, |
| "loss": 3.9024, |
| "step": 76200 |
| }, |
| { |
| "epoch": 4494.119402985075, |
| "grad_norm": 150.01458740234375, |
| "learning_rate": 4.720779220779221e-07, |
| "loss": 4.1331, |
| "step": 76400 |
| }, |
| { |
| "epoch": 4505.895522388059, |
| "grad_norm": 209.55963134765625, |
| "learning_rate": 4.461038961038961e-07, |
| "loss": 4.5891, |
| "step": 76600 |
| }, |
| { |
| "epoch": 4517.6567164179105, |
| "grad_norm": 332.1000061035156, |
| "learning_rate": 4.201298701298702e-07, |
| "loss": 4.1596, |
| "step": 76800 |
| }, |
| { |
| "epoch": 4529.417910447762, |
| "grad_norm": 267.723388671875, |
| "learning_rate": 3.941558441558442e-07, |
| "loss": 3.6498, |
| "step": 77000 |
| }, |
| { |
| "epoch": 4541.179104477612, |
| "grad_norm": 221.1051483154297, |
| "learning_rate": 3.681818181818182e-07, |
| "loss": 3.9301, |
| "step": 77200 |
| }, |
| { |
| "epoch": 4552.955223880597, |
| "grad_norm": 102.58394622802734, |
| "learning_rate": 3.4220779220779225e-07, |
| "loss": 4.2587, |
| "step": 77400 |
| }, |
| { |
| "epoch": 4564.7164179104475, |
| "grad_norm": 221.03683471679688, |
| "learning_rate": 3.1623376623376624e-07, |
| "loss": 3.8038, |
| "step": 77600 |
| }, |
| { |
| "epoch": 4576.477611940299, |
| "grad_norm": 199.16111755371094, |
| "learning_rate": 2.902597402597403e-07, |
| "loss": 4.4641, |
| "step": 77800 |
| }, |
| { |
| "epoch": 4588.238805970149, |
| "grad_norm": 30.24901580810547, |
| "learning_rate": 2.6428571428571433e-07, |
| "loss": 4.233, |
| "step": 78000 |
| }, |
| { |
| "epoch": 4600.0, |
| "grad_norm": 146.84005737304688, |
| "learning_rate": 2.384415584415585e-07, |
| "loss": 4.2298, |
| "step": 78200 |
| }, |
| { |
| "epoch": 4611.776119402985, |
| "grad_norm": 215.6741180419922, |
| "learning_rate": 2.1246753246753248e-07, |
| "loss": 4.4618, |
| "step": 78400 |
| }, |
| { |
| "epoch": 4623.537313432836, |
| "grad_norm": 318.6520080566406, |
| "learning_rate": 1.864935064935065e-07, |
| "loss": 4.4153, |
| "step": 78600 |
| }, |
| { |
| "epoch": 4635.298507462687, |
| "grad_norm": 80.2079849243164, |
| "learning_rate": 1.6051948051948055e-07, |
| "loss": 4.2413, |
| "step": 78800 |
| }, |
| { |
| "epoch": 4647.059701492537, |
| "grad_norm": 118.82848358154297, |
| "learning_rate": 1.3454545454545457e-07, |
| "loss": 4.164, |
| "step": 79000 |
| }, |
| { |
| "epoch": 4658.835820895522, |
| "grad_norm": 56.74250411987305, |
| "learning_rate": 1.0857142857142857e-07, |
| "loss": 4.7498, |
| "step": 79200 |
| }, |
| { |
| "epoch": 4670.5970149253735, |
| "grad_norm": 164.66262817382812, |
| "learning_rate": 8.259740259740261e-08, |
| "loss": 4.4375, |
| "step": 79400 |
| }, |
| { |
| "epoch": 4682.358208955224, |
| "grad_norm": 267.6045227050781, |
| "learning_rate": 5.662337662337663e-08, |
| "loss": 4.8022, |
| "step": 79600 |
| }, |
| { |
| "epoch": 4694.119402985075, |
| "grad_norm": 136.69151306152344, |
| "learning_rate": 3.0649350649350655e-08, |
| "loss": 3.8398, |
| "step": 79800 |
| }, |
| { |
| "epoch": 4705.895522388059, |
| "grad_norm": 251.12142944335938, |
| "learning_rate": 4.675324675324676e-09, |
| "loss": 4.6735, |
| "step": 80000 |
| } |
| ], |
| "logging_steps": 200, |
| "max_steps": 80000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5000, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.852250804395515e+20, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|