| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 68.4931506849315, |
| "eval_steps": 500, |
| "global_step": 20000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.03424657534246575, |
| "grad_norm": 6.8509087562561035, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 1.9799, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0684931506849315, |
| "grad_norm": 6.680306911468506, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 1.9562, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.10273972602739725, |
| "grad_norm": 4.74023962020874, |
| "learning_rate": 3e-06, |
| "loss": 1.6955, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.136986301369863, |
| "grad_norm": 5.3383073806762695, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 1.7233, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.17123287671232876, |
| "grad_norm": 4.325743675231934, |
| "learning_rate": 5e-06, |
| "loss": 1.4227, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.2054794520547945, |
| "grad_norm": 4.414618968963623, |
| "learning_rate": 6e-06, |
| "loss": 1.0638, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.23972602739726026, |
| "grad_norm": 2.4248571395874023, |
| "learning_rate": 7.000000000000001e-06, |
| "loss": 0.7888, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.273972602739726, |
| "grad_norm": 1.6021969318389893, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.535, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.3082191780821918, |
| "grad_norm": 0.989554762840271, |
| "learning_rate": 9e-06, |
| "loss": 0.3166, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.3424657534246575, |
| "grad_norm": 0.5975518822669983, |
| "learning_rate": 1e-05, |
| "loss": 0.2056, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.3767123287671233, |
| "grad_norm": 0.40622058510780334, |
| "learning_rate": 1.1000000000000001e-05, |
| "loss": 0.1534, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.410958904109589, |
| "grad_norm": 0.2416389435529709, |
| "learning_rate": 1.2e-05, |
| "loss": 0.1101, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.4452054794520548, |
| "grad_norm": 0.21470960974693298, |
| "learning_rate": 1.3000000000000001e-05, |
| "loss": 0.0981, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.4794520547945205, |
| "grad_norm": 0.1605301946401596, |
| "learning_rate": 1.4000000000000001e-05, |
| "loss": 0.0844, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.5136986301369864, |
| "grad_norm": 0.16127008199691772, |
| "learning_rate": 1.5e-05, |
| "loss": 0.0787, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.547945205479452, |
| "grad_norm": 0.14680036902427673, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 0.0747, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.5821917808219178, |
| "grad_norm": 0.1104154884815216, |
| "learning_rate": 1.7000000000000003e-05, |
| "loss": 0.0646, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.6164383561643836, |
| "grad_norm": 0.12858060002326965, |
| "learning_rate": 1.8e-05, |
| "loss": 0.0585, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.6506849315068494, |
| "grad_norm": 0.10955977439880371, |
| "learning_rate": 1.9e-05, |
| "loss": 0.0518, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.684931506849315, |
| "grad_norm": 0.1255006641149521, |
| "learning_rate": 2e-05, |
| "loss": 0.0532, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.7191780821917808, |
| "grad_norm": 0.10987284034490585, |
| "learning_rate": 2.1e-05, |
| "loss": 0.0489, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.7534246575342466, |
| "grad_norm": 0.12291987985372543, |
| "learning_rate": 2.2000000000000003e-05, |
| "loss": 0.0457, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.7876712328767124, |
| "grad_norm": 0.11342420428991318, |
| "learning_rate": 2.3000000000000003e-05, |
| "loss": 0.0487, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.821917808219178, |
| "grad_norm": 0.09843714535236359, |
| "learning_rate": 2.4e-05, |
| "loss": 0.0452, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.8561643835616438, |
| "grad_norm": 0.10258728265762329, |
| "learning_rate": 2.5e-05, |
| "loss": 0.0391, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.8904109589041096, |
| "grad_norm": 0.159352108836174, |
| "learning_rate": 2.6000000000000002e-05, |
| "loss": 0.0354, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.9246575342465754, |
| "grad_norm": 0.13461743295192719, |
| "learning_rate": 2.7000000000000002e-05, |
| "loss": 0.0373, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.958904109589041, |
| "grad_norm": 0.11037391424179077, |
| "learning_rate": 2.8000000000000003e-05, |
| "loss": 0.0348, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.9931506849315068, |
| "grad_norm": 0.1120462641119957, |
| "learning_rate": 2.9e-05, |
| "loss": 0.0361, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.0273972602739727, |
| "grad_norm": 0.11409150063991547, |
| "learning_rate": 3e-05, |
| "loss": 0.0317, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.0616438356164384, |
| "grad_norm": 0.11472675949335098, |
| "learning_rate": 3.1e-05, |
| "loss": 0.0286, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.095890410958904, |
| "grad_norm": 0.1504867672920227, |
| "learning_rate": 3.2000000000000005e-05, |
| "loss": 0.0285, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.13013698630137, |
| "grad_norm": 0.14106976985931396, |
| "learning_rate": 3.3e-05, |
| "loss": 0.0293, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.1643835616438356, |
| "grad_norm": 0.09240074455738068, |
| "learning_rate": 3.4000000000000007e-05, |
| "loss": 0.026, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.1986301369863013, |
| "grad_norm": 0.09657058119773865, |
| "learning_rate": 3.5e-05, |
| "loss": 0.0254, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.2328767123287672, |
| "grad_norm": 0.11131025850772858, |
| "learning_rate": 3.6e-05, |
| "loss": 0.0298, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.2671232876712328, |
| "grad_norm": 0.09079006314277649, |
| "learning_rate": 3.7e-05, |
| "loss": 0.0231, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.3013698630136985, |
| "grad_norm": 0.10090917348861694, |
| "learning_rate": 3.8e-05, |
| "loss": 0.0279, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.3356164383561644, |
| "grad_norm": 0.10513892769813538, |
| "learning_rate": 3.9000000000000006e-05, |
| "loss": 0.0245, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.36986301369863, |
| "grad_norm": 0.10360024124383926, |
| "learning_rate": 4e-05, |
| "loss": 0.025, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.404109589041096, |
| "grad_norm": 0.09798359870910645, |
| "learning_rate": 4.1e-05, |
| "loss": 0.0238, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.4383561643835616, |
| "grad_norm": 0.099854975938797, |
| "learning_rate": 4.2e-05, |
| "loss": 0.0229, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.4726027397260273, |
| "grad_norm": 0.14537930488586426, |
| "learning_rate": 4.3e-05, |
| "loss": 0.0251, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.5068493150684932, |
| "grad_norm": 0.13373461365699768, |
| "learning_rate": 4.4000000000000006e-05, |
| "loss": 0.0229, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.541095890410959, |
| "grad_norm": 0.14300961792469025, |
| "learning_rate": 4.5e-05, |
| "loss": 0.0203, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.5753424657534247, |
| "grad_norm": 0.10283118486404419, |
| "learning_rate": 4.600000000000001e-05, |
| "loss": 0.0186, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.6095890410958904, |
| "grad_norm": 0.1506546288728714, |
| "learning_rate": 4.7e-05, |
| "loss": 0.0189, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.643835616438356, |
| "grad_norm": 0.12016734480857849, |
| "learning_rate": 4.8e-05, |
| "loss": 0.0211, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.678082191780822, |
| "grad_norm": 0.1319936364889145, |
| "learning_rate": 4.9e-05, |
| "loss": 0.0208, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.7123287671232876, |
| "grad_norm": 0.11739251017570496, |
| "learning_rate": 5e-05, |
| "loss": 0.0234, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.7465753424657535, |
| "grad_norm": 0.1280950903892517, |
| "learning_rate": 5.1000000000000006e-05, |
| "loss": 0.0196, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.7808219178082192, |
| "grad_norm": 0.1243286281824112, |
| "learning_rate": 5.2000000000000004e-05, |
| "loss": 0.022, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.8150684931506849, |
| "grad_norm": 0.08275436609983444, |
| "learning_rate": 5.300000000000001e-05, |
| "loss": 0.0185, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.8493150684931505, |
| "grad_norm": 0.1354629397392273, |
| "learning_rate": 5.4000000000000005e-05, |
| "loss": 0.0167, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.8835616438356164, |
| "grad_norm": 0.14976277947425842, |
| "learning_rate": 5.500000000000001e-05, |
| "loss": 0.0187, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.9178082191780823, |
| "grad_norm": 0.15114262700080872, |
| "learning_rate": 5.6000000000000006e-05, |
| "loss": 0.0174, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.952054794520548, |
| "grad_norm": 0.24440935254096985, |
| "learning_rate": 5.6999999999999996e-05, |
| "loss": 0.0189, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.9863013698630136, |
| "grad_norm": 0.11309883743524551, |
| "learning_rate": 5.8e-05, |
| "loss": 0.0189, |
| "step": 580 |
| }, |
| { |
| "epoch": 2.0205479452054793, |
| "grad_norm": 0.14310362935066223, |
| "learning_rate": 5.9e-05, |
| "loss": 0.0202, |
| "step": 590 |
| }, |
| { |
| "epoch": 2.0547945205479454, |
| "grad_norm": 0.15003430843353271, |
| "learning_rate": 6e-05, |
| "loss": 0.0195, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.089041095890411, |
| "grad_norm": 0.0980050340294838, |
| "learning_rate": 6.1e-05, |
| "loss": 0.0153, |
| "step": 610 |
| }, |
| { |
| "epoch": 2.1232876712328768, |
| "grad_norm": 0.1242208257317543, |
| "learning_rate": 6.2e-05, |
| "loss": 0.0167, |
| "step": 620 |
| }, |
| { |
| "epoch": 2.1575342465753424, |
| "grad_norm": 0.11008936911821365, |
| "learning_rate": 6.3e-05, |
| "loss": 0.0156, |
| "step": 630 |
| }, |
| { |
| "epoch": 2.191780821917808, |
| "grad_norm": 0.16221505403518677, |
| "learning_rate": 6.400000000000001e-05, |
| "loss": 0.0189, |
| "step": 640 |
| }, |
| { |
| "epoch": 2.2260273972602738, |
| "grad_norm": 0.12388405948877335, |
| "learning_rate": 6.500000000000001e-05, |
| "loss": 0.0168, |
| "step": 650 |
| }, |
| { |
| "epoch": 2.26027397260274, |
| "grad_norm": 0.125056654214859, |
| "learning_rate": 6.6e-05, |
| "loss": 0.0181, |
| "step": 660 |
| }, |
| { |
| "epoch": 2.2945205479452055, |
| "grad_norm": 0.13683508336544037, |
| "learning_rate": 6.7e-05, |
| "loss": 0.0134, |
| "step": 670 |
| }, |
| { |
| "epoch": 2.328767123287671, |
| "grad_norm": 0.11894214898347855, |
| "learning_rate": 6.800000000000001e-05, |
| "loss": 0.0186, |
| "step": 680 |
| }, |
| { |
| "epoch": 2.363013698630137, |
| "grad_norm": 0.09277641028165817, |
| "learning_rate": 6.9e-05, |
| "loss": 0.0146, |
| "step": 690 |
| }, |
| { |
| "epoch": 2.3972602739726026, |
| "grad_norm": 0.12442376464605331, |
| "learning_rate": 7e-05, |
| "loss": 0.013, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.4315068493150687, |
| "grad_norm": 0.11001459509134293, |
| "learning_rate": 7.1e-05, |
| "loss": 0.0135, |
| "step": 710 |
| }, |
| { |
| "epoch": 2.4657534246575343, |
| "grad_norm": 0.1414063721895218, |
| "learning_rate": 7.2e-05, |
| "loss": 0.0164, |
| "step": 720 |
| }, |
| { |
| "epoch": 2.5, |
| "grad_norm": 0.1333468109369278, |
| "learning_rate": 7.3e-05, |
| "loss": 0.0142, |
| "step": 730 |
| }, |
| { |
| "epoch": 2.5342465753424657, |
| "grad_norm": 0.11644615232944489, |
| "learning_rate": 7.4e-05, |
| "loss": 0.0129, |
| "step": 740 |
| }, |
| { |
| "epoch": 2.5684931506849313, |
| "grad_norm": 0.14134648442268372, |
| "learning_rate": 7.500000000000001e-05, |
| "loss": 0.0145, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.602739726027397, |
| "grad_norm": 0.1172047108411789, |
| "learning_rate": 7.6e-05, |
| "loss": 0.0145, |
| "step": 760 |
| }, |
| { |
| "epoch": 2.636986301369863, |
| "grad_norm": 0.12389740347862244, |
| "learning_rate": 7.7e-05, |
| "loss": 0.0147, |
| "step": 770 |
| }, |
| { |
| "epoch": 2.671232876712329, |
| "grad_norm": 0.1197102963924408, |
| "learning_rate": 7.800000000000001e-05, |
| "loss": 0.0152, |
| "step": 780 |
| }, |
| { |
| "epoch": 2.7054794520547945, |
| "grad_norm": 0.14396944642066956, |
| "learning_rate": 7.900000000000001e-05, |
| "loss": 0.0148, |
| "step": 790 |
| }, |
| { |
| "epoch": 2.73972602739726, |
| "grad_norm": 0.19851979613304138, |
| "learning_rate": 8e-05, |
| "loss": 0.0145, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.7739726027397262, |
| "grad_norm": 0.11773121356964111, |
| "learning_rate": 8.1e-05, |
| "loss": 0.0122, |
| "step": 810 |
| }, |
| { |
| "epoch": 2.808219178082192, |
| "grad_norm": 0.1518307477235794, |
| "learning_rate": 8.2e-05, |
| "loss": 0.0143, |
| "step": 820 |
| }, |
| { |
| "epoch": 2.8424657534246576, |
| "grad_norm": 0.12196581810712814, |
| "learning_rate": 8.3e-05, |
| "loss": 0.0119, |
| "step": 830 |
| }, |
| { |
| "epoch": 2.8767123287671232, |
| "grad_norm": 0.13978245854377747, |
| "learning_rate": 8.4e-05, |
| "loss": 0.0142, |
| "step": 840 |
| }, |
| { |
| "epoch": 2.910958904109589, |
| "grad_norm": 0.1492251455783844, |
| "learning_rate": 8.5e-05, |
| "loss": 0.0145, |
| "step": 850 |
| }, |
| { |
| "epoch": 2.9452054794520546, |
| "grad_norm": 0.14013636112213135, |
| "learning_rate": 8.6e-05, |
| "loss": 0.0136, |
| "step": 860 |
| }, |
| { |
| "epoch": 2.9794520547945207, |
| "grad_norm": 0.10790041089057922, |
| "learning_rate": 8.7e-05, |
| "loss": 0.0122, |
| "step": 870 |
| }, |
| { |
| "epoch": 3.0136986301369864, |
| "grad_norm": 0.10169006884098053, |
| "learning_rate": 8.800000000000001e-05, |
| "loss": 0.0111, |
| "step": 880 |
| }, |
| { |
| "epoch": 3.047945205479452, |
| "grad_norm": 0.10675999522209167, |
| "learning_rate": 8.900000000000001e-05, |
| "loss": 0.0163, |
| "step": 890 |
| }, |
| { |
| "epoch": 3.0821917808219177, |
| "grad_norm": 0.14085350930690765, |
| "learning_rate": 9e-05, |
| "loss": 0.0145, |
| "step": 900 |
| }, |
| { |
| "epoch": 3.1164383561643834, |
| "grad_norm": 0.14714281260967255, |
| "learning_rate": 9.1e-05, |
| "loss": 0.0127, |
| "step": 910 |
| }, |
| { |
| "epoch": 3.1506849315068495, |
| "grad_norm": 0.17444485425949097, |
| "learning_rate": 9.200000000000001e-05, |
| "loss": 0.0118, |
| "step": 920 |
| }, |
| { |
| "epoch": 3.184931506849315, |
| "grad_norm": 0.11419258266687393, |
| "learning_rate": 9.300000000000001e-05, |
| "loss": 0.0116, |
| "step": 930 |
| }, |
| { |
| "epoch": 3.219178082191781, |
| "grad_norm": 0.112928606569767, |
| "learning_rate": 9.4e-05, |
| "loss": 0.0118, |
| "step": 940 |
| }, |
| { |
| "epoch": 3.2534246575342465, |
| "grad_norm": 0.09506986290216446, |
| "learning_rate": 9.5e-05, |
| "loss": 0.0121, |
| "step": 950 |
| }, |
| { |
| "epoch": 3.287671232876712, |
| "grad_norm": 0.10540028661489487, |
| "learning_rate": 9.6e-05, |
| "loss": 0.0108, |
| "step": 960 |
| }, |
| { |
| "epoch": 3.3219178082191783, |
| "grad_norm": 0.09384490549564362, |
| "learning_rate": 9.7e-05, |
| "loss": 0.0138, |
| "step": 970 |
| }, |
| { |
| "epoch": 3.356164383561644, |
| "grad_norm": 0.09561273455619812, |
| "learning_rate": 9.8e-05, |
| "loss": 0.0111, |
| "step": 980 |
| }, |
| { |
| "epoch": 3.3904109589041096, |
| "grad_norm": 0.10197819769382477, |
| "learning_rate": 9.900000000000001e-05, |
| "loss": 0.0129, |
| "step": 990 |
| }, |
| { |
| "epoch": 3.4246575342465753, |
| "grad_norm": 0.11337673664093018, |
| "learning_rate": 0.0001, |
| "loss": 0.0105, |
| "step": 1000 |
| }, |
| { |
| "epoch": 3.458904109589041, |
| "grad_norm": 0.09718764573335648, |
| "learning_rate": 9.999993165095463e-05, |
| "loss": 0.0122, |
| "step": 1010 |
| }, |
| { |
| "epoch": 3.493150684931507, |
| "grad_norm": 0.08433857560157776, |
| "learning_rate": 9.999972660400536e-05, |
| "loss": 0.0116, |
| "step": 1020 |
| }, |
| { |
| "epoch": 3.5273972602739727, |
| "grad_norm": 0.09182614833116531, |
| "learning_rate": 9.999938485971279e-05, |
| "loss": 0.0122, |
| "step": 1030 |
| }, |
| { |
| "epoch": 3.5616438356164384, |
| "grad_norm": 0.10014256834983826, |
| "learning_rate": 9.999890641901125e-05, |
| "loss": 0.0122, |
| "step": 1040 |
| }, |
| { |
| "epoch": 3.595890410958904, |
| "grad_norm": 0.1171124204993248, |
| "learning_rate": 9.999829128320874e-05, |
| "loss": 0.0122, |
| "step": 1050 |
| }, |
| { |
| "epoch": 3.6301369863013697, |
| "grad_norm": 0.12046120315790176, |
| "learning_rate": 9.999753945398704e-05, |
| "loss": 0.0113, |
| "step": 1060 |
| }, |
| { |
| "epoch": 3.6643835616438354, |
| "grad_norm": 0.1236460953950882, |
| "learning_rate": 9.999665093340165e-05, |
| "loss": 0.0129, |
| "step": 1070 |
| }, |
| { |
| "epoch": 3.6986301369863015, |
| "grad_norm": 0.0972442626953125, |
| "learning_rate": 9.99956257238817e-05, |
| "loss": 0.0096, |
| "step": 1080 |
| }, |
| { |
| "epoch": 3.732876712328767, |
| "grad_norm": 0.12523873150348663, |
| "learning_rate": 9.999446382823013e-05, |
| "loss": 0.0124, |
| "step": 1090 |
| }, |
| { |
| "epoch": 3.767123287671233, |
| "grad_norm": 0.12162143737077713, |
| "learning_rate": 9.999316524962345e-05, |
| "loss": 0.0098, |
| "step": 1100 |
| }, |
| { |
| "epoch": 3.8013698630136985, |
| "grad_norm": 0.12172838300466537, |
| "learning_rate": 9.999172999161198e-05, |
| "loss": 0.0101, |
| "step": 1110 |
| }, |
| { |
| "epoch": 3.8356164383561646, |
| "grad_norm": 0.10995998233556747, |
| "learning_rate": 9.999015805811965e-05, |
| "loss": 0.0112, |
| "step": 1120 |
| }, |
| { |
| "epoch": 3.8698630136986303, |
| "grad_norm": 0.13435988128185272, |
| "learning_rate": 9.998844945344405e-05, |
| "loss": 0.0147, |
| "step": 1130 |
| }, |
| { |
| "epoch": 3.904109589041096, |
| "grad_norm": 0.1312176138162613, |
| "learning_rate": 9.998660418225645e-05, |
| "loss": 0.0125, |
| "step": 1140 |
| }, |
| { |
| "epoch": 3.9383561643835616, |
| "grad_norm": 0.11387414485216141, |
| "learning_rate": 9.998462224960175e-05, |
| "loss": 0.0104, |
| "step": 1150 |
| }, |
| { |
| "epoch": 3.9726027397260273, |
| "grad_norm": 0.1328994333744049, |
| "learning_rate": 9.998250366089848e-05, |
| "loss": 0.0116, |
| "step": 1160 |
| }, |
| { |
| "epoch": 4.006849315068493, |
| "grad_norm": 0.0746927261352539, |
| "learning_rate": 9.998024842193876e-05, |
| "loss": 0.0125, |
| "step": 1170 |
| }, |
| { |
| "epoch": 4.041095890410959, |
| "grad_norm": 0.09642993658781052, |
| "learning_rate": 9.997785653888835e-05, |
| "loss": 0.0116, |
| "step": 1180 |
| }, |
| { |
| "epoch": 4.075342465753424, |
| "grad_norm": 0.10804189741611481, |
| "learning_rate": 9.997532801828658e-05, |
| "loss": 0.0112, |
| "step": 1190 |
| }, |
| { |
| "epoch": 4.109589041095891, |
| "grad_norm": 0.096079520881176, |
| "learning_rate": 9.997266286704631e-05, |
| "loss": 0.0114, |
| "step": 1200 |
| }, |
| { |
| "epoch": 4.1438356164383565, |
| "grad_norm": 0.10320870578289032, |
| "learning_rate": 9.996986109245395e-05, |
| "loss": 0.0096, |
| "step": 1210 |
| }, |
| { |
| "epoch": 4.178082191780822, |
| "grad_norm": 0.11493821442127228, |
| "learning_rate": 9.996692270216947e-05, |
| "loss": 0.011, |
| "step": 1220 |
| }, |
| { |
| "epoch": 4.212328767123288, |
| "grad_norm": 0.11878206580877304, |
| "learning_rate": 9.996384770422629e-05, |
| "loss": 0.0118, |
| "step": 1230 |
| }, |
| { |
| "epoch": 4.2465753424657535, |
| "grad_norm": 0.07943862676620483, |
| "learning_rate": 9.996063610703137e-05, |
| "loss": 0.0119, |
| "step": 1240 |
| }, |
| { |
| "epoch": 4.280821917808219, |
| "grad_norm": 0.09945517033338547, |
| "learning_rate": 9.995728791936504e-05, |
| "loss": 0.0112, |
| "step": 1250 |
| }, |
| { |
| "epoch": 4.315068493150685, |
| "grad_norm": 0.10141076892614365, |
| "learning_rate": 9.995380315038119e-05, |
| "loss": 0.0115, |
| "step": 1260 |
| }, |
| { |
| "epoch": 4.3493150684931505, |
| "grad_norm": 0.08885636180639267, |
| "learning_rate": 9.9950181809607e-05, |
| "loss": 0.0096, |
| "step": 1270 |
| }, |
| { |
| "epoch": 4.383561643835616, |
| "grad_norm": 0.08274652808904648, |
| "learning_rate": 9.994642390694308e-05, |
| "loss": 0.0114, |
| "step": 1280 |
| }, |
| { |
| "epoch": 4.417808219178082, |
| "grad_norm": 0.14571622014045715, |
| "learning_rate": 9.99425294526634e-05, |
| "loss": 0.0107, |
| "step": 1290 |
| }, |
| { |
| "epoch": 4.4520547945205475, |
| "grad_norm": 0.08620978891849518, |
| "learning_rate": 9.993849845741524e-05, |
| "loss": 0.0097, |
| "step": 1300 |
| }, |
| { |
| "epoch": 4.486301369863014, |
| "grad_norm": 0.10268136858940125, |
| "learning_rate": 9.99343309322192e-05, |
| "loss": 0.0095, |
| "step": 1310 |
| }, |
| { |
| "epoch": 4.52054794520548, |
| "grad_norm": 0.08594616502523422, |
| "learning_rate": 9.993002688846913e-05, |
| "loss": 0.0103, |
| "step": 1320 |
| }, |
| { |
| "epoch": 4.554794520547945, |
| "grad_norm": 0.09446701407432556, |
| "learning_rate": 9.992558633793212e-05, |
| "loss": 0.0096, |
| "step": 1330 |
| }, |
| { |
| "epoch": 4.589041095890411, |
| "grad_norm": 0.1126297116279602, |
| "learning_rate": 9.992100929274846e-05, |
| "loss": 0.0096, |
| "step": 1340 |
| }, |
| { |
| "epoch": 4.623287671232877, |
| "grad_norm": 0.09884592145681381, |
| "learning_rate": 9.991629576543163e-05, |
| "loss": 0.0102, |
| "step": 1350 |
| }, |
| { |
| "epoch": 4.657534246575342, |
| "grad_norm": 0.10322685539722443, |
| "learning_rate": 9.991144576886823e-05, |
| "loss": 0.0108, |
| "step": 1360 |
| }, |
| { |
| "epoch": 4.691780821917808, |
| "grad_norm": 0.14780113101005554, |
| "learning_rate": 9.990645931631796e-05, |
| "loss": 0.0104, |
| "step": 1370 |
| }, |
| { |
| "epoch": 4.726027397260274, |
| "grad_norm": 0.11470142006874084, |
| "learning_rate": 9.990133642141359e-05, |
| "loss": 0.0103, |
| "step": 1380 |
| }, |
| { |
| "epoch": 4.760273972602739, |
| "grad_norm": 0.1362563967704773, |
| "learning_rate": 9.989607709816091e-05, |
| "loss": 0.0105, |
| "step": 1390 |
| }, |
| { |
| "epoch": 4.794520547945205, |
| "grad_norm": 0.12485076487064362, |
| "learning_rate": 9.989068136093873e-05, |
| "loss": 0.0108, |
| "step": 1400 |
| }, |
| { |
| "epoch": 4.828767123287671, |
| "grad_norm": 0.13635548949241638, |
| "learning_rate": 9.988514922449879e-05, |
| "loss": 0.0079, |
| "step": 1410 |
| }, |
| { |
| "epoch": 4.863013698630137, |
| "grad_norm": 0.11690958589315414, |
| "learning_rate": 9.987948070396571e-05, |
| "loss": 0.0115, |
| "step": 1420 |
| }, |
| { |
| "epoch": 4.897260273972603, |
| "grad_norm": 0.09252661466598511, |
| "learning_rate": 9.987367581483705e-05, |
| "loss": 0.009, |
| "step": 1430 |
| }, |
| { |
| "epoch": 4.931506849315069, |
| "grad_norm": 0.08612260967493057, |
| "learning_rate": 9.986773457298311e-05, |
| "loss": 0.0089, |
| "step": 1440 |
| }, |
| { |
| "epoch": 4.965753424657534, |
| "grad_norm": 0.12948228418827057, |
| "learning_rate": 9.986165699464705e-05, |
| "loss": 0.0082, |
| "step": 1450 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.12411495298147202, |
| "learning_rate": 9.985544309644475e-05, |
| "loss": 0.008, |
| "step": 1460 |
| }, |
| { |
| "epoch": 5.034246575342466, |
| "grad_norm": 0.13053423166275024, |
| "learning_rate": 9.984909289536473e-05, |
| "loss": 0.0118, |
| "step": 1470 |
| }, |
| { |
| "epoch": 5.068493150684931, |
| "grad_norm": 0.12200204282999039, |
| "learning_rate": 9.984260640876821e-05, |
| "loss": 0.0128, |
| "step": 1480 |
| }, |
| { |
| "epoch": 5.102739726027397, |
| "grad_norm": 0.09661053121089935, |
| "learning_rate": 9.983598365438902e-05, |
| "loss": 0.0087, |
| "step": 1490 |
| }, |
| { |
| "epoch": 5.136986301369863, |
| "grad_norm": 0.12682437896728516, |
| "learning_rate": 9.98292246503335e-05, |
| "loss": 0.0097, |
| "step": 1500 |
| }, |
| { |
| "epoch": 5.171232876712328, |
| "grad_norm": 0.13681413233280182, |
| "learning_rate": 9.98223294150805e-05, |
| "loss": 0.0103, |
| "step": 1510 |
| }, |
| { |
| "epoch": 5.205479452054795, |
| "grad_norm": 0.08635826408863068, |
| "learning_rate": 9.981529796748134e-05, |
| "loss": 0.0077, |
| "step": 1520 |
| }, |
| { |
| "epoch": 5.239726027397261, |
| "grad_norm": 0.09628362208604813, |
| "learning_rate": 9.980813032675974e-05, |
| "loss": 0.0088, |
| "step": 1530 |
| }, |
| { |
| "epoch": 5.273972602739726, |
| "grad_norm": 0.06951496005058289, |
| "learning_rate": 9.980082651251175e-05, |
| "loss": 0.0096, |
| "step": 1540 |
| }, |
| { |
| "epoch": 5.308219178082192, |
| "grad_norm": 0.10647785663604736, |
| "learning_rate": 9.979338654470569e-05, |
| "loss": 0.01, |
| "step": 1550 |
| }, |
| { |
| "epoch": 5.342465753424658, |
| "grad_norm": 0.08224749565124512, |
| "learning_rate": 9.97858104436822e-05, |
| "loss": 0.01, |
| "step": 1560 |
| }, |
| { |
| "epoch": 5.376712328767123, |
| "grad_norm": 0.09614630788564682, |
| "learning_rate": 9.977809823015401e-05, |
| "loss": 0.0112, |
| "step": 1570 |
| }, |
| { |
| "epoch": 5.410958904109589, |
| "grad_norm": 0.08124130219221115, |
| "learning_rate": 9.977024992520602e-05, |
| "loss": 0.0087, |
| "step": 1580 |
| }, |
| { |
| "epoch": 5.445205479452055, |
| "grad_norm": 0.09138698130846024, |
| "learning_rate": 9.976226555029522e-05, |
| "loss": 0.009, |
| "step": 1590 |
| }, |
| { |
| "epoch": 5.47945205479452, |
| "grad_norm": 0.1088441014289856, |
| "learning_rate": 9.975414512725057e-05, |
| "loss": 0.01, |
| "step": 1600 |
| }, |
| { |
| "epoch": 5.513698630136986, |
| "grad_norm": 0.12124570459127426, |
| "learning_rate": 9.974588867827301e-05, |
| "loss": 0.0088, |
| "step": 1610 |
| }, |
| { |
| "epoch": 5.5479452054794525, |
| "grad_norm": 0.12032249569892883, |
| "learning_rate": 9.973749622593534e-05, |
| "loss": 0.0129, |
| "step": 1620 |
| }, |
| { |
| "epoch": 5.582191780821918, |
| "grad_norm": 0.12702365219593048, |
| "learning_rate": 9.972896779318219e-05, |
| "loss": 0.0101, |
| "step": 1630 |
| }, |
| { |
| "epoch": 5.616438356164384, |
| "grad_norm": 0.0776495635509491, |
| "learning_rate": 9.972030340333001e-05, |
| "loss": 0.0082, |
| "step": 1640 |
| }, |
| { |
| "epoch": 5.6506849315068495, |
| "grad_norm": 0.09928776323795319, |
| "learning_rate": 9.97115030800669e-05, |
| "loss": 0.0094, |
| "step": 1650 |
| }, |
| { |
| "epoch": 5.684931506849315, |
| "grad_norm": 0.08945798128843307, |
| "learning_rate": 9.970256684745258e-05, |
| "loss": 0.0112, |
| "step": 1660 |
| }, |
| { |
| "epoch": 5.719178082191781, |
| "grad_norm": 0.1274113953113556, |
| "learning_rate": 9.969349472991838e-05, |
| "loss": 0.0094, |
| "step": 1670 |
| }, |
| { |
| "epoch": 5.7534246575342465, |
| "grad_norm": 0.1030043512582779, |
| "learning_rate": 9.968428675226714e-05, |
| "loss": 0.0093, |
| "step": 1680 |
| }, |
| { |
| "epoch": 5.787671232876712, |
| "grad_norm": 0.07978050410747528, |
| "learning_rate": 9.967494293967312e-05, |
| "loss": 0.0088, |
| "step": 1690 |
| }, |
| { |
| "epoch": 5.821917808219178, |
| "grad_norm": 0.11394272744655609, |
| "learning_rate": 9.966546331768191e-05, |
| "loss": 0.0102, |
| "step": 1700 |
| }, |
| { |
| "epoch": 5.8561643835616435, |
| "grad_norm": 0.08235814422369003, |
| "learning_rate": 9.965584791221048e-05, |
| "loss": 0.0102, |
| "step": 1710 |
| }, |
| { |
| "epoch": 5.890410958904109, |
| "grad_norm": 0.10711020976305008, |
| "learning_rate": 9.964609674954696e-05, |
| "loss": 0.0084, |
| "step": 1720 |
| }, |
| { |
| "epoch": 5.924657534246576, |
| "grad_norm": 0.1037852019071579, |
| "learning_rate": 9.963620985635065e-05, |
| "loss": 0.0084, |
| "step": 1730 |
| }, |
| { |
| "epoch": 5.958904109589041, |
| "grad_norm": 0.08300093561410904, |
| "learning_rate": 9.962618725965196e-05, |
| "loss": 0.0098, |
| "step": 1740 |
| }, |
| { |
| "epoch": 5.993150684931507, |
| "grad_norm": 0.10054755955934525, |
| "learning_rate": 9.961602898685226e-05, |
| "loss": 0.0076, |
| "step": 1750 |
| }, |
| { |
| "epoch": 6.027397260273973, |
| "grad_norm": 0.07662080228328705, |
| "learning_rate": 9.96057350657239e-05, |
| "loss": 0.0092, |
| "step": 1760 |
| }, |
| { |
| "epoch": 6.061643835616438, |
| "grad_norm": 0.06177311763167381, |
| "learning_rate": 9.959530552441005e-05, |
| "loss": 0.0085, |
| "step": 1770 |
| }, |
| { |
| "epoch": 6.095890410958904, |
| "grad_norm": 0.08199790120124817, |
| "learning_rate": 9.95847403914247e-05, |
| "loss": 0.009, |
| "step": 1780 |
| }, |
| { |
| "epoch": 6.13013698630137, |
| "grad_norm": 0.08528486639261246, |
| "learning_rate": 9.95740396956525e-05, |
| "loss": 0.0092, |
| "step": 1790 |
| }, |
| { |
| "epoch": 6.164383561643835, |
| "grad_norm": 0.0814923569560051, |
| "learning_rate": 9.956320346634876e-05, |
| "loss": 0.0078, |
| "step": 1800 |
| }, |
| { |
| "epoch": 6.198630136986301, |
| "grad_norm": 0.12103394418954849, |
| "learning_rate": 9.955223173313931e-05, |
| "loss": 0.0079, |
| "step": 1810 |
| }, |
| { |
| "epoch": 6.232876712328767, |
| "grad_norm": 0.1314200758934021, |
| "learning_rate": 9.954112452602045e-05, |
| "loss": 0.0095, |
| "step": 1820 |
| }, |
| { |
| "epoch": 6.267123287671233, |
| "grad_norm": 0.0955151692032814, |
| "learning_rate": 9.952988187535886e-05, |
| "loss": 0.0076, |
| "step": 1830 |
| }, |
| { |
| "epoch": 6.301369863013699, |
| "grad_norm": 0.07099077850580215, |
| "learning_rate": 9.95185038118915e-05, |
| "loss": 0.0078, |
| "step": 1840 |
| }, |
| { |
| "epoch": 6.335616438356165, |
| "grad_norm": 0.06817654520273209, |
| "learning_rate": 9.950699036672559e-05, |
| "loss": 0.0093, |
| "step": 1850 |
| }, |
| { |
| "epoch": 6.36986301369863, |
| "grad_norm": 0.10626586526632309, |
| "learning_rate": 9.949534157133844e-05, |
| "loss": 0.0077, |
| "step": 1860 |
| }, |
| { |
| "epoch": 6.404109589041096, |
| "grad_norm": 0.08003693073987961, |
| "learning_rate": 9.948355745757741e-05, |
| "loss": 0.0097, |
| "step": 1870 |
| }, |
| { |
| "epoch": 6.438356164383562, |
| "grad_norm": 0.08806777745485306, |
| "learning_rate": 9.94716380576598e-05, |
| "loss": 0.0102, |
| "step": 1880 |
| }, |
| { |
| "epoch": 6.472602739726027, |
| "grad_norm": 0.07476870715618134, |
| "learning_rate": 9.945958340417283e-05, |
| "loss": 0.0073, |
| "step": 1890 |
| }, |
| { |
| "epoch": 6.506849315068493, |
| "grad_norm": 0.08984264731407166, |
| "learning_rate": 9.944739353007344e-05, |
| "loss": 0.0081, |
| "step": 1900 |
| }, |
| { |
| "epoch": 6.541095890410959, |
| "grad_norm": 0.08520924299955368, |
| "learning_rate": 9.943506846868826e-05, |
| "loss": 0.0111, |
| "step": 1910 |
| }, |
| { |
| "epoch": 6.575342465753424, |
| "grad_norm": 0.07232493162155151, |
| "learning_rate": 9.942260825371358e-05, |
| "loss": 0.0075, |
| "step": 1920 |
| }, |
| { |
| "epoch": 6.609589041095891, |
| "grad_norm": 0.07491834461688995, |
| "learning_rate": 9.941001291921512e-05, |
| "loss": 0.0094, |
| "step": 1930 |
| }, |
| { |
| "epoch": 6.6438356164383565, |
| "grad_norm": 0.10011128336191177, |
| "learning_rate": 9.939728249962807e-05, |
| "loss": 0.0072, |
| "step": 1940 |
| }, |
| { |
| "epoch": 6.678082191780822, |
| "grad_norm": 0.0929722785949707, |
| "learning_rate": 9.938441702975689e-05, |
| "loss": 0.011, |
| "step": 1950 |
| }, |
| { |
| "epoch": 6.712328767123288, |
| "grad_norm": 0.1011221706867218, |
| "learning_rate": 9.937141654477528e-05, |
| "loss": 0.0099, |
| "step": 1960 |
| }, |
| { |
| "epoch": 6.7465753424657535, |
| "grad_norm": 0.09542585164308548, |
| "learning_rate": 9.93582810802261e-05, |
| "loss": 0.0085, |
| "step": 1970 |
| }, |
| { |
| "epoch": 6.780821917808219, |
| "grad_norm": 0.08605185896158218, |
| "learning_rate": 9.934501067202117e-05, |
| "loss": 0.0078, |
| "step": 1980 |
| }, |
| { |
| "epoch": 6.815068493150685, |
| "grad_norm": 0.062511146068573, |
| "learning_rate": 9.93316053564413e-05, |
| "loss": 0.0065, |
| "step": 1990 |
| }, |
| { |
| "epoch": 6.8493150684931505, |
| "grad_norm": 0.08524167537689209, |
| "learning_rate": 9.931806517013612e-05, |
| "loss": 0.0081, |
| "step": 2000 |
| }, |
| { |
| "epoch": 6.883561643835616, |
| "grad_norm": 0.0872335210442543, |
| "learning_rate": 9.930439015012396e-05, |
| "loss": 0.0066, |
| "step": 2010 |
| }, |
| { |
| "epoch": 6.917808219178082, |
| "grad_norm": 0.08723822236061096, |
| "learning_rate": 9.929058033379181e-05, |
| "loss": 0.0076, |
| "step": 2020 |
| }, |
| { |
| "epoch": 6.9520547945205475, |
| "grad_norm": 0.07593391835689545, |
| "learning_rate": 9.927663575889521e-05, |
| "loss": 0.0081, |
| "step": 2030 |
| }, |
| { |
| "epoch": 6.986301369863014, |
| "grad_norm": 0.07648999243974686, |
| "learning_rate": 9.926255646355804e-05, |
| "loss": 0.0069, |
| "step": 2040 |
| }, |
| { |
| "epoch": 7.02054794520548, |
| "grad_norm": 0.08651500195264816, |
| "learning_rate": 9.92483424862726e-05, |
| "loss": 0.0067, |
| "step": 2050 |
| }, |
| { |
| "epoch": 7.054794520547945, |
| "grad_norm": 0.09083620458841324, |
| "learning_rate": 9.923399386589933e-05, |
| "loss": 0.009, |
| "step": 2060 |
| }, |
| { |
| "epoch": 7.089041095890411, |
| "grad_norm": 0.0654049888253212, |
| "learning_rate": 9.921951064166684e-05, |
| "loss": 0.008, |
| "step": 2070 |
| }, |
| { |
| "epoch": 7.123287671232877, |
| "grad_norm": 0.06859409064054489, |
| "learning_rate": 9.92048928531717e-05, |
| "loss": 0.0077, |
| "step": 2080 |
| }, |
| { |
| "epoch": 7.157534246575342, |
| "grad_norm": 0.07323700934648514, |
| "learning_rate": 9.919014054037836e-05, |
| "loss": 0.009, |
| "step": 2090 |
| }, |
| { |
| "epoch": 7.191780821917808, |
| "grad_norm": 0.06128271296620369, |
| "learning_rate": 9.917525374361912e-05, |
| "loss": 0.0079, |
| "step": 2100 |
| }, |
| { |
| "epoch": 7.226027397260274, |
| "grad_norm": 0.07024730741977692, |
| "learning_rate": 9.91602325035939e-05, |
| "loss": 0.0066, |
| "step": 2110 |
| }, |
| { |
| "epoch": 7.260273972602739, |
| "grad_norm": 0.08250781893730164, |
| "learning_rate": 9.914507686137019e-05, |
| "loss": 0.007, |
| "step": 2120 |
| }, |
| { |
| "epoch": 7.294520547945205, |
| "grad_norm": 0.08373738825321198, |
| "learning_rate": 9.912978685838294e-05, |
| "loss": 0.0084, |
| "step": 2130 |
| }, |
| { |
| "epoch": 7.328767123287671, |
| "grad_norm": 0.0988110899925232, |
| "learning_rate": 9.911436253643445e-05, |
| "loss": 0.0079, |
| "step": 2140 |
| }, |
| { |
| "epoch": 7.363013698630137, |
| "grad_norm": 0.08949548006057739, |
| "learning_rate": 9.90988039376942e-05, |
| "loss": 0.0062, |
| "step": 2150 |
| }, |
| { |
| "epoch": 7.397260273972603, |
| "grad_norm": 0.1103079542517662, |
| "learning_rate": 9.90831111046988e-05, |
| "loss": 0.0078, |
| "step": 2160 |
| }, |
| { |
| "epoch": 7.431506849315069, |
| "grad_norm": 0.08410000801086426, |
| "learning_rate": 9.90672840803519e-05, |
| "loss": 0.0075, |
| "step": 2170 |
| }, |
| { |
| "epoch": 7.465753424657534, |
| "grad_norm": 0.08818292617797852, |
| "learning_rate": 9.905132290792394e-05, |
| "loss": 0.009, |
| "step": 2180 |
| }, |
| { |
| "epoch": 7.5, |
| "grad_norm": 0.08308695256710052, |
| "learning_rate": 9.903522763105218e-05, |
| "loss": 0.0089, |
| "step": 2190 |
| }, |
| { |
| "epoch": 7.534246575342466, |
| "grad_norm": 0.09962280839681625, |
| "learning_rate": 9.901899829374047e-05, |
| "loss": 0.009, |
| "step": 2200 |
| }, |
| { |
| "epoch": 7.568493150684931, |
| "grad_norm": 0.08678163588047028, |
| "learning_rate": 9.900263494035921e-05, |
| "loss": 0.008, |
| "step": 2210 |
| }, |
| { |
| "epoch": 7.602739726027397, |
| "grad_norm": 0.0837637260556221, |
| "learning_rate": 9.89861376156452e-05, |
| "loss": 0.0072, |
| "step": 2220 |
| }, |
| { |
| "epoch": 7.636986301369863, |
| "grad_norm": 0.09580914676189423, |
| "learning_rate": 9.896950636470147e-05, |
| "loss": 0.0077, |
| "step": 2230 |
| }, |
| { |
| "epoch": 7.671232876712329, |
| "grad_norm": 0.11661717295646667, |
| "learning_rate": 9.895274123299723e-05, |
| "loss": 0.0071, |
| "step": 2240 |
| }, |
| { |
| "epoch": 7.705479452054795, |
| "grad_norm": 0.10122444480657578, |
| "learning_rate": 9.893584226636772e-05, |
| "loss": 0.0086, |
| "step": 2250 |
| }, |
| { |
| "epoch": 7.739726027397261, |
| "grad_norm": 0.111696757376194, |
| "learning_rate": 9.891880951101407e-05, |
| "loss": 0.0064, |
| "step": 2260 |
| }, |
| { |
| "epoch": 7.773972602739726, |
| "grad_norm": 0.0847308561205864, |
| "learning_rate": 9.890164301350318e-05, |
| "loss": 0.0078, |
| "step": 2270 |
| }, |
| { |
| "epoch": 7.808219178082192, |
| "grad_norm": 0.09149212390184402, |
| "learning_rate": 9.888434282076758e-05, |
| "loss": 0.0066, |
| "step": 2280 |
| }, |
| { |
| "epoch": 7.842465753424658, |
| "grad_norm": 0.07390099763870239, |
| "learning_rate": 9.886690898010535e-05, |
| "loss": 0.008, |
| "step": 2290 |
| }, |
| { |
| "epoch": 7.876712328767123, |
| "grad_norm": 0.09433721750974655, |
| "learning_rate": 9.884934153917997e-05, |
| "loss": 0.0087, |
| "step": 2300 |
| }, |
| { |
| "epoch": 7.910958904109589, |
| "grad_norm": 0.1136075034737587, |
| "learning_rate": 9.883164054602012e-05, |
| "loss": 0.0085, |
| "step": 2310 |
| }, |
| { |
| "epoch": 7.945205479452055, |
| "grad_norm": 0.08907122164964676, |
| "learning_rate": 9.881380604901964e-05, |
| "loss": 0.0072, |
| "step": 2320 |
| }, |
| { |
| "epoch": 7.97945205479452, |
| "grad_norm": 0.0950908437371254, |
| "learning_rate": 9.879583809693738e-05, |
| "loss": 0.0082, |
| "step": 2330 |
| }, |
| { |
| "epoch": 8.013698630136986, |
| "grad_norm": 0.07622794061899185, |
| "learning_rate": 9.877773673889701e-05, |
| "loss": 0.0062, |
| "step": 2340 |
| }, |
| { |
| "epoch": 8.047945205479452, |
| "grad_norm": 0.10459341108798981, |
| "learning_rate": 9.8759502024387e-05, |
| "loss": 0.0097, |
| "step": 2350 |
| }, |
| { |
| "epoch": 8.082191780821917, |
| "grad_norm": 0.08677548170089722, |
| "learning_rate": 9.87411340032603e-05, |
| "loss": 0.0068, |
| "step": 2360 |
| }, |
| { |
| "epoch": 8.116438356164384, |
| "grad_norm": 0.09159575402736664, |
| "learning_rate": 9.872263272573443e-05, |
| "loss": 0.0068, |
| "step": 2370 |
| }, |
| { |
| "epoch": 8.150684931506849, |
| "grad_norm": 0.13817910850048065, |
| "learning_rate": 9.870399824239117e-05, |
| "loss": 0.0075, |
| "step": 2380 |
| }, |
| { |
| "epoch": 8.184931506849315, |
| "grad_norm": 0.10300517082214355, |
| "learning_rate": 9.868523060417646e-05, |
| "loss": 0.0086, |
| "step": 2390 |
| }, |
| { |
| "epoch": 8.219178082191782, |
| "grad_norm": 0.06537918746471405, |
| "learning_rate": 9.86663298624003e-05, |
| "loss": 0.0082, |
| "step": 2400 |
| }, |
| { |
| "epoch": 8.253424657534246, |
| "grad_norm": 0.0721374899148941, |
| "learning_rate": 9.864729606873663e-05, |
| "loss": 0.0066, |
| "step": 2410 |
| }, |
| { |
| "epoch": 8.287671232876713, |
| "grad_norm": 0.1256812959909439, |
| "learning_rate": 9.862812927522309e-05, |
| "loss": 0.0068, |
| "step": 2420 |
| }, |
| { |
| "epoch": 8.321917808219178, |
| "grad_norm": 0.08400053530931473, |
| "learning_rate": 9.860882953426099e-05, |
| "loss": 0.0066, |
| "step": 2430 |
| }, |
| { |
| "epoch": 8.356164383561644, |
| "grad_norm": 0.08019398152828217, |
| "learning_rate": 9.858939689861506e-05, |
| "loss": 0.0085, |
| "step": 2440 |
| }, |
| { |
| "epoch": 8.39041095890411, |
| "grad_norm": 0.0836905762553215, |
| "learning_rate": 9.856983142141339e-05, |
| "loss": 0.0075, |
| "step": 2450 |
| }, |
| { |
| "epoch": 8.424657534246576, |
| "grad_norm": 0.0993325412273407, |
| "learning_rate": 9.855013315614725e-05, |
| "loss": 0.007, |
| "step": 2460 |
| }, |
| { |
| "epoch": 8.45890410958904, |
| "grad_norm": 0.08629734069108963, |
| "learning_rate": 9.853030215667093e-05, |
| "loss": 0.0073, |
| "step": 2470 |
| }, |
| { |
| "epoch": 8.493150684931507, |
| "grad_norm": 0.08529617637395859, |
| "learning_rate": 9.851033847720166e-05, |
| "loss": 0.0083, |
| "step": 2480 |
| }, |
| { |
| "epoch": 8.527397260273972, |
| "grad_norm": 0.10456524789333344, |
| "learning_rate": 9.849024217231935e-05, |
| "loss": 0.0076, |
| "step": 2490 |
| }, |
| { |
| "epoch": 8.561643835616438, |
| "grad_norm": 0.09966843575239182, |
| "learning_rate": 9.847001329696653e-05, |
| "loss": 0.0079, |
| "step": 2500 |
| }, |
| { |
| "epoch": 8.595890410958905, |
| "grad_norm": 0.1018424928188324, |
| "learning_rate": 9.844965190644817e-05, |
| "loss": 0.0075, |
| "step": 2510 |
| }, |
| { |
| "epoch": 8.63013698630137, |
| "grad_norm": 0.09728335589170456, |
| "learning_rate": 9.842915805643155e-05, |
| "loss": 0.0057, |
| "step": 2520 |
| }, |
| { |
| "epoch": 8.664383561643836, |
| "grad_norm": 0.12338245660066605, |
| "learning_rate": 9.840853180294608e-05, |
| "loss": 0.0081, |
| "step": 2530 |
| }, |
| { |
| "epoch": 8.698630136986301, |
| "grad_norm": 0.10218657553195953, |
| "learning_rate": 9.838777320238312e-05, |
| "loss": 0.0067, |
| "step": 2540 |
| }, |
| { |
| "epoch": 8.732876712328768, |
| "grad_norm": 0.10925062745809555, |
| "learning_rate": 9.836688231149592e-05, |
| "loss": 0.0074, |
| "step": 2550 |
| }, |
| { |
| "epoch": 8.767123287671232, |
| "grad_norm": 0.08330442756414413, |
| "learning_rate": 9.834585918739936e-05, |
| "loss": 0.0067, |
| "step": 2560 |
| }, |
| { |
| "epoch": 8.801369863013699, |
| "grad_norm": 0.11952322721481323, |
| "learning_rate": 9.832470388756987e-05, |
| "loss": 0.0069, |
| "step": 2570 |
| }, |
| { |
| "epoch": 8.835616438356164, |
| "grad_norm": 0.0997798889875412, |
| "learning_rate": 9.830341646984521e-05, |
| "loss": 0.0071, |
| "step": 2580 |
| }, |
| { |
| "epoch": 8.86986301369863, |
| "grad_norm": 0.07497061789035797, |
| "learning_rate": 9.82819969924244e-05, |
| "loss": 0.0058, |
| "step": 2590 |
| }, |
| { |
| "epoch": 8.904109589041095, |
| "grad_norm": 0.08156754821538925, |
| "learning_rate": 9.826044551386744e-05, |
| "loss": 0.0067, |
| "step": 2600 |
| }, |
| { |
| "epoch": 8.938356164383562, |
| "grad_norm": 0.08598846197128296, |
| "learning_rate": 9.823876209309527e-05, |
| "loss": 0.0103, |
| "step": 2610 |
| }, |
| { |
| "epoch": 8.972602739726028, |
| "grad_norm": 0.09113951027393341, |
| "learning_rate": 9.821694678938953e-05, |
| "loss": 0.0062, |
| "step": 2620 |
| }, |
| { |
| "epoch": 9.006849315068493, |
| "grad_norm": 0.10338687896728516, |
| "learning_rate": 9.819499966239243e-05, |
| "loss": 0.0064, |
| "step": 2630 |
| }, |
| { |
| "epoch": 9.04109589041096, |
| "grad_norm": 0.09837481379508972, |
| "learning_rate": 9.817292077210659e-05, |
| "loss": 0.0071, |
| "step": 2640 |
| }, |
| { |
| "epoch": 9.075342465753424, |
| "grad_norm": 0.0811222493648529, |
| "learning_rate": 9.815071017889482e-05, |
| "loss": 0.0087, |
| "step": 2650 |
| }, |
| { |
| "epoch": 9.10958904109589, |
| "grad_norm": 0.10089726746082306, |
| "learning_rate": 9.812836794348004e-05, |
| "loss": 0.0112, |
| "step": 2660 |
| }, |
| { |
| "epoch": 9.143835616438356, |
| "grad_norm": 0.12093760073184967, |
| "learning_rate": 9.81058941269451e-05, |
| "loss": 0.0077, |
| "step": 2670 |
| }, |
| { |
| "epoch": 9.178082191780822, |
| "grad_norm": 0.09144003689289093, |
| "learning_rate": 9.808328879073251e-05, |
| "loss": 0.0066, |
| "step": 2680 |
| }, |
| { |
| "epoch": 9.212328767123287, |
| "grad_norm": 0.05546188727021217, |
| "learning_rate": 9.806055199664446e-05, |
| "loss": 0.006, |
| "step": 2690 |
| }, |
| { |
| "epoch": 9.246575342465754, |
| "grad_norm": 0.08728483319282532, |
| "learning_rate": 9.803768380684242e-05, |
| "loss": 0.0057, |
| "step": 2700 |
| }, |
| { |
| "epoch": 9.280821917808218, |
| "grad_norm": 0.07582209259271622, |
| "learning_rate": 9.801468428384716e-05, |
| "loss": 0.0064, |
| "step": 2710 |
| }, |
| { |
| "epoch": 9.315068493150685, |
| "grad_norm": 0.07939019799232483, |
| "learning_rate": 9.799155349053851e-05, |
| "loss": 0.007, |
| "step": 2720 |
| }, |
| { |
| "epoch": 9.349315068493151, |
| "grad_norm": 0.07291562110185623, |
| "learning_rate": 9.796829149015517e-05, |
| "loss": 0.007, |
| "step": 2730 |
| }, |
| { |
| "epoch": 9.383561643835616, |
| "grad_norm": 0.06804176419973373, |
| "learning_rate": 9.794489834629455e-05, |
| "loss": 0.0073, |
| "step": 2740 |
| }, |
| { |
| "epoch": 9.417808219178083, |
| "grad_norm": 0.0737365186214447, |
| "learning_rate": 9.792137412291265e-05, |
| "loss": 0.0062, |
| "step": 2750 |
| }, |
| { |
| "epoch": 9.452054794520548, |
| "grad_norm": 0.08482401072978973, |
| "learning_rate": 9.789771888432375e-05, |
| "loss": 0.0083, |
| "step": 2760 |
| }, |
| { |
| "epoch": 9.486301369863014, |
| "grad_norm": 0.08719678968191147, |
| "learning_rate": 9.787393269520039e-05, |
| "loss": 0.0073, |
| "step": 2770 |
| }, |
| { |
| "epoch": 9.520547945205479, |
| "grad_norm": 0.0734448954463005, |
| "learning_rate": 9.785001562057309e-05, |
| "loss": 0.0067, |
| "step": 2780 |
| }, |
| { |
| "epoch": 9.554794520547945, |
| "grad_norm": 0.08811881393194199, |
| "learning_rate": 9.782596772583026e-05, |
| "loss": 0.0062, |
| "step": 2790 |
| }, |
| { |
| "epoch": 9.58904109589041, |
| "grad_norm": 0.07836030423641205, |
| "learning_rate": 9.780178907671789e-05, |
| "loss": 0.0067, |
| "step": 2800 |
| }, |
| { |
| "epoch": 9.623287671232877, |
| "grad_norm": 0.066135935485363, |
| "learning_rate": 9.777747973933948e-05, |
| "loss": 0.0061, |
| "step": 2810 |
| }, |
| { |
| "epoch": 9.657534246575342, |
| "grad_norm": 0.08274685591459274, |
| "learning_rate": 9.775303978015585e-05, |
| "loss": 0.0054, |
| "step": 2820 |
| }, |
| { |
| "epoch": 9.691780821917808, |
| "grad_norm": 0.07670920342206955, |
| "learning_rate": 9.772846926598491e-05, |
| "loss": 0.0083, |
| "step": 2830 |
| }, |
| { |
| "epoch": 9.726027397260275, |
| "grad_norm": 0.0781722441315651, |
| "learning_rate": 9.77037682640015e-05, |
| "loss": 0.0058, |
| "step": 2840 |
| }, |
| { |
| "epoch": 9.76027397260274, |
| "grad_norm": 0.06222458556294441, |
| "learning_rate": 9.767893684173721e-05, |
| "loss": 0.0054, |
| "step": 2850 |
| }, |
| { |
| "epoch": 9.794520547945206, |
| "grad_norm": 0.0780424028635025, |
| "learning_rate": 9.765397506708023e-05, |
| "loss": 0.0062, |
| "step": 2860 |
| }, |
| { |
| "epoch": 9.82876712328767, |
| "grad_norm": 0.09629786759614944, |
| "learning_rate": 9.762888300827507e-05, |
| "loss": 0.0067, |
| "step": 2870 |
| }, |
| { |
| "epoch": 9.863013698630137, |
| "grad_norm": 0.06657546758651733, |
| "learning_rate": 9.760366073392246e-05, |
| "loss": 0.0062, |
| "step": 2880 |
| }, |
| { |
| "epoch": 9.897260273972602, |
| "grad_norm": 0.07197002321481705, |
| "learning_rate": 9.757830831297914e-05, |
| "loss": 0.0071, |
| "step": 2890 |
| }, |
| { |
| "epoch": 9.931506849315069, |
| "grad_norm": 0.06577511131763458, |
| "learning_rate": 9.755282581475769e-05, |
| "loss": 0.0072, |
| "step": 2900 |
| }, |
| { |
| "epoch": 9.965753424657533, |
| "grad_norm": 0.0619685985147953, |
| "learning_rate": 9.752721330892624e-05, |
| "loss": 0.0056, |
| "step": 2910 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 0.08390691131353378, |
| "learning_rate": 9.750147086550844e-05, |
| "loss": 0.0067, |
| "step": 2920 |
| }, |
| { |
| "epoch": 10.034246575342467, |
| "grad_norm": 0.09258890151977539, |
| "learning_rate": 9.747559855488313e-05, |
| "loss": 0.0071, |
| "step": 2930 |
| }, |
| { |
| "epoch": 10.068493150684931, |
| "grad_norm": 0.0918927937746048, |
| "learning_rate": 9.744959644778422e-05, |
| "loss": 0.0079, |
| "step": 2940 |
| }, |
| { |
| "epoch": 10.102739726027398, |
| "grad_norm": 0.08980443328619003, |
| "learning_rate": 9.742346461530048e-05, |
| "loss": 0.0062, |
| "step": 2950 |
| }, |
| { |
| "epoch": 10.136986301369863, |
| "grad_norm": 0.0940912663936615, |
| "learning_rate": 9.739720312887535e-05, |
| "loss": 0.0066, |
| "step": 2960 |
| }, |
| { |
| "epoch": 10.17123287671233, |
| "grad_norm": 0.1010262668132782, |
| "learning_rate": 9.73708120603067e-05, |
| "loss": 0.0066, |
| "step": 2970 |
| }, |
| { |
| "epoch": 10.205479452054794, |
| "grad_norm": 0.07731979340314865, |
| "learning_rate": 9.734429148174675e-05, |
| "loss": 0.0056, |
| "step": 2980 |
| }, |
| { |
| "epoch": 10.23972602739726, |
| "grad_norm": 0.07678744941949844, |
| "learning_rate": 9.731764146570173e-05, |
| "loss": 0.0071, |
| "step": 2990 |
| }, |
| { |
| "epoch": 10.273972602739725, |
| "grad_norm": 0.07159540057182312, |
| "learning_rate": 9.729086208503174e-05, |
| "loss": 0.0085, |
| "step": 3000 |
| }, |
| { |
| "epoch": 10.308219178082192, |
| "grad_norm": 0.06736049056053162, |
| "learning_rate": 9.726395341295062e-05, |
| "loss": 0.0057, |
| "step": 3010 |
| }, |
| { |
| "epoch": 10.342465753424657, |
| "grad_norm": 0.05375010892748833, |
| "learning_rate": 9.723691552302562e-05, |
| "loss": 0.0062, |
| "step": 3020 |
| }, |
| { |
| "epoch": 10.376712328767123, |
| "grad_norm": 0.0694858655333519, |
| "learning_rate": 9.720974848917735e-05, |
| "loss": 0.0064, |
| "step": 3030 |
| }, |
| { |
| "epoch": 10.41095890410959, |
| "grad_norm": 0.11771047860383987, |
| "learning_rate": 9.718245238567939e-05, |
| "loss": 0.007, |
| "step": 3040 |
| }, |
| { |
| "epoch": 10.445205479452055, |
| "grad_norm": 0.08670341968536377, |
| "learning_rate": 9.715502728715826e-05, |
| "loss": 0.0065, |
| "step": 3050 |
| }, |
| { |
| "epoch": 10.479452054794521, |
| "grad_norm": 0.08733764290809631, |
| "learning_rate": 9.712747326859315e-05, |
| "loss": 0.007, |
| "step": 3060 |
| }, |
| { |
| "epoch": 10.513698630136986, |
| "grad_norm": 0.06813944876194, |
| "learning_rate": 9.709979040531569e-05, |
| "loss": 0.0063, |
| "step": 3070 |
| }, |
| { |
| "epoch": 10.547945205479452, |
| "grad_norm": 0.0684538260102272, |
| "learning_rate": 9.707197877300974e-05, |
| "loss": 0.0073, |
| "step": 3080 |
| }, |
| { |
| "epoch": 10.582191780821917, |
| "grad_norm": 0.09879663586616516, |
| "learning_rate": 9.704403844771128e-05, |
| "loss": 0.0069, |
| "step": 3090 |
| }, |
| { |
| "epoch": 10.616438356164384, |
| "grad_norm": 0.07726743072271347, |
| "learning_rate": 9.701596950580806e-05, |
| "loss": 0.0064, |
| "step": 3100 |
| }, |
| { |
| "epoch": 10.650684931506849, |
| "grad_norm": 0.1269214004278183, |
| "learning_rate": 9.698777202403953e-05, |
| "loss": 0.0068, |
| "step": 3110 |
| }, |
| { |
| "epoch": 10.684931506849315, |
| "grad_norm": 0.08792764693498611, |
| "learning_rate": 9.695944607949649e-05, |
| "loss": 0.0065, |
| "step": 3120 |
| }, |
| { |
| "epoch": 10.719178082191782, |
| "grad_norm": 0.07960551977157593, |
| "learning_rate": 9.693099174962103e-05, |
| "loss": 0.0086, |
| "step": 3130 |
| }, |
| { |
| "epoch": 10.753424657534246, |
| "grad_norm": 0.0761343464255333, |
| "learning_rate": 9.690240911220618e-05, |
| "loss": 0.0057, |
| "step": 3140 |
| }, |
| { |
| "epoch": 10.787671232876713, |
| "grad_norm": 0.09499238431453705, |
| "learning_rate": 9.687369824539577e-05, |
| "loss": 0.0068, |
| "step": 3150 |
| }, |
| { |
| "epoch": 10.821917808219178, |
| "grad_norm": 0.0680365189909935, |
| "learning_rate": 9.684485922768422e-05, |
| "loss": 0.0064, |
| "step": 3160 |
| }, |
| { |
| "epoch": 10.856164383561644, |
| "grad_norm": 0.0675700306892395, |
| "learning_rate": 9.681589213791633e-05, |
| "loss": 0.0075, |
| "step": 3170 |
| }, |
| { |
| "epoch": 10.89041095890411, |
| "grad_norm": 0.08872543275356293, |
| "learning_rate": 9.6786797055287e-05, |
| "loss": 0.0081, |
| "step": 3180 |
| }, |
| { |
| "epoch": 10.924657534246576, |
| "grad_norm": 0.09527339786291122, |
| "learning_rate": 9.675757405934103e-05, |
| "loss": 0.0076, |
| "step": 3190 |
| }, |
| { |
| "epoch": 10.95890410958904, |
| "grad_norm": 0.08772981911897659, |
| "learning_rate": 9.672822322997305e-05, |
| "loss": 0.0066, |
| "step": 3200 |
| }, |
| { |
| "epoch": 10.993150684931507, |
| "grad_norm": 0.08488666266202927, |
| "learning_rate": 9.669874464742705e-05, |
| "loss": 0.0081, |
| "step": 3210 |
| }, |
| { |
| "epoch": 11.027397260273972, |
| "grad_norm": 0.0668686255812645, |
| "learning_rate": 9.66691383922964e-05, |
| "loss": 0.0064, |
| "step": 3220 |
| }, |
| { |
| "epoch": 11.061643835616438, |
| "grad_norm": 0.08201269805431366, |
| "learning_rate": 9.663940454552342e-05, |
| "loss": 0.0063, |
| "step": 3230 |
| }, |
| { |
| "epoch": 11.095890410958905, |
| "grad_norm": 0.10006823390722275, |
| "learning_rate": 9.660954318839933e-05, |
| "loss": 0.0067, |
| "step": 3240 |
| }, |
| { |
| "epoch": 11.13013698630137, |
| "grad_norm": 0.06139950081706047, |
| "learning_rate": 9.657955440256395e-05, |
| "loss": 0.0062, |
| "step": 3250 |
| }, |
| { |
| "epoch": 11.164383561643836, |
| "grad_norm": 0.05843405798077583, |
| "learning_rate": 9.654943827000548e-05, |
| "loss": 0.0057, |
| "step": 3260 |
| }, |
| { |
| "epoch": 11.198630136986301, |
| "grad_norm": 0.12055237591266632, |
| "learning_rate": 9.651919487306025e-05, |
| "loss": 0.007, |
| "step": 3270 |
| }, |
| { |
| "epoch": 11.232876712328768, |
| "grad_norm": 0.09484998136758804, |
| "learning_rate": 9.648882429441257e-05, |
| "loss": 0.0077, |
| "step": 3280 |
| }, |
| { |
| "epoch": 11.267123287671232, |
| "grad_norm": 0.10352499783039093, |
| "learning_rate": 9.645832661709444e-05, |
| "loss": 0.0079, |
| "step": 3290 |
| }, |
| { |
| "epoch": 11.301369863013699, |
| "grad_norm": 0.08123885840177536, |
| "learning_rate": 9.642770192448536e-05, |
| "loss": 0.0086, |
| "step": 3300 |
| }, |
| { |
| "epoch": 11.335616438356164, |
| "grad_norm": 0.07840955257415771, |
| "learning_rate": 9.639695030031204e-05, |
| "loss": 0.0073, |
| "step": 3310 |
| }, |
| { |
| "epoch": 11.36986301369863, |
| "grad_norm": 0.11473023891448975, |
| "learning_rate": 9.636607182864827e-05, |
| "loss": 0.008, |
| "step": 3320 |
| }, |
| { |
| "epoch": 11.404109589041095, |
| "grad_norm": 0.09389324486255646, |
| "learning_rate": 9.63350665939146e-05, |
| "loss": 0.0067, |
| "step": 3330 |
| }, |
| { |
| "epoch": 11.438356164383562, |
| "grad_norm": 0.0731404572725296, |
| "learning_rate": 9.630393468087818e-05, |
| "loss": 0.0077, |
| "step": 3340 |
| }, |
| { |
| "epoch": 11.472602739726028, |
| "grad_norm": 0.06645851582288742, |
| "learning_rate": 9.627267617465243e-05, |
| "loss": 0.0069, |
| "step": 3350 |
| }, |
| { |
| "epoch": 11.506849315068493, |
| "grad_norm": 0.07165537774562836, |
| "learning_rate": 9.624129116069694e-05, |
| "loss": 0.0072, |
| "step": 3360 |
| }, |
| { |
| "epoch": 11.54109589041096, |
| "grad_norm": 0.05754755809903145, |
| "learning_rate": 9.620977972481716e-05, |
| "loss": 0.0063, |
| "step": 3370 |
| }, |
| { |
| "epoch": 11.575342465753424, |
| "grad_norm": 0.07463119179010391, |
| "learning_rate": 9.617814195316411e-05, |
| "loss": 0.0058, |
| "step": 3380 |
| }, |
| { |
| "epoch": 11.60958904109589, |
| "grad_norm": 0.05406510457396507, |
| "learning_rate": 9.614637793223425e-05, |
| "loss": 0.006, |
| "step": 3390 |
| }, |
| { |
| "epoch": 11.643835616438356, |
| "grad_norm": 0.07864365726709366, |
| "learning_rate": 9.611448774886924e-05, |
| "loss": 0.006, |
| "step": 3400 |
| }, |
| { |
| "epoch": 11.678082191780822, |
| "grad_norm": 0.05467298626899719, |
| "learning_rate": 9.60824714902556e-05, |
| "loss": 0.0069, |
| "step": 3410 |
| }, |
| { |
| "epoch": 11.712328767123287, |
| "grad_norm": 0.06272034347057343, |
| "learning_rate": 9.605032924392457e-05, |
| "loss": 0.0062, |
| "step": 3420 |
| }, |
| { |
| "epoch": 11.746575342465754, |
| "grad_norm": 0.1005667969584465, |
| "learning_rate": 9.601806109775179e-05, |
| "loss": 0.0077, |
| "step": 3430 |
| }, |
| { |
| "epoch": 11.780821917808218, |
| "grad_norm": 0.07537668198347092, |
| "learning_rate": 9.598566713995718e-05, |
| "loss": 0.0069, |
| "step": 3440 |
| }, |
| { |
| "epoch": 11.815068493150685, |
| "grad_norm": 0.05782156065106392, |
| "learning_rate": 9.595314745910456e-05, |
| "loss": 0.0051, |
| "step": 3450 |
| }, |
| { |
| "epoch": 11.849315068493151, |
| "grad_norm": 0.09299279749393463, |
| "learning_rate": 9.59205021441015e-05, |
| "loss": 0.0056, |
| "step": 3460 |
| }, |
| { |
| "epoch": 11.883561643835616, |
| "grad_norm": 0.05052751302719116, |
| "learning_rate": 9.588773128419906e-05, |
| "loss": 0.0061, |
| "step": 3470 |
| }, |
| { |
| "epoch": 11.917808219178083, |
| "grad_norm": 0.07338104397058487, |
| "learning_rate": 9.58548349689915e-05, |
| "loss": 0.0063, |
| "step": 3480 |
| }, |
| { |
| "epoch": 11.952054794520548, |
| "grad_norm": 0.0899667963385582, |
| "learning_rate": 9.582181328841611e-05, |
| "loss": 0.0061, |
| "step": 3490 |
| }, |
| { |
| "epoch": 11.986301369863014, |
| "grad_norm": 0.06916534900665283, |
| "learning_rate": 9.578866633275288e-05, |
| "loss": 0.0062, |
| "step": 3500 |
| }, |
| { |
| "epoch": 12.020547945205479, |
| "grad_norm": 0.08347468823194504, |
| "learning_rate": 9.575539419262434e-05, |
| "loss": 0.0081, |
| "step": 3510 |
| }, |
| { |
| "epoch": 12.054794520547945, |
| "grad_norm": 0.0927470400929451, |
| "learning_rate": 9.572199695899522e-05, |
| "loss": 0.0054, |
| "step": 3520 |
| }, |
| { |
| "epoch": 12.08904109589041, |
| "grad_norm": 0.07318168878555298, |
| "learning_rate": 9.568847472317232e-05, |
| "loss": 0.0056, |
| "step": 3530 |
| }, |
| { |
| "epoch": 12.123287671232877, |
| "grad_norm": 0.05342930555343628, |
| "learning_rate": 9.565482757680415e-05, |
| "loss": 0.0054, |
| "step": 3540 |
| }, |
| { |
| "epoch": 12.157534246575343, |
| "grad_norm": 0.05967690050601959, |
| "learning_rate": 9.562105561188069e-05, |
| "loss": 0.0054, |
| "step": 3550 |
| }, |
| { |
| "epoch": 12.191780821917808, |
| "grad_norm": 0.06279722601175308, |
| "learning_rate": 9.558715892073323e-05, |
| "loss": 0.007, |
| "step": 3560 |
| }, |
| { |
| "epoch": 12.226027397260275, |
| "grad_norm": 0.06517907232046127, |
| "learning_rate": 9.555313759603402e-05, |
| "loss": 0.0059, |
| "step": 3570 |
| }, |
| { |
| "epoch": 12.26027397260274, |
| "grad_norm": 0.06889758259057999, |
| "learning_rate": 9.551899173079607e-05, |
| "loss": 0.0053, |
| "step": 3580 |
| }, |
| { |
| "epoch": 12.294520547945206, |
| "grad_norm": 0.06394177675247192, |
| "learning_rate": 9.548472141837286e-05, |
| "loss": 0.0055, |
| "step": 3590 |
| }, |
| { |
| "epoch": 12.32876712328767, |
| "grad_norm": 0.06316519528627396, |
| "learning_rate": 9.545032675245813e-05, |
| "loss": 0.0062, |
| "step": 3600 |
| }, |
| { |
| "epoch": 12.363013698630137, |
| "grad_norm": 0.10206209123134613, |
| "learning_rate": 9.541580782708557e-05, |
| "loss": 0.0064, |
| "step": 3610 |
| }, |
| { |
| "epoch": 12.397260273972602, |
| "grad_norm": 0.09707853198051453, |
| "learning_rate": 9.538116473662861e-05, |
| "loss": 0.0061, |
| "step": 3620 |
| }, |
| { |
| "epoch": 12.431506849315069, |
| "grad_norm": 0.09929731488227844, |
| "learning_rate": 9.534639757580013e-05, |
| "loss": 0.0055, |
| "step": 3630 |
| }, |
| { |
| "epoch": 12.465753424657533, |
| "grad_norm": 0.08357489854097366, |
| "learning_rate": 9.531150643965223e-05, |
| "loss": 0.0049, |
| "step": 3640 |
| }, |
| { |
| "epoch": 12.5, |
| "grad_norm": 0.08069298416376114, |
| "learning_rate": 9.527649142357596e-05, |
| "loss": 0.0061, |
| "step": 3650 |
| }, |
| { |
| "epoch": 12.534246575342467, |
| "grad_norm": 0.05960075184702873, |
| "learning_rate": 9.524135262330098e-05, |
| "loss": 0.0053, |
| "step": 3660 |
| }, |
| { |
| "epoch": 12.568493150684931, |
| "grad_norm": 0.06529633700847626, |
| "learning_rate": 9.520609013489547e-05, |
| "loss": 0.0069, |
| "step": 3670 |
| }, |
| { |
| "epoch": 12.602739726027398, |
| "grad_norm": 0.07700713723897934, |
| "learning_rate": 9.517070405476575e-05, |
| "loss": 0.0057, |
| "step": 3680 |
| }, |
| { |
| "epoch": 12.636986301369863, |
| "grad_norm": 0.07295393943786621, |
| "learning_rate": 9.513519447965595e-05, |
| "loss": 0.0057, |
| "step": 3690 |
| }, |
| { |
| "epoch": 12.67123287671233, |
| "grad_norm": 0.08430688083171844, |
| "learning_rate": 9.509956150664796e-05, |
| "loss": 0.0061, |
| "step": 3700 |
| }, |
| { |
| "epoch": 12.705479452054794, |
| "grad_norm": 0.051374781876802444, |
| "learning_rate": 9.50638052331609e-05, |
| "loss": 0.0062, |
| "step": 3710 |
| }, |
| { |
| "epoch": 12.73972602739726, |
| "grad_norm": 0.0681716650724411, |
| "learning_rate": 9.502792575695112e-05, |
| "loss": 0.0097, |
| "step": 3720 |
| }, |
| { |
| "epoch": 12.773972602739725, |
| "grad_norm": 0.09235887974500656, |
| "learning_rate": 9.499192317611167e-05, |
| "loss": 0.0071, |
| "step": 3730 |
| }, |
| { |
| "epoch": 12.808219178082192, |
| "grad_norm": 0.07686050236225128, |
| "learning_rate": 9.49557975890723e-05, |
| "loss": 0.008, |
| "step": 3740 |
| }, |
| { |
| "epoch": 12.842465753424658, |
| "grad_norm": 0.08247129619121552, |
| "learning_rate": 9.491954909459895e-05, |
| "loss": 0.0072, |
| "step": 3750 |
| }, |
| { |
| "epoch": 12.876712328767123, |
| "grad_norm": 0.06982657313346863, |
| "learning_rate": 9.488317779179361e-05, |
| "loss": 0.0052, |
| "step": 3760 |
| }, |
| { |
| "epoch": 12.91095890410959, |
| "grad_norm": 0.07579068094491959, |
| "learning_rate": 9.484668378009408e-05, |
| "loss": 0.0063, |
| "step": 3770 |
| }, |
| { |
| "epoch": 12.945205479452055, |
| "grad_norm": 0.09651331603527069, |
| "learning_rate": 9.481006715927351e-05, |
| "loss": 0.0066, |
| "step": 3780 |
| }, |
| { |
| "epoch": 12.979452054794521, |
| "grad_norm": 0.07850632816553116, |
| "learning_rate": 9.477332802944044e-05, |
| "loss": 0.0067, |
| "step": 3790 |
| }, |
| { |
| "epoch": 13.013698630136986, |
| "grad_norm": 0.07941552251577377, |
| "learning_rate": 9.473646649103818e-05, |
| "loss": 0.0047, |
| "step": 3800 |
| }, |
| { |
| "epoch": 13.047945205479452, |
| "grad_norm": 0.06961599737405777, |
| "learning_rate": 9.46994826448448e-05, |
| "loss": 0.0064, |
| "step": 3810 |
| }, |
| { |
| "epoch": 13.082191780821917, |
| "grad_norm": 0.09865850955247879, |
| "learning_rate": 9.46623765919727e-05, |
| "loss": 0.0074, |
| "step": 3820 |
| }, |
| { |
| "epoch": 13.116438356164384, |
| "grad_norm": 0.06311442703008652, |
| "learning_rate": 9.462514843386845e-05, |
| "loss": 0.0058, |
| "step": 3830 |
| }, |
| { |
| "epoch": 13.150684931506849, |
| "grad_norm": 0.06989779323339462, |
| "learning_rate": 9.458779827231237e-05, |
| "loss": 0.0071, |
| "step": 3840 |
| }, |
| { |
| "epoch": 13.184931506849315, |
| "grad_norm": 0.07357992976903915, |
| "learning_rate": 9.45503262094184e-05, |
| "loss": 0.0062, |
| "step": 3850 |
| }, |
| { |
| "epoch": 13.219178082191782, |
| "grad_norm": 0.07631329447031021, |
| "learning_rate": 9.451273234763371e-05, |
| "loss": 0.0054, |
| "step": 3860 |
| }, |
| { |
| "epoch": 13.253424657534246, |
| "grad_norm": 0.07587280869483948, |
| "learning_rate": 9.447501678973852e-05, |
| "loss": 0.0062, |
| "step": 3870 |
| }, |
| { |
| "epoch": 13.287671232876713, |
| "grad_norm": 0.07038256525993347, |
| "learning_rate": 9.443717963884569e-05, |
| "loss": 0.0067, |
| "step": 3880 |
| }, |
| { |
| "epoch": 13.321917808219178, |
| "grad_norm": 0.06246664375066757, |
| "learning_rate": 9.439922099840054e-05, |
| "loss": 0.0055, |
| "step": 3890 |
| }, |
| { |
| "epoch": 13.356164383561644, |
| "grad_norm": 0.07346326857805252, |
| "learning_rate": 9.43611409721806e-05, |
| "loss": 0.0059, |
| "step": 3900 |
| }, |
| { |
| "epoch": 13.39041095890411, |
| "grad_norm": 0.08977990597486496, |
| "learning_rate": 9.432293966429514e-05, |
| "loss": 0.0063, |
| "step": 3910 |
| }, |
| { |
| "epoch": 13.424657534246576, |
| "grad_norm": 0.06775444746017456, |
| "learning_rate": 9.428461717918511e-05, |
| "loss": 0.0057, |
| "step": 3920 |
| }, |
| { |
| "epoch": 13.45890410958904, |
| "grad_norm": 0.08324276655912399, |
| "learning_rate": 9.424617362162271e-05, |
| "loss": 0.0047, |
| "step": 3930 |
| }, |
| { |
| "epoch": 13.493150684931507, |
| "grad_norm": 0.09544079750776291, |
| "learning_rate": 9.420760909671118e-05, |
| "loss": 0.0066, |
| "step": 3940 |
| }, |
| { |
| "epoch": 13.527397260273972, |
| "grad_norm": 0.09019865840673447, |
| "learning_rate": 9.416892370988444e-05, |
| "loss": 0.0071, |
| "step": 3950 |
| }, |
| { |
| "epoch": 13.561643835616438, |
| "grad_norm": 0.07820634543895721, |
| "learning_rate": 9.413011756690685e-05, |
| "loss": 0.0078, |
| "step": 3960 |
| }, |
| { |
| "epoch": 13.595890410958905, |
| "grad_norm": 0.07065964490175247, |
| "learning_rate": 9.409119077387294e-05, |
| "loss": 0.0079, |
| "step": 3970 |
| }, |
| { |
| "epoch": 13.63013698630137, |
| "grad_norm": 0.07494448125362396, |
| "learning_rate": 9.405214343720707e-05, |
| "loss": 0.0054, |
| "step": 3980 |
| }, |
| { |
| "epoch": 13.664383561643836, |
| "grad_norm": 0.07330919057130814, |
| "learning_rate": 9.401297566366318e-05, |
| "loss": 0.0073, |
| "step": 3990 |
| }, |
| { |
| "epoch": 13.698630136986301, |
| "grad_norm": 0.07755449414253235, |
| "learning_rate": 9.397368756032445e-05, |
| "loss": 0.0053, |
| "step": 4000 |
| }, |
| { |
| "epoch": 13.732876712328768, |
| "grad_norm": 0.0835888609290123, |
| "learning_rate": 9.393427923460308e-05, |
| "loss": 0.0069, |
| "step": 4010 |
| }, |
| { |
| "epoch": 13.767123287671232, |
| "grad_norm": 0.09608691185712814, |
| "learning_rate": 9.389475079423988e-05, |
| "loss": 0.0062, |
| "step": 4020 |
| }, |
| { |
| "epoch": 13.801369863013699, |
| "grad_norm": 0.06515517085790634, |
| "learning_rate": 9.385510234730415e-05, |
| "loss": 0.0049, |
| "step": 4030 |
| }, |
| { |
| "epoch": 13.835616438356164, |
| "grad_norm": 0.07440678030252457, |
| "learning_rate": 9.381533400219318e-05, |
| "loss": 0.0062, |
| "step": 4040 |
| }, |
| { |
| "epoch": 13.86986301369863, |
| "grad_norm": 0.0674986019730568, |
| "learning_rate": 9.377544586763215e-05, |
| "loss": 0.0057, |
| "step": 4050 |
| }, |
| { |
| "epoch": 13.904109589041095, |
| "grad_norm": 0.09010426700115204, |
| "learning_rate": 9.373543805267368e-05, |
| "loss": 0.0084, |
| "step": 4060 |
| }, |
| { |
| "epoch": 13.938356164383562, |
| "grad_norm": 0.09228496253490448, |
| "learning_rate": 9.369531066669758e-05, |
| "loss": 0.0065, |
| "step": 4070 |
| }, |
| { |
| "epoch": 13.972602739726028, |
| "grad_norm": 0.11372701078653336, |
| "learning_rate": 9.365506381941066e-05, |
| "loss": 0.0067, |
| "step": 4080 |
| }, |
| { |
| "epoch": 14.006849315068493, |
| "grad_norm": 0.08348413556814194, |
| "learning_rate": 9.36146976208462e-05, |
| "loss": 0.0065, |
| "step": 4090 |
| }, |
| { |
| "epoch": 14.04109589041096, |
| "grad_norm": 0.0798589214682579, |
| "learning_rate": 9.357421218136386e-05, |
| "loss": 0.0056, |
| "step": 4100 |
| }, |
| { |
| "epoch": 14.075342465753424, |
| "grad_norm": 0.0991537868976593, |
| "learning_rate": 9.353360761164931e-05, |
| "loss": 0.0051, |
| "step": 4110 |
| }, |
| { |
| "epoch": 14.10958904109589, |
| "grad_norm": 0.0866604894399643, |
| "learning_rate": 9.349288402271388e-05, |
| "loss": 0.0049, |
| "step": 4120 |
| }, |
| { |
| "epoch": 14.143835616438356, |
| "grad_norm": 0.07060110569000244, |
| "learning_rate": 9.345204152589428e-05, |
| "loss": 0.0059, |
| "step": 4130 |
| }, |
| { |
| "epoch": 14.178082191780822, |
| "grad_norm": 0.12644457817077637, |
| "learning_rate": 9.341108023285238e-05, |
| "loss": 0.0064, |
| "step": 4140 |
| }, |
| { |
| "epoch": 14.212328767123287, |
| "grad_norm": 0.10066480189561844, |
| "learning_rate": 9.337000025557476e-05, |
| "loss": 0.0053, |
| "step": 4150 |
| }, |
| { |
| "epoch": 14.246575342465754, |
| "grad_norm": 0.06575191020965576, |
| "learning_rate": 9.332880170637252e-05, |
| "loss": 0.0065, |
| "step": 4160 |
| }, |
| { |
| "epoch": 14.280821917808218, |
| "grad_norm": 0.06894955784082413, |
| "learning_rate": 9.328748469788093e-05, |
| "loss": 0.0053, |
| "step": 4170 |
| }, |
| { |
| "epoch": 14.315068493150685, |
| "grad_norm": 0.08669496327638626, |
| "learning_rate": 9.32460493430591e-05, |
| "loss": 0.005, |
| "step": 4180 |
| }, |
| { |
| "epoch": 14.349315068493151, |
| "grad_norm": 0.08158606290817261, |
| "learning_rate": 9.320449575518972e-05, |
| "loss": 0.006, |
| "step": 4190 |
| }, |
| { |
| "epoch": 14.383561643835616, |
| "grad_norm": 0.09702680259943008, |
| "learning_rate": 9.316282404787871e-05, |
| "loss": 0.005, |
| "step": 4200 |
| }, |
| { |
| "epoch": 14.417808219178083, |
| "grad_norm": 0.08489725738763809, |
| "learning_rate": 9.31210343350549e-05, |
| "loss": 0.0059, |
| "step": 4210 |
| }, |
| { |
| "epoch": 14.452054794520548, |
| "grad_norm": 0.08486241847276688, |
| "learning_rate": 9.30791267309698e-05, |
| "loss": 0.0056, |
| "step": 4220 |
| }, |
| { |
| "epoch": 14.486301369863014, |
| "grad_norm": 0.11101726442575455, |
| "learning_rate": 9.30371013501972e-05, |
| "loss": 0.0063, |
| "step": 4230 |
| }, |
| { |
| "epoch": 14.520547945205479, |
| "grad_norm": 0.10619505494832993, |
| "learning_rate": 9.299495830763286e-05, |
| "loss": 0.0063, |
| "step": 4240 |
| }, |
| { |
| "epoch": 14.554794520547945, |
| "grad_norm": 0.08418061584234238, |
| "learning_rate": 9.295269771849427e-05, |
| "loss": 0.0054, |
| "step": 4250 |
| }, |
| { |
| "epoch": 14.58904109589041, |
| "grad_norm": 0.07915527373552322, |
| "learning_rate": 9.291031969832026e-05, |
| "loss": 0.0068, |
| "step": 4260 |
| }, |
| { |
| "epoch": 14.623287671232877, |
| "grad_norm": 0.0827726274728775, |
| "learning_rate": 9.286782436297073e-05, |
| "loss": 0.0077, |
| "step": 4270 |
| }, |
| { |
| "epoch": 14.657534246575342, |
| "grad_norm": 0.07672649621963501, |
| "learning_rate": 9.282521182862629e-05, |
| "loss": 0.0059, |
| "step": 4280 |
| }, |
| { |
| "epoch": 14.691780821917808, |
| "grad_norm": 0.08115788549184799, |
| "learning_rate": 9.278248221178798e-05, |
| "loss": 0.0051, |
| "step": 4290 |
| }, |
| { |
| "epoch": 14.726027397260275, |
| "grad_norm": 0.06016877293586731, |
| "learning_rate": 9.273963562927695e-05, |
| "loss": 0.0053, |
| "step": 4300 |
| }, |
| { |
| "epoch": 14.76027397260274, |
| "grad_norm": 0.06568919122219086, |
| "learning_rate": 9.269667219823412e-05, |
| "loss": 0.0053, |
| "step": 4310 |
| }, |
| { |
| "epoch": 14.794520547945206, |
| "grad_norm": 0.06511034816503525, |
| "learning_rate": 9.265359203611987e-05, |
| "loss": 0.0045, |
| "step": 4320 |
| }, |
| { |
| "epoch": 14.82876712328767, |
| "grad_norm": 0.0742722898721695, |
| "learning_rate": 9.261039526071374e-05, |
| "loss": 0.0067, |
| "step": 4330 |
| }, |
| { |
| "epoch": 14.863013698630137, |
| "grad_norm": 0.0778801292181015, |
| "learning_rate": 9.256708199011401e-05, |
| "loss": 0.007, |
| "step": 4340 |
| }, |
| { |
| "epoch": 14.897260273972602, |
| "grad_norm": 0.08015526086091995, |
| "learning_rate": 9.252365234273755e-05, |
| "loss": 0.0058, |
| "step": 4350 |
| }, |
| { |
| "epoch": 14.931506849315069, |
| "grad_norm": 0.08591359853744507, |
| "learning_rate": 9.248010643731935e-05, |
| "loss": 0.0048, |
| "step": 4360 |
| }, |
| { |
| "epoch": 14.965753424657533, |
| "grad_norm": 0.09805981069803238, |
| "learning_rate": 9.243644439291223e-05, |
| "loss": 0.006, |
| "step": 4370 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 0.06617870181798935, |
| "learning_rate": 9.239266632888659e-05, |
| "loss": 0.0068, |
| "step": 4380 |
| }, |
| { |
| "epoch": 15.034246575342467, |
| "grad_norm": 0.08026549965143204, |
| "learning_rate": 9.234877236492997e-05, |
| "loss": 0.0049, |
| "step": 4390 |
| }, |
| { |
| "epoch": 15.068493150684931, |
| "grad_norm": 0.081352598965168, |
| "learning_rate": 9.230476262104677e-05, |
| "loss": 0.0062, |
| "step": 4400 |
| }, |
| { |
| "epoch": 15.102739726027398, |
| "grad_norm": 0.07488571107387543, |
| "learning_rate": 9.226063721755799e-05, |
| "loss": 0.0059, |
| "step": 4410 |
| }, |
| { |
| "epoch": 15.136986301369863, |
| "grad_norm": 0.09087924659252167, |
| "learning_rate": 9.221639627510076e-05, |
| "loss": 0.0081, |
| "step": 4420 |
| }, |
| { |
| "epoch": 15.17123287671233, |
| "grad_norm": 0.08181889355182648, |
| "learning_rate": 9.217203991462815e-05, |
| "loss": 0.0072, |
| "step": 4430 |
| }, |
| { |
| "epoch": 15.205479452054794, |
| "grad_norm": 0.0886894091963768, |
| "learning_rate": 9.212756825740873e-05, |
| "loss": 0.0062, |
| "step": 4440 |
| }, |
| { |
| "epoch": 15.23972602739726, |
| "grad_norm": 0.06616362184286118, |
| "learning_rate": 9.208298142502636e-05, |
| "loss": 0.0055, |
| "step": 4450 |
| }, |
| { |
| "epoch": 15.273972602739725, |
| "grad_norm": 0.07078687101602554, |
| "learning_rate": 9.20382795393797e-05, |
| "loss": 0.0058, |
| "step": 4460 |
| }, |
| { |
| "epoch": 15.308219178082192, |
| "grad_norm": 0.09435564279556274, |
| "learning_rate": 9.199346272268199e-05, |
| "loss": 0.0056, |
| "step": 4470 |
| }, |
| { |
| "epoch": 15.342465753424657, |
| "grad_norm": 0.10773882269859314, |
| "learning_rate": 9.194853109746074e-05, |
| "loss": 0.0077, |
| "step": 4480 |
| }, |
| { |
| "epoch": 15.376712328767123, |
| "grad_norm": 0.08089710026979446, |
| "learning_rate": 9.190348478655724e-05, |
| "loss": 0.0069, |
| "step": 4490 |
| }, |
| { |
| "epoch": 15.41095890410959, |
| "grad_norm": 0.0753050446510315, |
| "learning_rate": 9.185832391312644e-05, |
| "loss": 0.0083, |
| "step": 4500 |
| }, |
| { |
| "epoch": 15.445205479452055, |
| "grad_norm": 0.07557237148284912, |
| "learning_rate": 9.18130486006364e-05, |
| "loss": 0.0064, |
| "step": 4510 |
| }, |
| { |
| "epoch": 15.479452054794521, |
| "grad_norm": 0.07062719762325287, |
| "learning_rate": 9.176765897286813e-05, |
| "loss": 0.0079, |
| "step": 4520 |
| }, |
| { |
| "epoch": 15.513698630136986, |
| "grad_norm": 0.05990029498934746, |
| "learning_rate": 9.17221551539151e-05, |
| "loss": 0.006, |
| "step": 4530 |
| }, |
| { |
| "epoch": 15.547945205479452, |
| "grad_norm": 0.08006462454795837, |
| "learning_rate": 9.167653726818305e-05, |
| "loss": 0.0063, |
| "step": 4540 |
| }, |
| { |
| "epoch": 15.582191780821917, |
| "grad_norm": 0.0626312643289566, |
| "learning_rate": 9.163080544038952e-05, |
| "loss": 0.0059, |
| "step": 4550 |
| }, |
| { |
| "epoch": 15.616438356164384, |
| "grad_norm": 0.07849642634391785, |
| "learning_rate": 9.158495979556358e-05, |
| "loss": 0.0062, |
| "step": 4560 |
| }, |
| { |
| "epoch": 15.650684931506849, |
| "grad_norm": 0.06645455211400986, |
| "learning_rate": 9.153900045904549e-05, |
| "loss": 0.0054, |
| "step": 4570 |
| }, |
| { |
| "epoch": 15.684931506849315, |
| "grad_norm": 0.09787298738956451, |
| "learning_rate": 9.14929275564863e-05, |
| "loss": 0.005, |
| "step": 4580 |
| }, |
| { |
| "epoch": 15.719178082191782, |
| "grad_norm": 0.09240013360977173, |
| "learning_rate": 9.144674121384757e-05, |
| "loss": 0.0062, |
| "step": 4590 |
| }, |
| { |
| "epoch": 15.753424657534246, |
| "grad_norm": 0.06534786522388458, |
| "learning_rate": 9.140044155740101e-05, |
| "loss": 0.0059, |
| "step": 4600 |
| }, |
| { |
| "epoch": 15.787671232876713, |
| "grad_norm": 0.0793117806315422, |
| "learning_rate": 9.135402871372808e-05, |
| "loss": 0.0068, |
| "step": 4610 |
| }, |
| { |
| "epoch": 15.821917808219178, |
| "grad_norm": 0.07911541312932968, |
| "learning_rate": 9.130750280971978e-05, |
| "loss": 0.0062, |
| "step": 4620 |
| }, |
| { |
| "epoch": 15.856164383561644, |
| "grad_norm": 0.11696206033229828, |
| "learning_rate": 9.126086397257612e-05, |
| "loss": 0.0051, |
| "step": 4630 |
| }, |
| { |
| "epoch": 15.89041095890411, |
| "grad_norm": 0.09704367071390152, |
| "learning_rate": 9.121411232980588e-05, |
| "loss": 0.0061, |
| "step": 4640 |
| }, |
| { |
| "epoch": 15.924657534246576, |
| "grad_norm": 0.09515467286109924, |
| "learning_rate": 9.116724800922629e-05, |
| "loss": 0.0063, |
| "step": 4650 |
| }, |
| { |
| "epoch": 15.95890410958904, |
| "grad_norm": 0.0956050381064415, |
| "learning_rate": 9.112027113896262e-05, |
| "loss": 0.0051, |
| "step": 4660 |
| }, |
| { |
| "epoch": 15.993150684931507, |
| "grad_norm": 0.06389576941728592, |
| "learning_rate": 9.107318184744781e-05, |
| "loss": 0.0054, |
| "step": 4670 |
| }, |
| { |
| "epoch": 16.027397260273972, |
| "grad_norm": 0.08562270551919937, |
| "learning_rate": 9.102598026342222e-05, |
| "loss": 0.0057, |
| "step": 4680 |
| }, |
| { |
| "epoch": 16.061643835616437, |
| "grad_norm": 0.07711412012577057, |
| "learning_rate": 9.097866651593317e-05, |
| "loss": 0.005, |
| "step": 4690 |
| }, |
| { |
| "epoch": 16.095890410958905, |
| "grad_norm": 0.09755413234233856, |
| "learning_rate": 9.093124073433463e-05, |
| "loss": 0.0052, |
| "step": 4700 |
| }, |
| { |
| "epoch": 16.13013698630137, |
| "grad_norm": 0.09233304858207703, |
| "learning_rate": 9.088370304828685e-05, |
| "loss": 0.0051, |
| "step": 4710 |
| }, |
| { |
| "epoch": 16.164383561643834, |
| "grad_norm": 0.08450305461883545, |
| "learning_rate": 9.083605358775612e-05, |
| "loss": 0.0062, |
| "step": 4720 |
| }, |
| { |
| "epoch": 16.198630136986303, |
| "grad_norm": 0.057008977979421616, |
| "learning_rate": 9.078829248301417e-05, |
| "loss": 0.0061, |
| "step": 4730 |
| }, |
| { |
| "epoch": 16.232876712328768, |
| "grad_norm": 0.06467018276453018, |
| "learning_rate": 9.074041986463808e-05, |
| "loss": 0.006, |
| "step": 4740 |
| }, |
| { |
| "epoch": 16.267123287671232, |
| "grad_norm": 0.08674854785203934, |
| "learning_rate": 9.069243586350975e-05, |
| "loss": 0.0053, |
| "step": 4750 |
| }, |
| { |
| "epoch": 16.301369863013697, |
| "grad_norm": 0.06117716431617737, |
| "learning_rate": 9.064434061081562e-05, |
| "loss": 0.0056, |
| "step": 4760 |
| }, |
| { |
| "epoch": 16.335616438356166, |
| "grad_norm": 0.07046956568956375, |
| "learning_rate": 9.059613423804623e-05, |
| "loss": 0.0049, |
| "step": 4770 |
| }, |
| { |
| "epoch": 16.36986301369863, |
| "grad_norm": 0.11593141406774521, |
| "learning_rate": 9.0547816876996e-05, |
| "loss": 0.0073, |
| "step": 4780 |
| }, |
| { |
| "epoch": 16.404109589041095, |
| "grad_norm": 0.07755231112241745, |
| "learning_rate": 9.049938865976275e-05, |
| "loss": 0.007, |
| "step": 4790 |
| }, |
| { |
| "epoch": 16.438356164383563, |
| "grad_norm": 0.10014522075653076, |
| "learning_rate": 9.045084971874738e-05, |
| "loss": 0.0066, |
| "step": 4800 |
| }, |
| { |
| "epoch": 16.472602739726028, |
| "grad_norm": 0.07113610208034515, |
| "learning_rate": 9.040220018665347e-05, |
| "loss": 0.0056, |
| "step": 4810 |
| }, |
| { |
| "epoch": 16.506849315068493, |
| "grad_norm": 0.08371955156326294, |
| "learning_rate": 9.035344019648702e-05, |
| "loss": 0.0064, |
| "step": 4820 |
| }, |
| { |
| "epoch": 16.541095890410958, |
| "grad_norm": 0.08940327167510986, |
| "learning_rate": 9.030456988155596e-05, |
| "loss": 0.0075, |
| "step": 4830 |
| }, |
| { |
| "epoch": 16.575342465753426, |
| "grad_norm": 0.0665537565946579, |
| "learning_rate": 9.025558937546988e-05, |
| "loss": 0.0043, |
| "step": 4840 |
| }, |
| { |
| "epoch": 16.60958904109589, |
| "grad_norm": 0.07203962653875351, |
| "learning_rate": 9.020649881213958e-05, |
| "loss": 0.0064, |
| "step": 4850 |
| }, |
| { |
| "epoch": 16.643835616438356, |
| "grad_norm": 0.06501701474189758, |
| "learning_rate": 9.015729832577681e-05, |
| "loss": 0.0056, |
| "step": 4860 |
| }, |
| { |
| "epoch": 16.67808219178082, |
| "grad_norm": 0.08230343461036682, |
| "learning_rate": 9.010798805089384e-05, |
| "loss": 0.0054, |
| "step": 4870 |
| }, |
| { |
| "epoch": 16.71232876712329, |
| "grad_norm": 0.08290053904056549, |
| "learning_rate": 9.005856812230304e-05, |
| "loss": 0.0064, |
| "step": 4880 |
| }, |
| { |
| "epoch": 16.746575342465754, |
| "grad_norm": 0.05811435729265213, |
| "learning_rate": 9.000903867511666e-05, |
| "loss": 0.0044, |
| "step": 4890 |
| }, |
| { |
| "epoch": 16.78082191780822, |
| "grad_norm": 0.08605169504880905, |
| "learning_rate": 8.995939984474624e-05, |
| "loss": 0.0052, |
| "step": 4900 |
| }, |
| { |
| "epoch": 16.815068493150687, |
| "grad_norm": 0.08715452998876572, |
| "learning_rate": 8.990965176690252e-05, |
| "loss": 0.0095, |
| "step": 4910 |
| }, |
| { |
| "epoch": 16.84931506849315, |
| "grad_norm": 0.08186474442481995, |
| "learning_rate": 8.98597945775948e-05, |
| "loss": 0.0076, |
| "step": 4920 |
| }, |
| { |
| "epoch": 16.883561643835616, |
| "grad_norm": 0.07201925665140152, |
| "learning_rate": 8.980982841313074e-05, |
| "loss": 0.0063, |
| "step": 4930 |
| }, |
| { |
| "epoch": 16.91780821917808, |
| "grad_norm": 0.08393460512161255, |
| "learning_rate": 8.975975341011596e-05, |
| "loss": 0.0043, |
| "step": 4940 |
| }, |
| { |
| "epoch": 16.95205479452055, |
| "grad_norm": 0.07584802061319351, |
| "learning_rate": 8.970956970545355e-05, |
| "loss": 0.0049, |
| "step": 4950 |
| }, |
| { |
| "epoch": 16.986301369863014, |
| "grad_norm": 0.09148117899894714, |
| "learning_rate": 8.965927743634391e-05, |
| "loss": 0.0051, |
| "step": 4960 |
| }, |
| { |
| "epoch": 17.02054794520548, |
| "grad_norm": 0.07554657012224197, |
| "learning_rate": 8.96088767402841e-05, |
| "loss": 0.0048, |
| "step": 4970 |
| }, |
| { |
| "epoch": 17.054794520547944, |
| "grad_norm": 0.06879697740077972, |
| "learning_rate": 8.955836775506776e-05, |
| "loss": 0.0067, |
| "step": 4980 |
| }, |
| { |
| "epoch": 17.089041095890412, |
| "grad_norm": 0.06374925374984741, |
| "learning_rate": 8.950775061878453e-05, |
| "loss": 0.0045, |
| "step": 4990 |
| }, |
| { |
| "epoch": 17.123287671232877, |
| "grad_norm": 0.07682766765356064, |
| "learning_rate": 8.945702546981969e-05, |
| "loss": 0.0053, |
| "step": 5000 |
| }, |
| { |
| "epoch": 17.15753424657534, |
| "grad_norm": 0.057352907955646515, |
| "learning_rate": 8.940619244685388e-05, |
| "loss": 0.0052, |
| "step": 5010 |
| }, |
| { |
| "epoch": 17.19178082191781, |
| "grad_norm": 0.07238159328699112, |
| "learning_rate": 8.935525168886262e-05, |
| "loss": 0.0058, |
| "step": 5020 |
| }, |
| { |
| "epoch": 17.226027397260275, |
| "grad_norm": 0.06098822504281998, |
| "learning_rate": 8.930420333511606e-05, |
| "loss": 0.0051, |
| "step": 5030 |
| }, |
| { |
| "epoch": 17.26027397260274, |
| "grad_norm": 0.0835380032658577, |
| "learning_rate": 8.92530475251784e-05, |
| "loss": 0.0069, |
| "step": 5040 |
| }, |
| { |
| "epoch": 17.294520547945204, |
| "grad_norm": 0.04726843908429146, |
| "learning_rate": 8.920178439890765e-05, |
| "loss": 0.0049, |
| "step": 5050 |
| }, |
| { |
| "epoch": 17.328767123287673, |
| "grad_norm": 0.09047394245862961, |
| "learning_rate": 8.91504140964553e-05, |
| "loss": 0.0045, |
| "step": 5060 |
| }, |
| { |
| "epoch": 17.363013698630137, |
| "grad_norm": 0.05919186770915985, |
| "learning_rate": 8.909893675826574e-05, |
| "loss": 0.0057, |
| "step": 5070 |
| }, |
| { |
| "epoch": 17.397260273972602, |
| "grad_norm": 0.08238182961940765, |
| "learning_rate": 8.90473525250761e-05, |
| "loss": 0.0053, |
| "step": 5080 |
| }, |
| { |
| "epoch": 17.431506849315067, |
| "grad_norm": 0.0925091803073883, |
| "learning_rate": 8.899566153791566e-05, |
| "loss": 0.0064, |
| "step": 5090 |
| }, |
| { |
| "epoch": 17.465753424657535, |
| "grad_norm": 0.0697120800614357, |
| "learning_rate": 8.894386393810563e-05, |
| "loss": 0.0055, |
| "step": 5100 |
| }, |
| { |
| "epoch": 17.5, |
| "grad_norm": 0.0903661847114563, |
| "learning_rate": 8.889195986725865e-05, |
| "loss": 0.0071, |
| "step": 5110 |
| }, |
| { |
| "epoch": 17.534246575342465, |
| "grad_norm": 0.07673303037881851, |
| "learning_rate": 8.883994946727849e-05, |
| "loss": 0.0049, |
| "step": 5120 |
| }, |
| { |
| "epoch": 17.568493150684933, |
| "grad_norm": 0.07298251241445541, |
| "learning_rate": 8.878783288035957e-05, |
| "loss": 0.006, |
| "step": 5130 |
| }, |
| { |
| "epoch": 17.602739726027398, |
| "grad_norm": 0.08428546786308289, |
| "learning_rate": 8.873561024898668e-05, |
| "loss": 0.0068, |
| "step": 5140 |
| }, |
| { |
| "epoch": 17.636986301369863, |
| "grad_norm": 0.06158934161067009, |
| "learning_rate": 8.868328171593448e-05, |
| "loss": 0.005, |
| "step": 5150 |
| }, |
| { |
| "epoch": 17.671232876712327, |
| "grad_norm": 0.07968197017908096, |
| "learning_rate": 8.863084742426719e-05, |
| "loss": 0.0048, |
| "step": 5160 |
| }, |
| { |
| "epoch": 17.705479452054796, |
| "grad_norm": 0.09236017614603043, |
| "learning_rate": 8.857830751733815e-05, |
| "loss": 0.0055, |
| "step": 5170 |
| }, |
| { |
| "epoch": 17.73972602739726, |
| "grad_norm": 0.0854865089058876, |
| "learning_rate": 8.852566213878947e-05, |
| "loss": 0.008, |
| "step": 5180 |
| }, |
| { |
| "epoch": 17.773972602739725, |
| "grad_norm": 0.06545963138341904, |
| "learning_rate": 8.84729114325516e-05, |
| "loss": 0.0052, |
| "step": 5190 |
| }, |
| { |
| "epoch": 17.80821917808219, |
| "grad_norm": 0.05856500193476677, |
| "learning_rate": 8.842005554284296e-05, |
| "loss": 0.005, |
| "step": 5200 |
| }, |
| { |
| "epoch": 17.84246575342466, |
| "grad_norm": 0.06909248977899551, |
| "learning_rate": 8.836709461416952e-05, |
| "loss": 0.0046, |
| "step": 5210 |
| }, |
| { |
| "epoch": 17.876712328767123, |
| "grad_norm": 0.07687606662511826, |
| "learning_rate": 8.831402879132446e-05, |
| "loss": 0.0064, |
| "step": 5220 |
| }, |
| { |
| "epoch": 17.910958904109588, |
| "grad_norm": 0.08718976378440857, |
| "learning_rate": 8.82608582193877e-05, |
| "loss": 0.0052, |
| "step": 5230 |
| }, |
| { |
| "epoch": 17.945205479452056, |
| "grad_norm": 0.05759764462709427, |
| "learning_rate": 8.820758304372557e-05, |
| "loss": 0.0053, |
| "step": 5240 |
| }, |
| { |
| "epoch": 17.97945205479452, |
| "grad_norm": 0.051869217306375504, |
| "learning_rate": 8.815420340999033e-05, |
| "loss": 0.0048, |
| "step": 5250 |
| }, |
| { |
| "epoch": 18.013698630136986, |
| "grad_norm": 0.10209794342517853, |
| "learning_rate": 8.810071946411989e-05, |
| "loss": 0.0057, |
| "step": 5260 |
| }, |
| { |
| "epoch": 18.04794520547945, |
| "grad_norm": 0.08743973076343536, |
| "learning_rate": 8.804713135233731e-05, |
| "loss": 0.0052, |
| "step": 5270 |
| }, |
| { |
| "epoch": 18.08219178082192, |
| "grad_norm": 0.07851141691207886, |
| "learning_rate": 8.799343922115044e-05, |
| "loss": 0.0055, |
| "step": 5280 |
| }, |
| { |
| "epoch": 18.116438356164384, |
| "grad_norm": 0.05638684332370758, |
| "learning_rate": 8.79396432173515e-05, |
| "loss": 0.0055, |
| "step": 5290 |
| }, |
| { |
| "epoch": 18.15068493150685, |
| "grad_norm": 0.08562038093805313, |
| "learning_rate": 8.788574348801675e-05, |
| "loss": 0.0067, |
| "step": 5300 |
| }, |
| { |
| "epoch": 18.184931506849313, |
| "grad_norm": 0.09454862028360367, |
| "learning_rate": 8.783174018050594e-05, |
| "loss": 0.008, |
| "step": 5310 |
| }, |
| { |
| "epoch": 18.21917808219178, |
| "grad_norm": 0.08692476898431778, |
| "learning_rate": 8.77776334424621e-05, |
| "loss": 0.006, |
| "step": 5320 |
| }, |
| { |
| "epoch": 18.253424657534246, |
| "grad_norm": 0.09937483817338943, |
| "learning_rate": 8.772342342181095e-05, |
| "loss": 0.0052, |
| "step": 5330 |
| }, |
| { |
| "epoch": 18.28767123287671, |
| "grad_norm": 0.10558980703353882, |
| "learning_rate": 8.766911026676064e-05, |
| "loss": 0.0041, |
| "step": 5340 |
| }, |
| { |
| "epoch": 18.32191780821918, |
| "grad_norm": 0.062475964426994324, |
| "learning_rate": 8.761469412580125e-05, |
| "loss": 0.0052, |
| "step": 5350 |
| }, |
| { |
| "epoch": 18.356164383561644, |
| "grad_norm": 0.0724620670080185, |
| "learning_rate": 8.756017514770443e-05, |
| "loss": 0.0052, |
| "step": 5360 |
| }, |
| { |
| "epoch": 18.39041095890411, |
| "grad_norm": 0.057776160538196564, |
| "learning_rate": 8.750555348152298e-05, |
| "loss": 0.0055, |
| "step": 5370 |
| }, |
| { |
| "epoch": 18.424657534246574, |
| "grad_norm": 0.07162385433912277, |
| "learning_rate": 8.745082927659047e-05, |
| "loss": 0.0051, |
| "step": 5380 |
| }, |
| { |
| "epoch": 18.458904109589042, |
| "grad_norm": 0.08095235377550125, |
| "learning_rate": 8.739600268252078e-05, |
| "loss": 0.0061, |
| "step": 5390 |
| }, |
| { |
| "epoch": 18.493150684931507, |
| "grad_norm": 0.06584731489419937, |
| "learning_rate": 8.73410738492077e-05, |
| "loss": 0.0069, |
| "step": 5400 |
| }, |
| { |
| "epoch": 18.527397260273972, |
| "grad_norm": 0.09591003507375717, |
| "learning_rate": 8.728604292682459e-05, |
| "loss": 0.0051, |
| "step": 5410 |
| }, |
| { |
| "epoch": 18.561643835616437, |
| "grad_norm": 0.10172990709543228, |
| "learning_rate": 8.723091006582389e-05, |
| "loss": 0.0056, |
| "step": 5420 |
| }, |
| { |
| "epoch": 18.595890410958905, |
| "grad_norm": 0.06665683537721634, |
| "learning_rate": 8.717567541693673e-05, |
| "loss": 0.0055, |
| "step": 5430 |
| }, |
| { |
| "epoch": 18.63013698630137, |
| "grad_norm": 0.055505841970443726, |
| "learning_rate": 8.71203391311725e-05, |
| "loss": 0.0053, |
| "step": 5440 |
| }, |
| { |
| "epoch": 18.664383561643834, |
| "grad_norm": 0.09754104167222977, |
| "learning_rate": 8.706490135981855e-05, |
| "loss": 0.0053, |
| "step": 5450 |
| }, |
| { |
| "epoch": 18.698630136986303, |
| "grad_norm": 0.08901935815811157, |
| "learning_rate": 8.700936225443959e-05, |
| "loss": 0.006, |
| "step": 5460 |
| }, |
| { |
| "epoch": 18.732876712328768, |
| "grad_norm": 0.057462576776742935, |
| "learning_rate": 8.695372196687743e-05, |
| "loss": 0.0058, |
| "step": 5470 |
| }, |
| { |
| "epoch": 18.767123287671232, |
| "grad_norm": 0.09456628561019897, |
| "learning_rate": 8.689798064925049e-05, |
| "loss": 0.0042, |
| "step": 5480 |
| }, |
| { |
| "epoch": 18.801369863013697, |
| "grad_norm": 0.11748138815164566, |
| "learning_rate": 8.684213845395339e-05, |
| "loss": 0.0051, |
| "step": 5490 |
| }, |
| { |
| "epoch": 18.835616438356166, |
| "grad_norm": 0.07330834865570068, |
| "learning_rate": 8.678619553365659e-05, |
| "loss": 0.0062, |
| "step": 5500 |
| }, |
| { |
| "epoch": 18.86986301369863, |
| "grad_norm": 0.09065715223550797, |
| "learning_rate": 8.673015204130586e-05, |
| "loss": 0.0046, |
| "step": 5510 |
| }, |
| { |
| "epoch": 18.904109589041095, |
| "grad_norm": 0.07880376279354095, |
| "learning_rate": 8.6674008130122e-05, |
| "loss": 0.0052, |
| "step": 5520 |
| }, |
| { |
| "epoch": 18.938356164383563, |
| "grad_norm": 0.07799544185400009, |
| "learning_rate": 8.661776395360029e-05, |
| "loss": 0.0053, |
| "step": 5530 |
| }, |
| { |
| "epoch": 18.972602739726028, |
| "grad_norm": 0.09239964932203293, |
| "learning_rate": 8.656141966551019e-05, |
| "loss": 0.0058, |
| "step": 5540 |
| }, |
| { |
| "epoch": 19.006849315068493, |
| "grad_norm": 0.06592954695224762, |
| "learning_rate": 8.650497541989482e-05, |
| "loss": 0.0049, |
| "step": 5550 |
| }, |
| { |
| "epoch": 19.041095890410958, |
| "grad_norm": 0.05416293814778328, |
| "learning_rate": 8.644843137107059e-05, |
| "loss": 0.0045, |
| "step": 5560 |
| }, |
| { |
| "epoch": 19.075342465753426, |
| "grad_norm": 0.0726301446557045, |
| "learning_rate": 8.639178767362676e-05, |
| "loss": 0.0056, |
| "step": 5570 |
| }, |
| { |
| "epoch": 19.10958904109589, |
| "grad_norm": 0.05670231953263283, |
| "learning_rate": 8.633504448242505e-05, |
| "loss": 0.0052, |
| "step": 5580 |
| }, |
| { |
| "epoch": 19.143835616438356, |
| "grad_norm": 0.062235865741968155, |
| "learning_rate": 8.627820195259918e-05, |
| "loss": 0.0052, |
| "step": 5590 |
| }, |
| { |
| "epoch": 19.17808219178082, |
| "grad_norm": 0.05244023725390434, |
| "learning_rate": 8.622126023955446e-05, |
| "loss": 0.0052, |
| "step": 5600 |
| }, |
| { |
| "epoch": 19.21232876712329, |
| "grad_norm": 0.08390739560127258, |
| "learning_rate": 8.616421949896734e-05, |
| "loss": 0.0058, |
| "step": 5610 |
| }, |
| { |
| "epoch": 19.246575342465754, |
| "grad_norm": 0.05916329845786095, |
| "learning_rate": 8.610707988678503e-05, |
| "loss": 0.0046, |
| "step": 5620 |
| }, |
| { |
| "epoch": 19.28082191780822, |
| "grad_norm": 0.06701207906007767, |
| "learning_rate": 8.604984155922506e-05, |
| "loss": 0.0065, |
| "step": 5630 |
| }, |
| { |
| "epoch": 19.315068493150687, |
| "grad_norm": 0.06987061351537704, |
| "learning_rate": 8.599250467277483e-05, |
| "loss": 0.0064, |
| "step": 5640 |
| }, |
| { |
| "epoch": 19.34931506849315, |
| "grad_norm": 0.060252390801906586, |
| "learning_rate": 8.59350693841912e-05, |
| "loss": 0.0046, |
| "step": 5650 |
| }, |
| { |
| "epoch": 19.383561643835616, |
| "grad_norm": 0.06767981499433517, |
| "learning_rate": 8.587753585050004e-05, |
| "loss": 0.006, |
| "step": 5660 |
| }, |
| { |
| "epoch": 19.41780821917808, |
| "grad_norm": 0.06105669587850571, |
| "learning_rate": 8.581990422899585e-05, |
| "loss": 0.0063, |
| "step": 5670 |
| }, |
| { |
| "epoch": 19.45205479452055, |
| "grad_norm": 0.08388019353151321, |
| "learning_rate": 8.576217467724128e-05, |
| "loss": 0.0052, |
| "step": 5680 |
| }, |
| { |
| "epoch": 19.486301369863014, |
| "grad_norm": 0.10021601617336273, |
| "learning_rate": 8.570434735306671e-05, |
| "loss": 0.0055, |
| "step": 5690 |
| }, |
| { |
| "epoch": 19.52054794520548, |
| "grad_norm": 0.07756322622299194, |
| "learning_rate": 8.564642241456986e-05, |
| "loss": 0.0051, |
| "step": 5700 |
| }, |
| { |
| "epoch": 19.554794520547944, |
| "grad_norm": 0.08748367428779602, |
| "learning_rate": 8.558840002011528e-05, |
| "loss": 0.0067, |
| "step": 5710 |
| }, |
| { |
| "epoch": 19.589041095890412, |
| "grad_norm": 0.09222008287906647, |
| "learning_rate": 8.553028032833397e-05, |
| "loss": 0.0063, |
| "step": 5720 |
| }, |
| { |
| "epoch": 19.623287671232877, |
| "grad_norm": 0.07356042414903641, |
| "learning_rate": 8.547206349812298e-05, |
| "loss": 0.0047, |
| "step": 5730 |
| }, |
| { |
| "epoch": 19.65753424657534, |
| "grad_norm": 0.0655691996216774, |
| "learning_rate": 8.541374968864487e-05, |
| "loss": 0.0062, |
| "step": 5740 |
| }, |
| { |
| "epoch": 19.69178082191781, |
| "grad_norm": 0.09560838341712952, |
| "learning_rate": 8.535533905932738e-05, |
| "loss": 0.006, |
| "step": 5750 |
| }, |
| { |
| "epoch": 19.726027397260275, |
| "grad_norm": 0.11108992248773575, |
| "learning_rate": 8.529683176986295e-05, |
| "loss": 0.0059, |
| "step": 5760 |
| }, |
| { |
| "epoch": 19.76027397260274, |
| "grad_norm": 0.06656841933727264, |
| "learning_rate": 8.523822798020827e-05, |
| "loss": 0.0065, |
| "step": 5770 |
| }, |
| { |
| "epoch": 19.794520547945204, |
| "grad_norm": 0.056710727512836456, |
| "learning_rate": 8.517952785058385e-05, |
| "loss": 0.0051, |
| "step": 5780 |
| }, |
| { |
| "epoch": 19.828767123287673, |
| "grad_norm": 0.0718056783080101, |
| "learning_rate": 8.512073154147362e-05, |
| "loss": 0.0055, |
| "step": 5790 |
| }, |
| { |
| "epoch": 19.863013698630137, |
| "grad_norm": 0.08344350010156631, |
| "learning_rate": 8.506183921362443e-05, |
| "loss": 0.0042, |
| "step": 5800 |
| }, |
| { |
| "epoch": 19.897260273972602, |
| "grad_norm": 0.0831236019730568, |
| "learning_rate": 8.500285102804568e-05, |
| "loss": 0.0063, |
| "step": 5810 |
| }, |
| { |
| "epoch": 19.931506849315067, |
| "grad_norm": 0.08485836535692215, |
| "learning_rate": 8.494376714600878e-05, |
| "loss": 0.0044, |
| "step": 5820 |
| }, |
| { |
| "epoch": 19.965753424657535, |
| "grad_norm": 0.09129001945257187, |
| "learning_rate": 8.488458772904684e-05, |
| "loss": 0.005, |
| "step": 5830 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 0.060976918786764145, |
| "learning_rate": 8.482531293895412e-05, |
| "loss": 0.0039, |
| "step": 5840 |
| }, |
| { |
| "epoch": 20.034246575342465, |
| "grad_norm": 0.07683663815259933, |
| "learning_rate": 8.476594293778561e-05, |
| "loss": 0.0066, |
| "step": 5850 |
| }, |
| { |
| "epoch": 20.068493150684933, |
| "grad_norm": 0.09113059937953949, |
| "learning_rate": 8.470647788785665e-05, |
| "loss": 0.0057, |
| "step": 5860 |
| }, |
| { |
| "epoch": 20.102739726027398, |
| "grad_norm": 0.060158830136060715, |
| "learning_rate": 8.46469179517424e-05, |
| "loss": 0.0041, |
| "step": 5870 |
| }, |
| { |
| "epoch": 20.136986301369863, |
| "grad_norm": 0.05656911060214043, |
| "learning_rate": 8.458726329227747e-05, |
| "loss": 0.005, |
| "step": 5880 |
| }, |
| { |
| "epoch": 20.171232876712327, |
| "grad_norm": 0.06181691959500313, |
| "learning_rate": 8.452751407255541e-05, |
| "loss": 0.0061, |
| "step": 5890 |
| }, |
| { |
| "epoch": 20.205479452054796, |
| "grad_norm": 0.08877629786729813, |
| "learning_rate": 8.44676704559283e-05, |
| "loss": 0.0056, |
| "step": 5900 |
| }, |
| { |
| "epoch": 20.23972602739726, |
| "grad_norm": 0.08738001435995102, |
| "learning_rate": 8.44077326060063e-05, |
| "loss": 0.0057, |
| "step": 5910 |
| }, |
| { |
| "epoch": 20.273972602739725, |
| "grad_norm": 0.0723235234618187, |
| "learning_rate": 8.434770068665723e-05, |
| "loss": 0.0051, |
| "step": 5920 |
| }, |
| { |
| "epoch": 20.30821917808219, |
| "grad_norm": 0.08437537401914597, |
| "learning_rate": 8.428757486200603e-05, |
| "loss": 0.0062, |
| "step": 5930 |
| }, |
| { |
| "epoch": 20.34246575342466, |
| "grad_norm": 0.08028864115476608, |
| "learning_rate": 8.422735529643444e-05, |
| "loss": 0.0058, |
| "step": 5940 |
| }, |
| { |
| "epoch": 20.376712328767123, |
| "grad_norm": 0.06959905475378036, |
| "learning_rate": 8.416704215458043e-05, |
| "loss": 0.0082, |
| "step": 5950 |
| }, |
| { |
| "epoch": 20.410958904109588, |
| "grad_norm": 0.06425705552101135, |
| "learning_rate": 8.410663560133784e-05, |
| "loss": 0.005, |
| "step": 5960 |
| }, |
| { |
| "epoch": 20.445205479452056, |
| "grad_norm": 0.06436647474765778, |
| "learning_rate": 8.404613580185585e-05, |
| "loss": 0.0049, |
| "step": 5970 |
| }, |
| { |
| "epoch": 20.47945205479452, |
| "grad_norm": 0.057509083300828934, |
| "learning_rate": 8.398554292153866e-05, |
| "loss": 0.0071, |
| "step": 5980 |
| }, |
| { |
| "epoch": 20.513698630136986, |
| "grad_norm": 0.04900655895471573, |
| "learning_rate": 8.392485712604483e-05, |
| "loss": 0.0067, |
| "step": 5990 |
| }, |
| { |
| "epoch": 20.54794520547945, |
| "grad_norm": 0.05448565259575844, |
| "learning_rate": 8.386407858128706e-05, |
| "loss": 0.0058, |
| "step": 6000 |
| }, |
| { |
| "epoch": 20.58219178082192, |
| "grad_norm": 0.06079663708806038, |
| "learning_rate": 8.380320745343153e-05, |
| "loss": 0.0072, |
| "step": 6010 |
| }, |
| { |
| "epoch": 20.616438356164384, |
| "grad_norm": 0.053850773721933365, |
| "learning_rate": 8.37422439088976e-05, |
| "loss": 0.0072, |
| "step": 6020 |
| }, |
| { |
| "epoch": 20.65068493150685, |
| "grad_norm": 0.09747336804866791, |
| "learning_rate": 8.368118811435726e-05, |
| "loss": 0.0046, |
| "step": 6030 |
| }, |
| { |
| "epoch": 20.684931506849313, |
| "grad_norm": 0.09227459877729416, |
| "learning_rate": 8.362004023673474e-05, |
| "loss": 0.0053, |
| "step": 6040 |
| }, |
| { |
| "epoch": 20.71917808219178, |
| "grad_norm": 0.061509717255830765, |
| "learning_rate": 8.355880044320598e-05, |
| "loss": 0.0047, |
| "step": 6050 |
| }, |
| { |
| "epoch": 20.753424657534246, |
| "grad_norm": 0.06945177167654037, |
| "learning_rate": 8.349746890119826e-05, |
| "loss": 0.0058, |
| "step": 6060 |
| }, |
| { |
| "epoch": 20.78767123287671, |
| "grad_norm": 0.05989925190806389, |
| "learning_rate": 8.343604577838964e-05, |
| "loss": 0.0043, |
| "step": 6070 |
| }, |
| { |
| "epoch": 20.82191780821918, |
| "grad_norm": 0.049721136689186096, |
| "learning_rate": 8.337453124270863e-05, |
| "loss": 0.0055, |
| "step": 6080 |
| }, |
| { |
| "epoch": 20.856164383561644, |
| "grad_norm": 0.05695385858416557, |
| "learning_rate": 8.331292546233362e-05, |
| "loss": 0.0051, |
| "step": 6090 |
| }, |
| { |
| "epoch": 20.89041095890411, |
| "grad_norm": 0.06528819352388382, |
| "learning_rate": 8.32512286056924e-05, |
| "loss": 0.0055, |
| "step": 6100 |
| }, |
| { |
| "epoch": 20.924657534246574, |
| "grad_norm": 0.09410160034894943, |
| "learning_rate": 8.318944084146192e-05, |
| "loss": 0.0048, |
| "step": 6110 |
| }, |
| { |
| "epoch": 20.958904109589042, |
| "grad_norm": 0.05946965143084526, |
| "learning_rate": 8.31275623385675e-05, |
| "loss": 0.0047, |
| "step": 6120 |
| }, |
| { |
| "epoch": 20.993150684931507, |
| "grad_norm": 0.07674143463373184, |
| "learning_rate": 8.306559326618259e-05, |
| "loss": 0.0054, |
| "step": 6130 |
| }, |
| { |
| "epoch": 21.027397260273972, |
| "grad_norm": 0.079231858253479, |
| "learning_rate": 8.300353379372834e-05, |
| "loss": 0.0066, |
| "step": 6140 |
| }, |
| { |
| "epoch": 21.061643835616437, |
| "grad_norm": 0.07016433775424957, |
| "learning_rate": 8.29413840908729e-05, |
| "loss": 0.005, |
| "step": 6150 |
| }, |
| { |
| "epoch": 21.095890410958905, |
| "grad_norm": 0.07434679567813873, |
| "learning_rate": 8.287914432753123e-05, |
| "loss": 0.0049, |
| "step": 6160 |
| }, |
| { |
| "epoch": 21.13013698630137, |
| "grad_norm": 0.06633684784173965, |
| "learning_rate": 8.281681467386446e-05, |
| "loss": 0.0066, |
| "step": 6170 |
| }, |
| { |
| "epoch": 21.164383561643834, |
| "grad_norm": 0.0985659658908844, |
| "learning_rate": 8.275439530027948e-05, |
| "loss": 0.0072, |
| "step": 6180 |
| }, |
| { |
| "epoch": 21.198630136986303, |
| "grad_norm": 0.07376008480787277, |
| "learning_rate": 8.269188637742846e-05, |
| "loss": 0.0061, |
| "step": 6190 |
| }, |
| { |
| "epoch": 21.232876712328768, |
| "grad_norm": 0.08632157742977142, |
| "learning_rate": 8.262928807620843e-05, |
| "loss": 0.0056, |
| "step": 6200 |
| }, |
| { |
| "epoch": 21.267123287671232, |
| "grad_norm": 0.05637221783399582, |
| "learning_rate": 8.256660056776076e-05, |
| "loss": 0.0052, |
| "step": 6210 |
| }, |
| { |
| "epoch": 21.301369863013697, |
| "grad_norm": 0.07299555838108063, |
| "learning_rate": 8.250382402347065e-05, |
| "loss": 0.0044, |
| "step": 6220 |
| }, |
| { |
| "epoch": 21.335616438356166, |
| "grad_norm": 0.07100588828325272, |
| "learning_rate": 8.244095861496686e-05, |
| "loss": 0.008, |
| "step": 6230 |
| }, |
| { |
| "epoch": 21.36986301369863, |
| "grad_norm": 0.05591908469796181, |
| "learning_rate": 8.237800451412095e-05, |
| "loss": 0.0059, |
| "step": 6240 |
| }, |
| { |
| "epoch": 21.404109589041095, |
| "grad_norm": 0.06881851702928543, |
| "learning_rate": 8.231496189304704e-05, |
| "loss": 0.0044, |
| "step": 6250 |
| }, |
| { |
| "epoch": 21.438356164383563, |
| "grad_norm": 0.07660767436027527, |
| "learning_rate": 8.225183092410128e-05, |
| "loss": 0.0047, |
| "step": 6260 |
| }, |
| { |
| "epoch": 21.472602739726028, |
| "grad_norm": 0.07003331184387207, |
| "learning_rate": 8.218861177988129e-05, |
| "loss": 0.0057, |
| "step": 6270 |
| }, |
| { |
| "epoch": 21.506849315068493, |
| "grad_norm": 0.06258975714445114, |
| "learning_rate": 8.212530463322583e-05, |
| "loss": 0.0045, |
| "step": 6280 |
| }, |
| { |
| "epoch": 21.541095890410958, |
| "grad_norm": 0.07067704945802689, |
| "learning_rate": 8.206190965721419e-05, |
| "loss": 0.0045, |
| "step": 6290 |
| }, |
| { |
| "epoch": 21.575342465753426, |
| "grad_norm": 0.08280424028635025, |
| "learning_rate": 8.199842702516583e-05, |
| "loss": 0.0053, |
| "step": 6300 |
| }, |
| { |
| "epoch": 21.60958904109589, |
| "grad_norm": 0.0606718584895134, |
| "learning_rate": 8.193485691063985e-05, |
| "loss": 0.0054, |
| "step": 6310 |
| }, |
| { |
| "epoch": 21.643835616438356, |
| "grad_norm": 0.06702598184347153, |
| "learning_rate": 8.18711994874345e-05, |
| "loss": 0.0045, |
| "step": 6320 |
| }, |
| { |
| "epoch": 21.67808219178082, |
| "grad_norm": 0.06098796799778938, |
| "learning_rate": 8.180745492958674e-05, |
| "loss": 0.0039, |
| "step": 6330 |
| }, |
| { |
| "epoch": 21.71232876712329, |
| "grad_norm": 0.06085795536637306, |
| "learning_rate": 8.174362341137177e-05, |
| "loss": 0.0046, |
| "step": 6340 |
| }, |
| { |
| "epoch": 21.746575342465754, |
| "grad_norm": 0.06964936852455139, |
| "learning_rate": 8.167970510730253e-05, |
| "loss": 0.0047, |
| "step": 6350 |
| }, |
| { |
| "epoch": 21.78082191780822, |
| "grad_norm": 0.05623121187090874, |
| "learning_rate": 8.161570019212921e-05, |
| "loss": 0.0045, |
| "step": 6360 |
| }, |
| { |
| "epoch": 21.815068493150687, |
| "grad_norm": 0.049630485475063324, |
| "learning_rate": 8.155160884083881e-05, |
| "loss": 0.0041, |
| "step": 6370 |
| }, |
| { |
| "epoch": 21.84931506849315, |
| "grad_norm": 0.06835552304983139, |
| "learning_rate": 8.148743122865463e-05, |
| "loss": 0.0052, |
| "step": 6380 |
| }, |
| { |
| "epoch": 21.883561643835616, |
| "grad_norm": 0.07761190831661224, |
| "learning_rate": 8.14231675310358e-05, |
| "loss": 0.0061, |
| "step": 6390 |
| }, |
| { |
| "epoch": 21.91780821917808, |
| "grad_norm": 0.05890411511063576, |
| "learning_rate": 8.135881792367686e-05, |
| "loss": 0.0044, |
| "step": 6400 |
| }, |
| { |
| "epoch": 21.95205479452055, |
| "grad_norm": 0.06293132156133652, |
| "learning_rate": 8.129438258250712e-05, |
| "loss": 0.0046, |
| "step": 6410 |
| }, |
| { |
| "epoch": 21.986301369863014, |
| "grad_norm": 0.06598832458257675, |
| "learning_rate": 8.12298616836904e-05, |
| "loss": 0.0047, |
| "step": 6420 |
| }, |
| { |
| "epoch": 22.02054794520548, |
| "grad_norm": 0.0738854929804802, |
| "learning_rate": 8.116525540362434e-05, |
| "loss": 0.0049, |
| "step": 6430 |
| }, |
| { |
| "epoch": 22.054794520547944, |
| "grad_norm": 0.0818733349442482, |
| "learning_rate": 8.110056391894005e-05, |
| "loss": 0.0039, |
| "step": 6440 |
| }, |
| { |
| "epoch": 22.089041095890412, |
| "grad_norm": 0.07615378499031067, |
| "learning_rate": 8.103578740650156e-05, |
| "loss": 0.0055, |
| "step": 6450 |
| }, |
| { |
| "epoch": 22.123287671232877, |
| "grad_norm": 0.0781155526638031, |
| "learning_rate": 8.097092604340542e-05, |
| "loss": 0.0045, |
| "step": 6460 |
| }, |
| { |
| "epoch": 22.15753424657534, |
| "grad_norm": 0.065615713596344, |
| "learning_rate": 8.090598000698009e-05, |
| "loss": 0.0055, |
| "step": 6470 |
| }, |
| { |
| "epoch": 22.19178082191781, |
| "grad_norm": 0.06982345879077911, |
| "learning_rate": 8.084094947478556e-05, |
| "loss": 0.0046, |
| "step": 6480 |
| }, |
| { |
| "epoch": 22.226027397260275, |
| "grad_norm": 0.04483773186802864, |
| "learning_rate": 8.077583462461283e-05, |
| "loss": 0.0045, |
| "step": 6490 |
| }, |
| { |
| "epoch": 22.26027397260274, |
| "grad_norm": 0.08611573278903961, |
| "learning_rate": 8.07106356344834e-05, |
| "loss": 0.0069, |
| "step": 6500 |
| }, |
| { |
| "epoch": 22.294520547945204, |
| "grad_norm": 0.08743693679571152, |
| "learning_rate": 8.064535268264883e-05, |
| "loss": 0.0052, |
| "step": 6510 |
| }, |
| { |
| "epoch": 22.328767123287673, |
| "grad_norm": 0.07372572273015976, |
| "learning_rate": 8.057998594759022e-05, |
| "loss": 0.0067, |
| "step": 6520 |
| }, |
| { |
| "epoch": 22.363013698630137, |
| "grad_norm": 0.07083258777856827, |
| "learning_rate": 8.051453560801772e-05, |
| "loss": 0.0057, |
| "step": 6530 |
| }, |
| { |
| "epoch": 22.397260273972602, |
| "grad_norm": 0.06519380211830139, |
| "learning_rate": 8.044900184287007e-05, |
| "loss": 0.0041, |
| "step": 6540 |
| }, |
| { |
| "epoch": 22.431506849315067, |
| "grad_norm": 0.07042599469423294, |
| "learning_rate": 8.038338483131407e-05, |
| "loss": 0.0038, |
| "step": 6550 |
| }, |
| { |
| "epoch": 22.465753424657535, |
| "grad_norm": 0.07242155075073242, |
| "learning_rate": 8.031768475274413e-05, |
| "loss": 0.0053, |
| "step": 6560 |
| }, |
| { |
| "epoch": 22.5, |
| "grad_norm": 0.06891541928052902, |
| "learning_rate": 8.025190178678175e-05, |
| "loss": 0.0064, |
| "step": 6570 |
| }, |
| { |
| "epoch": 22.534246575342465, |
| "grad_norm": 0.06691320240497589, |
| "learning_rate": 8.018603611327504e-05, |
| "loss": 0.0057, |
| "step": 6580 |
| }, |
| { |
| "epoch": 22.568493150684933, |
| "grad_norm": 0.06496941298246384, |
| "learning_rate": 8.012008791229826e-05, |
| "loss": 0.0054, |
| "step": 6590 |
| }, |
| { |
| "epoch": 22.602739726027398, |
| "grad_norm": 0.06711320579051971, |
| "learning_rate": 8.005405736415126e-05, |
| "loss": 0.0061, |
| "step": 6600 |
| }, |
| { |
| "epoch": 22.636986301369863, |
| "grad_norm": 0.05645699426531792, |
| "learning_rate": 7.998794464935904e-05, |
| "loss": 0.0056, |
| "step": 6610 |
| }, |
| { |
| "epoch": 22.671232876712327, |
| "grad_norm": 0.11775130033493042, |
| "learning_rate": 7.992174994867123e-05, |
| "loss": 0.0075, |
| "step": 6620 |
| }, |
| { |
| "epoch": 22.705479452054796, |
| "grad_norm": 0.05640696734189987, |
| "learning_rate": 7.985547344306161e-05, |
| "loss": 0.005, |
| "step": 6630 |
| }, |
| { |
| "epoch": 22.73972602739726, |
| "grad_norm": 0.08137935400009155, |
| "learning_rate": 7.978911531372765e-05, |
| "loss": 0.0046, |
| "step": 6640 |
| }, |
| { |
| "epoch": 22.773972602739725, |
| "grad_norm": 0.07216529548168182, |
| "learning_rate": 7.972267574208991e-05, |
| "loss": 0.0045, |
| "step": 6650 |
| }, |
| { |
| "epoch": 22.80821917808219, |
| "grad_norm": 0.07081134617328644, |
| "learning_rate": 7.965615490979163e-05, |
| "loss": 0.0047, |
| "step": 6660 |
| }, |
| { |
| "epoch": 22.84246575342466, |
| "grad_norm": 0.08713933825492859, |
| "learning_rate": 7.958955299869825e-05, |
| "loss": 0.0049, |
| "step": 6670 |
| }, |
| { |
| "epoch": 22.876712328767123, |
| "grad_norm": 0.06303700804710388, |
| "learning_rate": 7.952287019089685e-05, |
| "loss": 0.0045, |
| "step": 6680 |
| }, |
| { |
| "epoch": 22.910958904109588, |
| "grad_norm": 0.09050445258617401, |
| "learning_rate": 7.945610666869568e-05, |
| "loss": 0.005, |
| "step": 6690 |
| }, |
| { |
| "epoch": 22.945205479452056, |
| "grad_norm": 0.07567279040813446, |
| "learning_rate": 7.938926261462366e-05, |
| "loss": 0.0043, |
| "step": 6700 |
| }, |
| { |
| "epoch": 22.97945205479452, |
| "grad_norm": 0.06795069575309753, |
| "learning_rate": 7.932233821142987e-05, |
| "loss": 0.0041, |
| "step": 6710 |
| }, |
| { |
| "epoch": 23.013698630136986, |
| "grad_norm": 0.07449833303689957, |
| "learning_rate": 7.925533364208309e-05, |
| "loss": 0.0063, |
| "step": 6720 |
| }, |
| { |
| "epoch": 23.04794520547945, |
| "grad_norm": 0.07487854361534119, |
| "learning_rate": 7.918824908977123e-05, |
| "loss": 0.0055, |
| "step": 6730 |
| }, |
| { |
| "epoch": 23.08219178082192, |
| "grad_norm": 0.07336029410362244, |
| "learning_rate": 7.912108473790092e-05, |
| "loss": 0.0063, |
| "step": 6740 |
| }, |
| { |
| "epoch": 23.116438356164384, |
| "grad_norm": 0.05453534796833992, |
| "learning_rate": 7.905384077009693e-05, |
| "loss": 0.0038, |
| "step": 6750 |
| }, |
| { |
| "epoch": 23.15068493150685, |
| "grad_norm": 0.08057594299316406, |
| "learning_rate": 7.898651737020166e-05, |
| "loss": 0.0046, |
| "step": 6760 |
| }, |
| { |
| "epoch": 23.184931506849313, |
| "grad_norm": 0.07083872705698013, |
| "learning_rate": 7.891911472227478e-05, |
| "loss": 0.0038, |
| "step": 6770 |
| }, |
| { |
| "epoch": 23.21917808219178, |
| "grad_norm": 0.07410766929388046, |
| "learning_rate": 7.88516330105925e-05, |
| "loss": 0.0057, |
| "step": 6780 |
| }, |
| { |
| "epoch": 23.253424657534246, |
| "grad_norm": 0.0587015375494957, |
| "learning_rate": 7.878407241964729e-05, |
| "loss": 0.0039, |
| "step": 6790 |
| }, |
| { |
| "epoch": 23.28767123287671, |
| "grad_norm": 0.05226041004061699, |
| "learning_rate": 7.871643313414718e-05, |
| "loss": 0.0041, |
| "step": 6800 |
| }, |
| { |
| "epoch": 23.32191780821918, |
| "grad_norm": 0.06701230257749557, |
| "learning_rate": 7.864871533901544e-05, |
| "loss": 0.0048, |
| "step": 6810 |
| }, |
| { |
| "epoch": 23.356164383561644, |
| "grad_norm": 0.046900056302547455, |
| "learning_rate": 7.858091921938988e-05, |
| "loss": 0.0051, |
| "step": 6820 |
| }, |
| { |
| "epoch": 23.39041095890411, |
| "grad_norm": 0.07751727104187012, |
| "learning_rate": 7.851304496062254e-05, |
| "loss": 0.0048, |
| "step": 6830 |
| }, |
| { |
| "epoch": 23.424657534246574, |
| "grad_norm": 0.06238679215312004, |
| "learning_rate": 7.844509274827907e-05, |
| "loss": 0.0051, |
| "step": 6840 |
| }, |
| { |
| "epoch": 23.458904109589042, |
| "grad_norm": 0.060652438551187515, |
| "learning_rate": 7.837706276813819e-05, |
| "loss": 0.0048, |
| "step": 6850 |
| }, |
| { |
| "epoch": 23.493150684931507, |
| "grad_norm": 0.0687926784157753, |
| "learning_rate": 7.830895520619128e-05, |
| "loss": 0.0059, |
| "step": 6860 |
| }, |
| { |
| "epoch": 23.527397260273972, |
| "grad_norm": 0.08176784217357635, |
| "learning_rate": 7.824077024864179e-05, |
| "loss": 0.0053, |
| "step": 6870 |
| }, |
| { |
| "epoch": 23.561643835616437, |
| "grad_norm": 0.05597813054919243, |
| "learning_rate": 7.817250808190483e-05, |
| "loss": 0.0035, |
| "step": 6880 |
| }, |
| { |
| "epoch": 23.595890410958905, |
| "grad_norm": 0.06768350303173065, |
| "learning_rate": 7.810416889260653e-05, |
| "loss": 0.0056, |
| "step": 6890 |
| }, |
| { |
| "epoch": 23.63013698630137, |
| "grad_norm": 0.08265560865402222, |
| "learning_rate": 7.803575286758364e-05, |
| "loss": 0.0045, |
| "step": 6900 |
| }, |
| { |
| "epoch": 23.664383561643834, |
| "grad_norm": 0.06400995701551437, |
| "learning_rate": 7.796726019388295e-05, |
| "loss": 0.0047, |
| "step": 6910 |
| }, |
| { |
| "epoch": 23.698630136986303, |
| "grad_norm": 0.06078376621007919, |
| "learning_rate": 7.789869105876083e-05, |
| "loss": 0.0039, |
| "step": 6920 |
| }, |
| { |
| "epoch": 23.732876712328768, |
| "grad_norm": 0.08402138203382492, |
| "learning_rate": 7.783004564968263e-05, |
| "loss": 0.0054, |
| "step": 6930 |
| }, |
| { |
| "epoch": 23.767123287671232, |
| "grad_norm": 0.05891257897019386, |
| "learning_rate": 7.776132415432234e-05, |
| "loss": 0.0052, |
| "step": 6940 |
| }, |
| { |
| "epoch": 23.801369863013697, |
| "grad_norm": 0.04951402544975281, |
| "learning_rate": 7.769252676056187e-05, |
| "loss": 0.0046, |
| "step": 6950 |
| }, |
| { |
| "epoch": 23.835616438356166, |
| "grad_norm": 0.06751031428575516, |
| "learning_rate": 7.762365365649067e-05, |
| "loss": 0.0036, |
| "step": 6960 |
| }, |
| { |
| "epoch": 23.86986301369863, |
| "grad_norm": 0.052420638501644135, |
| "learning_rate": 7.755470503040516e-05, |
| "loss": 0.0046, |
| "step": 6970 |
| }, |
| { |
| "epoch": 23.904109589041095, |
| "grad_norm": 0.05422347038984299, |
| "learning_rate": 7.748568107080832e-05, |
| "loss": 0.0049, |
| "step": 6980 |
| }, |
| { |
| "epoch": 23.938356164383563, |
| "grad_norm": 0.08441081643104553, |
| "learning_rate": 7.741658196640892e-05, |
| "loss": 0.0067, |
| "step": 6990 |
| }, |
| { |
| "epoch": 23.972602739726028, |
| "grad_norm": 0.08261924237012863, |
| "learning_rate": 7.734740790612136e-05, |
| "loss": 0.0078, |
| "step": 7000 |
| }, |
| { |
| "epoch": 24.006849315068493, |
| "grad_norm": 0.06718011945486069, |
| "learning_rate": 7.727815907906481e-05, |
| "loss": 0.0049, |
| "step": 7010 |
| }, |
| { |
| "epoch": 24.041095890410958, |
| "grad_norm": 0.07961238920688629, |
| "learning_rate": 7.720883567456298e-05, |
| "loss": 0.0051, |
| "step": 7020 |
| }, |
| { |
| "epoch": 24.075342465753426, |
| "grad_norm": 0.06769448518753052, |
| "learning_rate": 7.713943788214337e-05, |
| "loss": 0.0061, |
| "step": 7030 |
| }, |
| { |
| "epoch": 24.10958904109589, |
| "grad_norm": 0.06722130626440048, |
| "learning_rate": 7.70699658915369e-05, |
| "loss": 0.005, |
| "step": 7040 |
| }, |
| { |
| "epoch": 24.143835616438356, |
| "grad_norm": 0.07311046123504639, |
| "learning_rate": 7.700041989267736e-05, |
| "loss": 0.0052, |
| "step": 7050 |
| }, |
| { |
| "epoch": 24.17808219178082, |
| "grad_norm": 0.0782022699713707, |
| "learning_rate": 7.693080007570084e-05, |
| "loss": 0.0045, |
| "step": 7060 |
| }, |
| { |
| "epoch": 24.21232876712329, |
| "grad_norm": 0.06442761421203613, |
| "learning_rate": 7.686110663094525e-05, |
| "loss": 0.0044, |
| "step": 7070 |
| }, |
| { |
| "epoch": 24.246575342465754, |
| "grad_norm": 0.05376974493265152, |
| "learning_rate": 7.679133974894983e-05, |
| "loss": 0.0051, |
| "step": 7080 |
| }, |
| { |
| "epoch": 24.28082191780822, |
| "grad_norm": 0.06022098660469055, |
| "learning_rate": 7.672149962045457e-05, |
| "loss": 0.0063, |
| "step": 7090 |
| }, |
| { |
| "epoch": 24.315068493150687, |
| "grad_norm": 0.07531668990850449, |
| "learning_rate": 7.66515864363997e-05, |
| "loss": 0.0057, |
| "step": 7100 |
| }, |
| { |
| "epoch": 24.34931506849315, |
| "grad_norm": 0.06881547719240189, |
| "learning_rate": 7.658160038792518e-05, |
| "loss": 0.0041, |
| "step": 7110 |
| }, |
| { |
| "epoch": 24.383561643835616, |
| "grad_norm": 0.06569792330265045, |
| "learning_rate": 7.651154166637025e-05, |
| "loss": 0.0053, |
| "step": 7120 |
| }, |
| { |
| "epoch": 24.41780821917808, |
| "grad_norm": 0.09814415872097015, |
| "learning_rate": 7.644141046327271e-05, |
| "loss": 0.0049, |
| "step": 7130 |
| }, |
| { |
| "epoch": 24.45205479452055, |
| "grad_norm": 0.04979328811168671, |
| "learning_rate": 7.637120697036866e-05, |
| "loss": 0.0051, |
| "step": 7140 |
| }, |
| { |
| "epoch": 24.486301369863014, |
| "grad_norm": 0.09558571875095367, |
| "learning_rate": 7.630093137959171e-05, |
| "loss": 0.0065, |
| "step": 7150 |
| }, |
| { |
| "epoch": 24.52054794520548, |
| "grad_norm": 0.07612846791744232, |
| "learning_rate": 7.623058388307269e-05, |
| "loss": 0.0052, |
| "step": 7160 |
| }, |
| { |
| "epoch": 24.554794520547944, |
| "grad_norm": 0.08981167525053024, |
| "learning_rate": 7.616016467313891e-05, |
| "loss": 0.0049, |
| "step": 7170 |
| }, |
| { |
| "epoch": 24.589041095890412, |
| "grad_norm": 0.08244021236896515, |
| "learning_rate": 7.608967394231387e-05, |
| "loss": 0.0049, |
| "step": 7180 |
| }, |
| { |
| "epoch": 24.623287671232877, |
| "grad_norm": 0.049214284867048264, |
| "learning_rate": 7.60191118833165e-05, |
| "loss": 0.0037, |
| "step": 7190 |
| }, |
| { |
| "epoch": 24.65753424657534, |
| "grad_norm": 0.061441682279109955, |
| "learning_rate": 7.594847868906076e-05, |
| "loss": 0.0054, |
| "step": 7200 |
| }, |
| { |
| "epoch": 24.69178082191781, |
| "grad_norm": 0.07628575712442398, |
| "learning_rate": 7.587777455265515e-05, |
| "loss": 0.0056, |
| "step": 7210 |
| }, |
| { |
| "epoch": 24.726027397260275, |
| "grad_norm": 0.06337640434503555, |
| "learning_rate": 7.580699966740201e-05, |
| "loss": 0.0038, |
| "step": 7220 |
| }, |
| { |
| "epoch": 24.76027397260274, |
| "grad_norm": 0.05385832488536835, |
| "learning_rate": 7.573615422679726e-05, |
| "loss": 0.0043, |
| "step": 7230 |
| }, |
| { |
| "epoch": 24.794520547945204, |
| "grad_norm": 0.06283440440893173, |
| "learning_rate": 7.566523842452958e-05, |
| "loss": 0.0048, |
| "step": 7240 |
| }, |
| { |
| "epoch": 24.828767123287673, |
| "grad_norm": 0.04975597560405731, |
| "learning_rate": 7.559425245448006e-05, |
| "loss": 0.0049, |
| "step": 7250 |
| }, |
| { |
| "epoch": 24.863013698630137, |
| "grad_norm": 0.07069192081689835, |
| "learning_rate": 7.552319651072164e-05, |
| "loss": 0.0049, |
| "step": 7260 |
| }, |
| { |
| "epoch": 24.897260273972602, |
| "grad_norm": 0.07121792435646057, |
| "learning_rate": 7.545207078751857e-05, |
| "loss": 0.0052, |
| "step": 7270 |
| }, |
| { |
| "epoch": 24.931506849315067, |
| "grad_norm": 0.09325384348630905, |
| "learning_rate": 7.538087547932585e-05, |
| "loss": 0.0045, |
| "step": 7280 |
| }, |
| { |
| "epoch": 24.965753424657535, |
| "grad_norm": 0.08620842546224594, |
| "learning_rate": 7.530961078078873e-05, |
| "loss": 0.0046, |
| "step": 7290 |
| }, |
| { |
| "epoch": 25.0, |
| "grad_norm": 0.06410706043243408, |
| "learning_rate": 7.52382768867422e-05, |
| "loss": 0.0042, |
| "step": 7300 |
| }, |
| { |
| "epoch": 25.034246575342465, |
| "grad_norm": 0.07789020985364914, |
| "learning_rate": 7.516687399221037e-05, |
| "loss": 0.0046, |
| "step": 7310 |
| }, |
| { |
| "epoch": 25.068493150684933, |
| "grad_norm": 0.059597231447696686, |
| "learning_rate": 7.509540229240601e-05, |
| "loss": 0.0048, |
| "step": 7320 |
| }, |
| { |
| "epoch": 25.102739726027398, |
| "grad_norm": 0.05253349244594574, |
| "learning_rate": 7.50238619827301e-05, |
| "loss": 0.0055, |
| "step": 7330 |
| }, |
| { |
| "epoch": 25.136986301369863, |
| "grad_norm": 0.06278624385595322, |
| "learning_rate": 7.495225325877103e-05, |
| "loss": 0.0046, |
| "step": 7340 |
| }, |
| { |
| "epoch": 25.171232876712327, |
| "grad_norm": 0.07764960080385208, |
| "learning_rate": 7.488057631630437e-05, |
| "loss": 0.0046, |
| "step": 7350 |
| }, |
| { |
| "epoch": 25.205479452054796, |
| "grad_norm": 0.09635750204324722, |
| "learning_rate": 7.480883135129211e-05, |
| "loss": 0.0039, |
| "step": 7360 |
| }, |
| { |
| "epoch": 25.23972602739726, |
| "grad_norm": 0.08985763788223267, |
| "learning_rate": 7.473701855988227e-05, |
| "loss": 0.0057, |
| "step": 7370 |
| }, |
| { |
| "epoch": 25.273972602739725, |
| "grad_norm": 0.05667169764637947, |
| "learning_rate": 7.466513813840825e-05, |
| "loss": 0.0042, |
| "step": 7380 |
| }, |
| { |
| "epoch": 25.30821917808219, |
| "grad_norm": 0.050024062395095825, |
| "learning_rate": 7.45931902833884e-05, |
| "loss": 0.0044, |
| "step": 7390 |
| }, |
| { |
| "epoch": 25.34246575342466, |
| "grad_norm": 0.05679883807897568, |
| "learning_rate": 7.452117519152542e-05, |
| "loss": 0.0045, |
| "step": 7400 |
| }, |
| { |
| "epoch": 25.376712328767123, |
| "grad_norm": 0.07050922513008118, |
| "learning_rate": 7.444909305970578e-05, |
| "loss": 0.0047, |
| "step": 7410 |
| }, |
| { |
| "epoch": 25.410958904109588, |
| "grad_norm": 0.059567615389823914, |
| "learning_rate": 7.437694408499933e-05, |
| "loss": 0.0054, |
| "step": 7420 |
| }, |
| { |
| "epoch": 25.445205479452056, |
| "grad_norm": 0.10416945815086365, |
| "learning_rate": 7.430472846465856e-05, |
| "loss": 0.0062, |
| "step": 7430 |
| }, |
| { |
| "epoch": 25.47945205479452, |
| "grad_norm": 0.09317982196807861, |
| "learning_rate": 7.423244639611826e-05, |
| "loss": 0.0061, |
| "step": 7440 |
| }, |
| { |
| "epoch": 25.513698630136986, |
| "grad_norm": 0.06658624857664108, |
| "learning_rate": 7.416009807699482e-05, |
| "loss": 0.0064, |
| "step": 7450 |
| }, |
| { |
| "epoch": 25.54794520547945, |
| "grad_norm": 0.07275442034006119, |
| "learning_rate": 7.408768370508576e-05, |
| "loss": 0.007, |
| "step": 7460 |
| }, |
| { |
| "epoch": 25.58219178082192, |
| "grad_norm": 0.09432009607553482, |
| "learning_rate": 7.401520347836926e-05, |
| "loss": 0.0077, |
| "step": 7470 |
| }, |
| { |
| "epoch": 25.616438356164384, |
| "grad_norm": 0.06725318729877472, |
| "learning_rate": 7.394265759500348e-05, |
| "loss": 0.0049, |
| "step": 7480 |
| }, |
| { |
| "epoch": 25.65068493150685, |
| "grad_norm": 0.07666585594415665, |
| "learning_rate": 7.387004625332608e-05, |
| "loss": 0.0078, |
| "step": 7490 |
| }, |
| { |
| "epoch": 25.684931506849313, |
| "grad_norm": 0.07260438054800034, |
| "learning_rate": 7.379736965185368e-05, |
| "loss": 0.0052, |
| "step": 7500 |
| }, |
| { |
| "epoch": 25.71917808219178, |
| "grad_norm": 0.060822173953056335, |
| "learning_rate": 7.372462798928137e-05, |
| "loss": 0.0049, |
| "step": 7510 |
| }, |
| { |
| "epoch": 25.753424657534246, |
| "grad_norm": 0.049856215715408325, |
| "learning_rate": 7.365182146448205e-05, |
| "loss": 0.0036, |
| "step": 7520 |
| }, |
| { |
| "epoch": 25.78767123287671, |
| "grad_norm": 0.04413225129246712, |
| "learning_rate": 7.357895027650598e-05, |
| "loss": 0.004, |
| "step": 7530 |
| }, |
| { |
| "epoch": 25.82191780821918, |
| "grad_norm": 0.07596855610609055, |
| "learning_rate": 7.350601462458024e-05, |
| "loss": 0.0045, |
| "step": 7540 |
| }, |
| { |
| "epoch": 25.856164383561644, |
| "grad_norm": 0.05063284933567047, |
| "learning_rate": 7.343301470810808e-05, |
| "loss": 0.0042, |
| "step": 7550 |
| }, |
| { |
| "epoch": 25.89041095890411, |
| "grad_norm": 0.05205608904361725, |
| "learning_rate": 7.335995072666848e-05, |
| "loss": 0.0052, |
| "step": 7560 |
| }, |
| { |
| "epoch": 25.924657534246574, |
| "grad_norm": 0.0644666850566864, |
| "learning_rate": 7.328682288001561e-05, |
| "loss": 0.0054, |
| "step": 7570 |
| }, |
| { |
| "epoch": 25.958904109589042, |
| "grad_norm": 0.042358387261629105, |
| "learning_rate": 7.32136313680782e-05, |
| "loss": 0.0055, |
| "step": 7580 |
| }, |
| { |
| "epoch": 25.993150684931507, |
| "grad_norm": 0.0498831570148468, |
| "learning_rate": 7.3140376390959e-05, |
| "loss": 0.005, |
| "step": 7590 |
| }, |
| { |
| "epoch": 26.027397260273972, |
| "grad_norm": 0.059835679829120636, |
| "learning_rate": 7.30670581489344e-05, |
| "loss": 0.005, |
| "step": 7600 |
| }, |
| { |
| "epoch": 26.061643835616437, |
| "grad_norm": 0.052039116621017456, |
| "learning_rate": 7.299367684245362e-05, |
| "loss": 0.005, |
| "step": 7610 |
| }, |
| { |
| "epoch": 26.095890410958905, |
| "grad_norm": 0.06602521985769272, |
| "learning_rate": 7.292023267213835e-05, |
| "loss": 0.0044, |
| "step": 7620 |
| }, |
| { |
| "epoch": 26.13013698630137, |
| "grad_norm": 0.05572016164660454, |
| "learning_rate": 7.284672583878219e-05, |
| "loss": 0.0046, |
| "step": 7630 |
| }, |
| { |
| "epoch": 26.164383561643834, |
| "grad_norm": 0.06919296830892563, |
| "learning_rate": 7.277315654334997e-05, |
| "loss": 0.005, |
| "step": 7640 |
| }, |
| { |
| "epoch": 26.198630136986303, |
| "grad_norm": 0.056638121604919434, |
| "learning_rate": 7.269952498697734e-05, |
| "loss": 0.004, |
| "step": 7650 |
| }, |
| { |
| "epoch": 26.232876712328768, |
| "grad_norm": 0.060026634484529495, |
| "learning_rate": 7.262583137097018e-05, |
| "loss": 0.0041, |
| "step": 7660 |
| }, |
| { |
| "epoch": 26.267123287671232, |
| "grad_norm": 0.07666108757257462, |
| "learning_rate": 7.255207589680402e-05, |
| "loss": 0.0044, |
| "step": 7670 |
| }, |
| { |
| "epoch": 26.301369863013697, |
| "grad_norm": 0.05837415158748627, |
| "learning_rate": 7.247825876612353e-05, |
| "loss": 0.0048, |
| "step": 7680 |
| }, |
| { |
| "epoch": 26.335616438356166, |
| "grad_norm": 0.04758793115615845, |
| "learning_rate": 7.240438018074189e-05, |
| "loss": 0.0066, |
| "step": 7690 |
| }, |
| { |
| "epoch": 26.36986301369863, |
| "grad_norm": 0.0517563596367836, |
| "learning_rate": 7.233044034264034e-05, |
| "loss": 0.0054, |
| "step": 7700 |
| }, |
| { |
| "epoch": 26.404109589041095, |
| "grad_norm": 0.06696631014347076, |
| "learning_rate": 7.225643945396757e-05, |
| "loss": 0.0046, |
| "step": 7710 |
| }, |
| { |
| "epoch": 26.438356164383563, |
| "grad_norm": 0.06236398220062256, |
| "learning_rate": 7.218237771703921e-05, |
| "loss": 0.0059, |
| "step": 7720 |
| }, |
| { |
| "epoch": 26.472602739726028, |
| "grad_norm": 0.04172263294458389, |
| "learning_rate": 7.210825533433719e-05, |
| "loss": 0.0049, |
| "step": 7730 |
| }, |
| { |
| "epoch": 26.506849315068493, |
| "grad_norm": 0.07122749090194702, |
| "learning_rate": 7.203407250850928e-05, |
| "loss": 0.006, |
| "step": 7740 |
| }, |
| { |
| "epoch": 26.541095890410958, |
| "grad_norm": 0.06881573796272278, |
| "learning_rate": 7.195982944236851e-05, |
| "loss": 0.0078, |
| "step": 7750 |
| }, |
| { |
| "epoch": 26.575342465753426, |
| "grad_norm": 0.10515818744897842, |
| "learning_rate": 7.188552633889259e-05, |
| "loss": 0.0055, |
| "step": 7760 |
| }, |
| { |
| "epoch": 26.60958904109589, |
| "grad_norm": 0.06753715127706528, |
| "learning_rate": 7.181116340122336e-05, |
| "loss": 0.0046, |
| "step": 7770 |
| }, |
| { |
| "epoch": 26.643835616438356, |
| "grad_norm": 0.08554303646087646, |
| "learning_rate": 7.173674083266624e-05, |
| "loss": 0.0048, |
| "step": 7780 |
| }, |
| { |
| "epoch": 26.67808219178082, |
| "grad_norm": 0.062416162341833115, |
| "learning_rate": 7.166225883668969e-05, |
| "loss": 0.0041, |
| "step": 7790 |
| }, |
| { |
| "epoch": 26.71232876712329, |
| "grad_norm": 0.06461095809936523, |
| "learning_rate": 7.158771761692464e-05, |
| "loss": 0.0054, |
| "step": 7800 |
| }, |
| { |
| "epoch": 26.746575342465754, |
| "grad_norm": 0.05914429947733879, |
| "learning_rate": 7.151311737716397e-05, |
| "loss": 0.0044, |
| "step": 7810 |
| }, |
| { |
| "epoch": 26.78082191780822, |
| "grad_norm": 0.06104607135057449, |
| "learning_rate": 7.143845832136188e-05, |
| "loss": 0.0051, |
| "step": 7820 |
| }, |
| { |
| "epoch": 26.815068493150687, |
| "grad_norm": 0.08471526950597763, |
| "learning_rate": 7.136374065363334e-05, |
| "loss": 0.0051, |
| "step": 7830 |
| }, |
| { |
| "epoch": 26.84931506849315, |
| "grad_norm": 0.05899347364902496, |
| "learning_rate": 7.128896457825364e-05, |
| "loss": 0.0046, |
| "step": 7840 |
| }, |
| { |
| "epoch": 26.883561643835616, |
| "grad_norm": 0.05316589027643204, |
| "learning_rate": 7.121413029965769e-05, |
| "loss": 0.0036, |
| "step": 7850 |
| }, |
| { |
| "epoch": 26.91780821917808, |
| "grad_norm": 0.06834858655929565, |
| "learning_rate": 7.113923802243957e-05, |
| "loss": 0.0047, |
| "step": 7860 |
| }, |
| { |
| "epoch": 26.95205479452055, |
| "grad_norm": 0.06230799853801727, |
| "learning_rate": 7.10642879513519e-05, |
| "loss": 0.0048, |
| "step": 7870 |
| }, |
| { |
| "epoch": 26.986301369863014, |
| "grad_norm": 0.06642285734415054, |
| "learning_rate": 7.09892802913053e-05, |
| "loss": 0.0052, |
| "step": 7880 |
| }, |
| { |
| "epoch": 27.02054794520548, |
| "grad_norm": 0.053017787635326385, |
| "learning_rate": 7.091421524736784e-05, |
| "loss": 0.0051, |
| "step": 7890 |
| }, |
| { |
| "epoch": 27.054794520547944, |
| "grad_norm": 0.07587441056966782, |
| "learning_rate": 7.083909302476453e-05, |
| "loss": 0.0042, |
| "step": 7900 |
| }, |
| { |
| "epoch": 27.089041095890412, |
| "grad_norm": 0.042078837752342224, |
| "learning_rate": 7.076391382887661e-05, |
| "loss": 0.006, |
| "step": 7910 |
| }, |
| { |
| "epoch": 27.123287671232877, |
| "grad_norm": 0.07141551375389099, |
| "learning_rate": 7.068867786524116e-05, |
| "loss": 0.0048, |
| "step": 7920 |
| }, |
| { |
| "epoch": 27.15753424657534, |
| "grad_norm": 0.06363295763731003, |
| "learning_rate": 7.061338533955043e-05, |
| "loss": 0.0048, |
| "step": 7930 |
| }, |
| { |
| "epoch": 27.19178082191781, |
| "grad_norm": 0.06564045697450638, |
| "learning_rate": 7.053803645765128e-05, |
| "loss": 0.0048, |
| "step": 7940 |
| }, |
| { |
| "epoch": 27.226027397260275, |
| "grad_norm": 0.06791261583566666, |
| "learning_rate": 7.04626314255447e-05, |
| "loss": 0.0063, |
| "step": 7950 |
| }, |
| { |
| "epoch": 27.26027397260274, |
| "grad_norm": 0.1112477108836174, |
| "learning_rate": 7.038717044938519e-05, |
| "loss": 0.0065, |
| "step": 7960 |
| }, |
| { |
| "epoch": 27.294520547945204, |
| "grad_norm": 0.09073132276535034, |
| "learning_rate": 7.031165373548014e-05, |
| "loss": 0.005, |
| "step": 7970 |
| }, |
| { |
| "epoch": 27.328767123287673, |
| "grad_norm": 0.06326626986265182, |
| "learning_rate": 7.023608149028937e-05, |
| "loss": 0.0058, |
| "step": 7980 |
| }, |
| { |
| "epoch": 27.363013698630137, |
| "grad_norm": 0.06269578635692596, |
| "learning_rate": 7.016045392042452e-05, |
| "loss": 0.0047, |
| "step": 7990 |
| }, |
| { |
| "epoch": 27.397260273972602, |
| "grad_norm": 0.07107697427272797, |
| "learning_rate": 7.008477123264848e-05, |
| "loss": 0.0055, |
| "step": 8000 |
| }, |
| { |
| "epoch": 27.431506849315067, |
| "grad_norm": 0.05553178861737251, |
| "learning_rate": 7.000903363387482e-05, |
| "loss": 0.006, |
| "step": 8010 |
| }, |
| { |
| "epoch": 27.465753424657535, |
| "grad_norm": 0.054099246859550476, |
| "learning_rate": 6.993324133116726e-05, |
| "loss": 0.0044, |
| "step": 8020 |
| }, |
| { |
| "epoch": 27.5, |
| "grad_norm": 0.0510900542140007, |
| "learning_rate": 6.985739453173903e-05, |
| "loss": 0.005, |
| "step": 8030 |
| }, |
| { |
| "epoch": 27.534246575342465, |
| "grad_norm": 0.061171576380729675, |
| "learning_rate": 6.978149344295242e-05, |
| "loss": 0.0043, |
| "step": 8040 |
| }, |
| { |
| "epoch": 27.568493150684933, |
| "grad_norm": 0.06363870948553085, |
| "learning_rate": 6.97055382723181e-05, |
| "loss": 0.0039, |
| "step": 8050 |
| }, |
| { |
| "epoch": 27.602739726027398, |
| "grad_norm": 0.061849091202020645, |
| "learning_rate": 6.962952922749457e-05, |
| "loss": 0.0037, |
| "step": 8060 |
| }, |
| { |
| "epoch": 27.636986301369863, |
| "grad_norm": 0.0676080584526062, |
| "learning_rate": 6.955346651628771e-05, |
| "loss": 0.0044, |
| "step": 8070 |
| }, |
| { |
| "epoch": 27.671232876712327, |
| "grad_norm": 0.0678730309009552, |
| "learning_rate": 6.947735034665002e-05, |
| "loss": 0.0045, |
| "step": 8080 |
| }, |
| { |
| "epoch": 27.705479452054796, |
| "grad_norm": 0.05755266547203064, |
| "learning_rate": 6.940118092668022e-05, |
| "loss": 0.0045, |
| "step": 8090 |
| }, |
| { |
| "epoch": 27.73972602739726, |
| "grad_norm": 0.05419217795133591, |
| "learning_rate": 6.932495846462261e-05, |
| "loss": 0.0043, |
| "step": 8100 |
| }, |
| { |
| "epoch": 27.773972602739725, |
| "grad_norm": 0.04824934899806976, |
| "learning_rate": 6.924868316886649e-05, |
| "loss": 0.0046, |
| "step": 8110 |
| }, |
| { |
| "epoch": 27.80821917808219, |
| "grad_norm": 0.06754368543624878, |
| "learning_rate": 6.917235524794558e-05, |
| "loss": 0.0046, |
| "step": 8120 |
| }, |
| { |
| "epoch": 27.84246575342466, |
| "grad_norm": 0.07079575955867767, |
| "learning_rate": 6.909597491053751e-05, |
| "loss": 0.005, |
| "step": 8130 |
| }, |
| { |
| "epoch": 27.876712328767123, |
| "grad_norm": 0.07714605331420898, |
| "learning_rate": 6.901954236546323e-05, |
| "loss": 0.0053, |
| "step": 8140 |
| }, |
| { |
| "epoch": 27.910958904109588, |
| "grad_norm": 0.07691439241170883, |
| "learning_rate": 6.894305782168638e-05, |
| "loss": 0.0039, |
| "step": 8150 |
| }, |
| { |
| "epoch": 27.945205479452056, |
| "grad_norm": 0.05573904141783714, |
| "learning_rate": 6.886652148831279e-05, |
| "loss": 0.0051, |
| "step": 8160 |
| }, |
| { |
| "epoch": 27.97945205479452, |
| "grad_norm": 0.042645107954740524, |
| "learning_rate": 6.878993357458986e-05, |
| "loss": 0.0036, |
| "step": 8170 |
| }, |
| { |
| "epoch": 28.013698630136986, |
| "grad_norm": 0.0758211612701416, |
| "learning_rate": 6.871329428990602e-05, |
| "loss": 0.0054, |
| "step": 8180 |
| }, |
| { |
| "epoch": 28.04794520547945, |
| "grad_norm": 0.06774193048477173, |
| "learning_rate": 6.863660384379017e-05, |
| "loss": 0.0033, |
| "step": 8190 |
| }, |
| { |
| "epoch": 28.08219178082192, |
| "grad_norm": 0.06408020853996277, |
| "learning_rate": 6.855986244591104e-05, |
| "loss": 0.0051, |
| "step": 8200 |
| }, |
| { |
| "epoch": 28.116438356164384, |
| "grad_norm": 0.06723714619874954, |
| "learning_rate": 6.84830703060767e-05, |
| "loss": 0.0049, |
| "step": 8210 |
| }, |
| { |
| "epoch": 28.15068493150685, |
| "grad_norm": 0.06539978086948395, |
| "learning_rate": 6.840622763423391e-05, |
| "loss": 0.0034, |
| "step": 8220 |
| }, |
| { |
| "epoch": 28.184931506849313, |
| "grad_norm": 0.06268458813428879, |
| "learning_rate": 6.83293346404676e-05, |
| "loss": 0.0049, |
| "step": 8230 |
| }, |
| { |
| "epoch": 28.21917808219178, |
| "grad_norm": 0.060107748955488205, |
| "learning_rate": 6.825239153500029e-05, |
| "loss": 0.0045, |
| "step": 8240 |
| }, |
| { |
| "epoch": 28.253424657534246, |
| "grad_norm": 0.08829207718372345, |
| "learning_rate": 6.817539852819149e-05, |
| "loss": 0.0041, |
| "step": 8250 |
| }, |
| { |
| "epoch": 28.28767123287671, |
| "grad_norm": 0.07106557488441467, |
| "learning_rate": 6.809835583053715e-05, |
| "loss": 0.0038, |
| "step": 8260 |
| }, |
| { |
| "epoch": 28.32191780821918, |
| "grad_norm": 0.04838179796934128, |
| "learning_rate": 6.802126365266905e-05, |
| "loss": 0.0044, |
| "step": 8270 |
| }, |
| { |
| "epoch": 28.356164383561644, |
| "grad_norm": 0.07035278528928757, |
| "learning_rate": 6.794412220535426e-05, |
| "loss": 0.004, |
| "step": 8280 |
| }, |
| { |
| "epoch": 28.39041095890411, |
| "grad_norm": 0.06344835460186005, |
| "learning_rate": 6.786693169949455e-05, |
| "loss": 0.0048, |
| "step": 8290 |
| }, |
| { |
| "epoch": 28.424657534246574, |
| "grad_norm": 0.07208611816167831, |
| "learning_rate": 6.778969234612584e-05, |
| "loss": 0.0042, |
| "step": 8300 |
| }, |
| { |
| "epoch": 28.458904109589042, |
| "grad_norm": 0.06950279325246811, |
| "learning_rate": 6.771240435641754e-05, |
| "loss": 0.0036, |
| "step": 8310 |
| }, |
| { |
| "epoch": 28.493150684931507, |
| "grad_norm": 0.06592731922864914, |
| "learning_rate": 6.763506794167208e-05, |
| "loss": 0.0051, |
| "step": 8320 |
| }, |
| { |
| "epoch": 28.527397260273972, |
| "grad_norm": 0.0695786327123642, |
| "learning_rate": 6.755768331332424e-05, |
| "loss": 0.0061, |
| "step": 8330 |
| }, |
| { |
| "epoch": 28.561643835616437, |
| "grad_norm": 0.05047084018588066, |
| "learning_rate": 6.748025068294067e-05, |
| "loss": 0.0053, |
| "step": 8340 |
| }, |
| { |
| "epoch": 28.595890410958905, |
| "grad_norm": 0.07383386045694351, |
| "learning_rate": 6.740277026221923e-05, |
| "loss": 0.0038, |
| "step": 8350 |
| }, |
| { |
| "epoch": 28.63013698630137, |
| "grad_norm": 0.05435343086719513, |
| "learning_rate": 6.732524226298841e-05, |
| "loss": 0.0042, |
| "step": 8360 |
| }, |
| { |
| "epoch": 28.664383561643834, |
| "grad_norm": 0.04976963996887207, |
| "learning_rate": 6.72476668972068e-05, |
| "loss": 0.005, |
| "step": 8370 |
| }, |
| { |
| "epoch": 28.698630136986303, |
| "grad_norm": 0.0554593987762928, |
| "learning_rate": 6.71700443769625e-05, |
| "loss": 0.005, |
| "step": 8380 |
| }, |
| { |
| "epoch": 28.732876712328768, |
| "grad_norm": 0.055551644414663315, |
| "learning_rate": 6.709237491447249e-05, |
| "loss": 0.0053, |
| "step": 8390 |
| }, |
| { |
| "epoch": 28.767123287671232, |
| "grad_norm": 0.05606789514422417, |
| "learning_rate": 6.701465872208216e-05, |
| "loss": 0.004, |
| "step": 8400 |
| }, |
| { |
| "epoch": 28.801369863013697, |
| "grad_norm": 0.05831046402454376, |
| "learning_rate": 6.693689601226458e-05, |
| "loss": 0.0032, |
| "step": 8410 |
| }, |
| { |
| "epoch": 28.835616438356166, |
| "grad_norm": 0.05428626015782356, |
| "learning_rate": 6.685908699762002e-05, |
| "loss": 0.0043, |
| "step": 8420 |
| }, |
| { |
| "epoch": 28.86986301369863, |
| "grad_norm": 0.05782606080174446, |
| "learning_rate": 6.67812318908754e-05, |
| "loss": 0.0045, |
| "step": 8430 |
| }, |
| { |
| "epoch": 28.904109589041095, |
| "grad_norm": 0.05124809592962265, |
| "learning_rate": 6.670333090488356e-05, |
| "loss": 0.0044, |
| "step": 8440 |
| }, |
| { |
| "epoch": 28.938356164383563, |
| "grad_norm": 0.06171569600701332, |
| "learning_rate": 6.662538425262285e-05, |
| "loss": 0.0046, |
| "step": 8450 |
| }, |
| { |
| "epoch": 28.972602739726028, |
| "grad_norm": 0.07845284789800644, |
| "learning_rate": 6.654739214719641e-05, |
| "loss": 0.0055, |
| "step": 8460 |
| }, |
| { |
| "epoch": 29.006849315068493, |
| "grad_norm": 0.06284799426794052, |
| "learning_rate": 6.646935480183173e-05, |
| "loss": 0.0038, |
| "step": 8470 |
| }, |
| { |
| "epoch": 29.041095890410958, |
| "grad_norm": 0.045351553708314896, |
| "learning_rate": 6.639127242987988e-05, |
| "loss": 0.006, |
| "step": 8480 |
| }, |
| { |
| "epoch": 29.075342465753426, |
| "grad_norm": 0.05155348405241966, |
| "learning_rate": 6.631314524481513e-05, |
| "loss": 0.0049, |
| "step": 8490 |
| }, |
| { |
| "epoch": 29.10958904109589, |
| "grad_norm": 0.04898626729846001, |
| "learning_rate": 6.623497346023418e-05, |
| "loss": 0.0042, |
| "step": 8500 |
| }, |
| { |
| "epoch": 29.143835616438356, |
| "grad_norm": 0.06170529127120972, |
| "learning_rate": 6.615675728985572e-05, |
| "loss": 0.0063, |
| "step": 8510 |
| }, |
| { |
| "epoch": 29.17808219178082, |
| "grad_norm": 0.0662521943449974, |
| "learning_rate": 6.607849694751977e-05, |
| "loss": 0.0043, |
| "step": 8520 |
| }, |
| { |
| "epoch": 29.21232876712329, |
| "grad_norm": 0.057435885071754456, |
| "learning_rate": 6.600019264718713e-05, |
| "loss": 0.0053, |
| "step": 8530 |
| }, |
| { |
| "epoch": 29.246575342465754, |
| "grad_norm": 0.05537862703204155, |
| "learning_rate": 6.592184460293877e-05, |
| "loss": 0.0039, |
| "step": 8540 |
| }, |
| { |
| "epoch": 29.28082191780822, |
| "grad_norm": 0.05145610123872757, |
| "learning_rate": 6.584345302897523e-05, |
| "loss": 0.005, |
| "step": 8550 |
| }, |
| { |
| "epoch": 29.315068493150687, |
| "grad_norm": 0.06142713874578476, |
| "learning_rate": 6.576501813961609e-05, |
| "loss": 0.0045, |
| "step": 8560 |
| }, |
| { |
| "epoch": 29.34931506849315, |
| "grad_norm": 0.05496953800320625, |
| "learning_rate": 6.568654014929932e-05, |
| "loss": 0.0049, |
| "step": 8570 |
| }, |
| { |
| "epoch": 29.383561643835616, |
| "grad_norm": 0.057442113757133484, |
| "learning_rate": 6.56080192725808e-05, |
| "loss": 0.0049, |
| "step": 8580 |
| }, |
| { |
| "epoch": 29.41780821917808, |
| "grad_norm": 0.07670370489358902, |
| "learning_rate": 6.552945572413358e-05, |
| "loss": 0.004, |
| "step": 8590 |
| }, |
| { |
| "epoch": 29.45205479452055, |
| "grad_norm": 0.06951688230037689, |
| "learning_rate": 6.545084971874738e-05, |
| "loss": 0.0046, |
| "step": 8600 |
| }, |
| { |
| "epoch": 29.486301369863014, |
| "grad_norm": 0.0623633898794651, |
| "learning_rate": 6.537220147132805e-05, |
| "loss": 0.0038, |
| "step": 8610 |
| }, |
| { |
| "epoch": 29.52054794520548, |
| "grad_norm": 0.06844084709882736, |
| "learning_rate": 6.529351119689688e-05, |
| "loss": 0.0055, |
| "step": 8620 |
| }, |
| { |
| "epoch": 29.554794520547944, |
| "grad_norm": 0.0466805100440979, |
| "learning_rate": 6.521477911059008e-05, |
| "loss": 0.0043, |
| "step": 8630 |
| }, |
| { |
| "epoch": 29.589041095890412, |
| "grad_norm": 0.0462033748626709, |
| "learning_rate": 6.513600542765817e-05, |
| "loss": 0.0041, |
| "step": 8640 |
| }, |
| { |
| "epoch": 29.623287671232877, |
| "grad_norm": 0.04172181710600853, |
| "learning_rate": 6.505719036346539e-05, |
| "loss": 0.0039, |
| "step": 8650 |
| }, |
| { |
| "epoch": 29.65753424657534, |
| "grad_norm": 0.059904795140028, |
| "learning_rate": 6.497833413348909e-05, |
| "loss": 0.004, |
| "step": 8660 |
| }, |
| { |
| "epoch": 29.69178082191781, |
| "grad_norm": 0.04163758084177971, |
| "learning_rate": 6.489943695331923e-05, |
| "loss": 0.0048, |
| "step": 8670 |
| }, |
| { |
| "epoch": 29.726027397260275, |
| "grad_norm": 0.06971383094787598, |
| "learning_rate": 6.48204990386577e-05, |
| "loss": 0.0046, |
| "step": 8680 |
| }, |
| { |
| "epoch": 29.76027397260274, |
| "grad_norm": 0.04700847715139389, |
| "learning_rate": 6.474152060531768e-05, |
| "loss": 0.0043, |
| "step": 8690 |
| }, |
| { |
| "epoch": 29.794520547945204, |
| "grad_norm": 0.06670918315649033, |
| "learning_rate": 6.466250186922325e-05, |
| "loss": 0.0058, |
| "step": 8700 |
| }, |
| { |
| "epoch": 29.828767123287673, |
| "grad_norm": 0.05440834164619446, |
| "learning_rate": 6.458344304640858e-05, |
| "loss": 0.0042, |
| "step": 8710 |
| }, |
| { |
| "epoch": 29.863013698630137, |
| "grad_norm": 0.0671214759349823, |
| "learning_rate": 6.450434435301751e-05, |
| "loss": 0.0043, |
| "step": 8720 |
| }, |
| { |
| "epoch": 29.897260273972602, |
| "grad_norm": 0.05730283632874489, |
| "learning_rate": 6.44252060053028e-05, |
| "loss": 0.0045, |
| "step": 8730 |
| }, |
| { |
| "epoch": 29.931506849315067, |
| "grad_norm": 0.06602773815393448, |
| "learning_rate": 6.43460282196257e-05, |
| "loss": 0.0048, |
| "step": 8740 |
| }, |
| { |
| "epoch": 29.965753424657535, |
| "grad_norm": 0.07988374680280685, |
| "learning_rate": 6.426681121245527e-05, |
| "loss": 0.0046, |
| "step": 8750 |
| }, |
| { |
| "epoch": 30.0, |
| "grad_norm": 0.06527750939130783, |
| "learning_rate": 6.418755520036775e-05, |
| "loss": 0.0038, |
| "step": 8760 |
| }, |
| { |
| "epoch": 30.034246575342465, |
| "grad_norm": 0.04482458159327507, |
| "learning_rate": 6.410826040004607e-05, |
| "loss": 0.0043, |
| "step": 8770 |
| }, |
| { |
| "epoch": 30.068493150684933, |
| "grad_norm": 0.06171128898859024, |
| "learning_rate": 6.402892702827916e-05, |
| "loss": 0.0038, |
| "step": 8780 |
| }, |
| { |
| "epoch": 30.102739726027398, |
| "grad_norm": 0.06300436705350876, |
| "learning_rate": 6.394955530196147e-05, |
| "loss": 0.0042, |
| "step": 8790 |
| }, |
| { |
| "epoch": 30.136986301369863, |
| "grad_norm": 0.07563607394695282, |
| "learning_rate": 6.387014543809223e-05, |
| "loss": 0.0043, |
| "step": 8800 |
| }, |
| { |
| "epoch": 30.171232876712327, |
| "grad_norm": 0.05031699687242508, |
| "learning_rate": 6.3790697653775e-05, |
| "loss": 0.0042, |
| "step": 8810 |
| }, |
| { |
| "epoch": 30.205479452054796, |
| "grad_norm": 0.05471353605389595, |
| "learning_rate": 6.371121216621698e-05, |
| "loss": 0.0051, |
| "step": 8820 |
| }, |
| { |
| "epoch": 30.23972602739726, |
| "grad_norm": 0.04786813631653786, |
| "learning_rate": 6.363168919272846e-05, |
| "loss": 0.003, |
| "step": 8830 |
| }, |
| { |
| "epoch": 30.273972602739725, |
| "grad_norm": 0.0538649708032608, |
| "learning_rate": 6.355212895072223e-05, |
| "loss": 0.005, |
| "step": 8840 |
| }, |
| { |
| "epoch": 30.30821917808219, |
| "grad_norm": 0.07921342551708221, |
| "learning_rate": 6.34725316577129e-05, |
| "loss": 0.0046, |
| "step": 8850 |
| }, |
| { |
| "epoch": 30.34246575342466, |
| "grad_norm": 0.058982837945222855, |
| "learning_rate": 6.339289753131649e-05, |
| "loss": 0.0045, |
| "step": 8860 |
| }, |
| { |
| "epoch": 30.376712328767123, |
| "grad_norm": 0.07950620353221893, |
| "learning_rate": 6.331322678924962e-05, |
| "loss": 0.0053, |
| "step": 8870 |
| }, |
| { |
| "epoch": 30.410958904109588, |
| "grad_norm": 0.10326464474201202, |
| "learning_rate": 6.323351964932908e-05, |
| "loss": 0.006, |
| "step": 8880 |
| }, |
| { |
| "epoch": 30.445205479452056, |
| "grad_norm": 0.05070188269019127, |
| "learning_rate": 6.315377632947115e-05, |
| "loss": 0.0048, |
| "step": 8890 |
| }, |
| { |
| "epoch": 30.47945205479452, |
| "grad_norm": 0.07777089625597, |
| "learning_rate": 6.307399704769099e-05, |
| "loss": 0.0045, |
| "step": 8900 |
| }, |
| { |
| "epoch": 30.513698630136986, |
| "grad_norm": 0.07219505310058594, |
| "learning_rate": 6.299418202210214e-05, |
| "loss": 0.0054, |
| "step": 8910 |
| }, |
| { |
| "epoch": 30.54794520547945, |
| "grad_norm": 0.06435932964086533, |
| "learning_rate": 6.291433147091583e-05, |
| "loss": 0.0041, |
| "step": 8920 |
| }, |
| { |
| "epoch": 30.58219178082192, |
| "grad_norm": 0.05930699408054352, |
| "learning_rate": 6.283444561244042e-05, |
| "loss": 0.0042, |
| "step": 8930 |
| }, |
| { |
| "epoch": 30.616438356164384, |
| "grad_norm": 0.07478418946266174, |
| "learning_rate": 6.275452466508077e-05, |
| "loss": 0.005, |
| "step": 8940 |
| }, |
| { |
| "epoch": 30.65068493150685, |
| "grad_norm": 0.06302210688591003, |
| "learning_rate": 6.26745688473377e-05, |
| "loss": 0.0048, |
| "step": 8950 |
| }, |
| { |
| "epoch": 30.684931506849313, |
| "grad_norm": 0.057475507259368896, |
| "learning_rate": 6.259457837780742e-05, |
| "loss": 0.0059, |
| "step": 8960 |
| }, |
| { |
| "epoch": 30.71917808219178, |
| "grad_norm": 0.06078030914068222, |
| "learning_rate": 6.251455347518073e-05, |
| "loss": 0.0046, |
| "step": 8970 |
| }, |
| { |
| "epoch": 30.753424657534246, |
| "grad_norm": 0.059874407947063446, |
| "learning_rate": 6.243449435824276e-05, |
| "loss": 0.005, |
| "step": 8980 |
| }, |
| { |
| "epoch": 30.78767123287671, |
| "grad_norm": 0.06151744723320007, |
| "learning_rate": 6.235440124587198e-05, |
| "loss": 0.0043, |
| "step": 8990 |
| }, |
| { |
| "epoch": 30.82191780821918, |
| "grad_norm": 0.07911258190870285, |
| "learning_rate": 6.227427435703997e-05, |
| "loss": 0.0049, |
| "step": 9000 |
| }, |
| { |
| "epoch": 30.856164383561644, |
| "grad_norm": 0.059228286147117615, |
| "learning_rate": 6.219411391081055e-05, |
| "loss": 0.0042, |
| "step": 9010 |
| }, |
| { |
| "epoch": 30.89041095890411, |
| "grad_norm": 0.05565868318080902, |
| "learning_rate": 6.211392012633932e-05, |
| "loss": 0.0036, |
| "step": 9020 |
| }, |
| { |
| "epoch": 30.924657534246574, |
| "grad_norm": 0.053873926401138306, |
| "learning_rate": 6.203369322287306e-05, |
| "loss": 0.0054, |
| "step": 9030 |
| }, |
| { |
| "epoch": 30.958904109589042, |
| "grad_norm": 0.04394320026040077, |
| "learning_rate": 6.195343341974899e-05, |
| "loss": 0.0048, |
| "step": 9040 |
| }, |
| { |
| "epoch": 30.993150684931507, |
| "grad_norm": 0.04236266762018204, |
| "learning_rate": 6.187314093639444e-05, |
| "loss": 0.0046, |
| "step": 9050 |
| }, |
| { |
| "epoch": 31.027397260273972, |
| "grad_norm": 0.06337432563304901, |
| "learning_rate": 6.179281599232591e-05, |
| "loss": 0.0055, |
| "step": 9060 |
| }, |
| { |
| "epoch": 31.061643835616437, |
| "grad_norm": 0.07716334611177444, |
| "learning_rate": 6.17124588071488e-05, |
| "loss": 0.0042, |
| "step": 9070 |
| }, |
| { |
| "epoch": 31.095890410958905, |
| "grad_norm": 0.06364544481039047, |
| "learning_rate": 6.163206960055651e-05, |
| "loss": 0.0035, |
| "step": 9080 |
| }, |
| { |
| "epoch": 31.13013698630137, |
| "grad_norm": 0.05644696578383446, |
| "learning_rate": 6.155164859233012e-05, |
| "loss": 0.0047, |
| "step": 9090 |
| }, |
| { |
| "epoch": 31.164383561643834, |
| "grad_norm": 0.08092032372951508, |
| "learning_rate": 6.147119600233758e-05, |
| "loss": 0.005, |
| "step": 9100 |
| }, |
| { |
| "epoch": 31.198630136986303, |
| "grad_norm": 0.06652740389108658, |
| "learning_rate": 6.13907120505332e-05, |
| "loss": 0.0043, |
| "step": 9110 |
| }, |
| { |
| "epoch": 31.232876712328768, |
| "grad_norm": 0.07216228544712067, |
| "learning_rate": 6.131019695695702e-05, |
| "loss": 0.0056, |
| "step": 9120 |
| }, |
| { |
| "epoch": 31.267123287671232, |
| "grad_norm": 0.05830219388008118, |
| "learning_rate": 6.122965094173424e-05, |
| "loss": 0.0046, |
| "step": 9130 |
| }, |
| { |
| "epoch": 31.301369863013697, |
| "grad_norm": 0.06598392874002457, |
| "learning_rate": 6.11490742250746e-05, |
| "loss": 0.0051, |
| "step": 9140 |
| }, |
| { |
| "epoch": 31.335616438356166, |
| "grad_norm": 0.05769110471010208, |
| "learning_rate": 6.106846702727172e-05, |
| "loss": 0.0038, |
| "step": 9150 |
| }, |
| { |
| "epoch": 31.36986301369863, |
| "grad_norm": 0.05641612783074379, |
| "learning_rate": 6.0987829568702656e-05, |
| "loss": 0.0057, |
| "step": 9160 |
| }, |
| { |
| "epoch": 31.404109589041095, |
| "grad_norm": 0.06108755245804787, |
| "learning_rate": 6.090716206982714e-05, |
| "loss": 0.0052, |
| "step": 9170 |
| }, |
| { |
| "epoch": 31.438356164383563, |
| "grad_norm": 0.05891823023557663, |
| "learning_rate": 6.0826464751186994e-05, |
| "loss": 0.004, |
| "step": 9180 |
| }, |
| { |
| "epoch": 31.472602739726028, |
| "grad_norm": 0.05389763042330742, |
| "learning_rate": 6.074573783340562e-05, |
| "loss": 0.0038, |
| "step": 9190 |
| }, |
| { |
| "epoch": 31.506849315068493, |
| "grad_norm": 0.04580220952630043, |
| "learning_rate": 6.066498153718735e-05, |
| "loss": 0.0033, |
| "step": 9200 |
| }, |
| { |
| "epoch": 31.541095890410958, |
| "grad_norm": 0.06206485256552696, |
| "learning_rate": 6.0584196083316794e-05, |
| "loss": 0.0038, |
| "step": 9210 |
| }, |
| { |
| "epoch": 31.575342465753426, |
| "grad_norm": 0.05921825021505356, |
| "learning_rate": 6.05033816926583e-05, |
| "loss": 0.0053, |
| "step": 9220 |
| }, |
| { |
| "epoch": 31.60958904109589, |
| "grad_norm": 0.08074521273374557, |
| "learning_rate": 6.042253858615532e-05, |
| "loss": 0.004, |
| "step": 9230 |
| }, |
| { |
| "epoch": 31.643835616438356, |
| "grad_norm": 0.06720732897520065, |
| "learning_rate": 6.034166698482984e-05, |
| "loss": 0.0036, |
| "step": 9240 |
| }, |
| { |
| "epoch": 31.67808219178082, |
| "grad_norm": 0.04913540184497833, |
| "learning_rate": 6.026076710978171e-05, |
| "loss": 0.0056, |
| "step": 9250 |
| }, |
| { |
| "epoch": 31.71232876712329, |
| "grad_norm": 0.03931824490427971, |
| "learning_rate": 6.017983918218812e-05, |
| "loss": 0.0041, |
| "step": 9260 |
| }, |
| { |
| "epoch": 31.746575342465754, |
| "grad_norm": 0.04003632441163063, |
| "learning_rate": 6.009888342330292e-05, |
| "loss": 0.0041, |
| "step": 9270 |
| }, |
| { |
| "epoch": 31.78082191780822, |
| "grad_norm": 0.05170729383826256, |
| "learning_rate": 6.001790005445607e-05, |
| "loss": 0.0043, |
| "step": 9280 |
| }, |
| { |
| "epoch": 31.815068493150687, |
| "grad_norm": 0.05929422751069069, |
| "learning_rate": 5.9936889297052986e-05, |
| "loss": 0.0035, |
| "step": 9290 |
| }, |
| { |
| "epoch": 31.84931506849315, |
| "grad_norm": 0.05444969981908798, |
| "learning_rate": 5.985585137257401e-05, |
| "loss": 0.004, |
| "step": 9300 |
| }, |
| { |
| "epoch": 31.883561643835616, |
| "grad_norm": 0.05639302730560303, |
| "learning_rate": 5.977478650257374e-05, |
| "loss": 0.0057, |
| "step": 9310 |
| }, |
| { |
| "epoch": 31.91780821917808, |
| "grad_norm": 0.06555838137865067, |
| "learning_rate": 5.969369490868042e-05, |
| "loss": 0.0046, |
| "step": 9320 |
| }, |
| { |
| "epoch": 31.95205479452055, |
| "grad_norm": 0.04445594176650047, |
| "learning_rate": 5.961257681259535e-05, |
| "loss": 0.0038, |
| "step": 9330 |
| }, |
| { |
| "epoch": 31.986301369863014, |
| "grad_norm": 0.06617361307144165, |
| "learning_rate": 5.953143243609235e-05, |
| "loss": 0.0047, |
| "step": 9340 |
| }, |
| { |
| "epoch": 32.02054794520548, |
| "grad_norm": 0.04996689781546593, |
| "learning_rate": 5.945026200101702e-05, |
| "loss": 0.0043, |
| "step": 9350 |
| }, |
| { |
| "epoch": 32.054794520547944, |
| "grad_norm": 0.0414830707013607, |
| "learning_rate": 5.9369065729286245e-05, |
| "loss": 0.0034, |
| "step": 9360 |
| }, |
| { |
| "epoch": 32.08904109589041, |
| "grad_norm": 0.04811393842101097, |
| "learning_rate": 5.92878438428875e-05, |
| "loss": 0.0044, |
| "step": 9370 |
| }, |
| { |
| "epoch": 32.12328767123287, |
| "grad_norm": 0.04682043194770813, |
| "learning_rate": 5.9206596563878357e-05, |
| "loss": 0.0029, |
| "step": 9380 |
| }, |
| { |
| "epoch": 32.157534246575345, |
| "grad_norm": 0.056545063853263855, |
| "learning_rate": 5.912532411438576e-05, |
| "loss": 0.0035, |
| "step": 9390 |
| }, |
| { |
| "epoch": 32.19178082191781, |
| "grad_norm": 0.047463856637477875, |
| "learning_rate": 5.90440267166055e-05, |
| "loss": 0.0038, |
| "step": 9400 |
| }, |
| { |
| "epoch": 32.226027397260275, |
| "grad_norm": 0.05764775723218918, |
| "learning_rate": 5.896270459280153e-05, |
| "loss": 0.0046, |
| "step": 9410 |
| }, |
| { |
| "epoch": 32.26027397260274, |
| "grad_norm": 0.057075630873441696, |
| "learning_rate": 5.888135796530544e-05, |
| "loss": 0.0039, |
| "step": 9420 |
| }, |
| { |
| "epoch": 32.294520547945204, |
| "grad_norm": 0.05559380352497101, |
| "learning_rate": 5.8799987056515804e-05, |
| "loss": 0.0035, |
| "step": 9430 |
| }, |
| { |
| "epoch": 32.32876712328767, |
| "grad_norm": 0.05065063014626503, |
| "learning_rate": 5.871859208889759e-05, |
| "loss": 0.0038, |
| "step": 9440 |
| }, |
| { |
| "epoch": 32.363013698630134, |
| "grad_norm": 0.051984649151563644, |
| "learning_rate": 5.8637173284981526e-05, |
| "loss": 0.0048, |
| "step": 9450 |
| }, |
| { |
| "epoch": 32.397260273972606, |
| "grad_norm": 0.055756259709596634, |
| "learning_rate": 5.85557308673635e-05, |
| "loss": 0.0043, |
| "step": 9460 |
| }, |
| { |
| "epoch": 32.43150684931507, |
| "grad_norm": 0.05831045284867287, |
| "learning_rate": 5.847426505870399e-05, |
| "loss": 0.0045, |
| "step": 9470 |
| }, |
| { |
| "epoch": 32.465753424657535, |
| "grad_norm": 0.05842358618974686, |
| "learning_rate": 5.8392776081727385e-05, |
| "loss": 0.0047, |
| "step": 9480 |
| }, |
| { |
| "epoch": 32.5, |
| "grad_norm": 0.057939786463975906, |
| "learning_rate": 5.831126415922148e-05, |
| "loss": 0.0047, |
| "step": 9490 |
| }, |
| { |
| "epoch": 32.534246575342465, |
| "grad_norm": 0.05761862173676491, |
| "learning_rate": 5.8229729514036705e-05, |
| "loss": 0.0044, |
| "step": 9500 |
| }, |
| { |
| "epoch": 32.56849315068493, |
| "grad_norm": 0.03868666663765907, |
| "learning_rate": 5.8148172369085686e-05, |
| "loss": 0.0037, |
| "step": 9510 |
| }, |
| { |
| "epoch": 32.602739726027394, |
| "grad_norm": 0.04971576854586601, |
| "learning_rate": 5.8066592947342555e-05, |
| "loss": 0.0039, |
| "step": 9520 |
| }, |
| { |
| "epoch": 32.636986301369866, |
| "grad_norm": 0.039059728384017944, |
| "learning_rate": 5.798499147184233e-05, |
| "loss": 0.0034, |
| "step": 9530 |
| }, |
| { |
| "epoch": 32.67123287671233, |
| "grad_norm": 0.04403064772486687, |
| "learning_rate": 5.7903368165680327e-05, |
| "loss": 0.0057, |
| "step": 9540 |
| }, |
| { |
| "epoch": 32.705479452054796, |
| "grad_norm": 0.05422419682145119, |
| "learning_rate": 5.782172325201155e-05, |
| "loss": 0.0041, |
| "step": 9550 |
| }, |
| { |
| "epoch": 32.73972602739726, |
| "grad_norm": 0.04113056883215904, |
| "learning_rate": 5.7740056954050084e-05, |
| "loss": 0.0041, |
| "step": 9560 |
| }, |
| { |
| "epoch": 32.773972602739725, |
| "grad_norm": 0.061150237917900085, |
| "learning_rate": 5.765836949506843e-05, |
| "loss": 0.0033, |
| "step": 9570 |
| }, |
| { |
| "epoch": 32.80821917808219, |
| "grad_norm": 0.04960055649280548, |
| "learning_rate": 5.757666109839702e-05, |
| "loss": 0.0034, |
| "step": 9580 |
| }, |
| { |
| "epoch": 32.842465753424655, |
| "grad_norm": 0.07494784146547318, |
| "learning_rate": 5.74949319874235e-05, |
| "loss": 0.0043, |
| "step": 9590 |
| }, |
| { |
| "epoch": 32.87671232876713, |
| "grad_norm": 0.054274559020996094, |
| "learning_rate": 5.74131823855921e-05, |
| "loss": 0.0039, |
| "step": 9600 |
| }, |
| { |
| "epoch": 32.91095890410959, |
| "grad_norm": 0.05537914112210274, |
| "learning_rate": 5.733141251640315e-05, |
| "loss": 0.0028, |
| "step": 9610 |
| }, |
| { |
| "epoch": 32.945205479452056, |
| "grad_norm": 0.06061186641454697, |
| "learning_rate": 5.72496226034123e-05, |
| "loss": 0.004, |
| "step": 9620 |
| }, |
| { |
| "epoch": 32.97945205479452, |
| "grad_norm": 0.07560919225215912, |
| "learning_rate": 5.7167812870230094e-05, |
| "loss": 0.0043, |
| "step": 9630 |
| }, |
| { |
| "epoch": 33.013698630136986, |
| "grad_norm": 0.0694555938243866, |
| "learning_rate": 5.7085983540521216e-05, |
| "loss": 0.0044, |
| "step": 9640 |
| }, |
| { |
| "epoch": 33.04794520547945, |
| "grad_norm": 0.05044952780008316, |
| "learning_rate": 5.70041348380039e-05, |
| "loss": 0.0037, |
| "step": 9650 |
| }, |
| { |
| "epoch": 33.082191780821915, |
| "grad_norm": 0.043574009090662, |
| "learning_rate": 5.692226698644938e-05, |
| "loss": 0.0036, |
| "step": 9660 |
| }, |
| { |
| "epoch": 33.11643835616438, |
| "grad_norm": 0.055326227098703384, |
| "learning_rate": 5.6840380209681255e-05, |
| "loss": 0.0051, |
| "step": 9670 |
| }, |
| { |
| "epoch": 33.15068493150685, |
| "grad_norm": 0.05255505442619324, |
| "learning_rate": 5.675847473157485e-05, |
| "loss": 0.0034, |
| "step": 9680 |
| }, |
| { |
| "epoch": 33.18493150684932, |
| "grad_norm": 0.04825739935040474, |
| "learning_rate": 5.667655077605659e-05, |
| "loss": 0.0037, |
| "step": 9690 |
| }, |
| { |
| "epoch": 33.21917808219178, |
| "grad_norm": 0.054841298609972, |
| "learning_rate": 5.6594608567103456e-05, |
| "loss": 0.0045, |
| "step": 9700 |
| }, |
| { |
| "epoch": 33.25342465753425, |
| "grad_norm": 0.04731395095586777, |
| "learning_rate": 5.65126483287423e-05, |
| "loss": 0.0043, |
| "step": 9710 |
| }, |
| { |
| "epoch": 33.28767123287671, |
| "grad_norm": 0.05347118899226189, |
| "learning_rate": 5.6430670285049314e-05, |
| "loss": 0.0037, |
| "step": 9720 |
| }, |
| { |
| "epoch": 33.321917808219176, |
| "grad_norm": 0.053511351346969604, |
| "learning_rate": 5.634867466014932e-05, |
| "loss": 0.0037, |
| "step": 9730 |
| }, |
| { |
| "epoch": 33.35616438356164, |
| "grad_norm": 0.056357644498348236, |
| "learning_rate": 5.6266661678215216e-05, |
| "loss": 0.005, |
| "step": 9740 |
| }, |
| { |
| "epoch": 33.39041095890411, |
| "grad_norm": 0.04963238537311554, |
| "learning_rate": 5.618463156346739e-05, |
| "loss": 0.0032, |
| "step": 9750 |
| }, |
| { |
| "epoch": 33.42465753424658, |
| "grad_norm": 0.0459924079477787, |
| "learning_rate": 5.6102584540173006e-05, |
| "loss": 0.0043, |
| "step": 9760 |
| }, |
| { |
| "epoch": 33.45890410958904, |
| "grad_norm": 0.04573008045554161, |
| "learning_rate": 5.602052083264555e-05, |
| "loss": 0.0039, |
| "step": 9770 |
| }, |
| { |
| "epoch": 33.49315068493151, |
| "grad_norm": 0.056432124227285385, |
| "learning_rate": 5.5938440665244006e-05, |
| "loss": 0.004, |
| "step": 9780 |
| }, |
| { |
| "epoch": 33.52739726027397, |
| "grad_norm": 0.03997287154197693, |
| "learning_rate": 5.585634426237246e-05, |
| "loss": 0.0035, |
| "step": 9790 |
| }, |
| { |
| "epoch": 33.56164383561644, |
| "grad_norm": 0.036699339747428894, |
| "learning_rate": 5.577423184847932e-05, |
| "loss": 0.0048, |
| "step": 9800 |
| }, |
| { |
| "epoch": 33.5958904109589, |
| "grad_norm": 0.07555674761533737, |
| "learning_rate": 5.569210364805677e-05, |
| "loss": 0.0037, |
| "step": 9810 |
| }, |
| { |
| "epoch": 33.63013698630137, |
| "grad_norm": 0.07923568040132523, |
| "learning_rate": 5.560995988564023e-05, |
| "loss": 0.0041, |
| "step": 9820 |
| }, |
| { |
| "epoch": 33.66438356164384, |
| "grad_norm": 0.05765343829989433, |
| "learning_rate": 5.552780078580756e-05, |
| "loss": 0.0038, |
| "step": 9830 |
| }, |
| { |
| "epoch": 33.6986301369863, |
| "grad_norm": 0.04562293365597725, |
| "learning_rate": 5.544562657317863e-05, |
| "loss": 0.0048, |
| "step": 9840 |
| }, |
| { |
| "epoch": 33.73287671232877, |
| "grad_norm": 0.040983159095048904, |
| "learning_rate": 5.5363437472414595e-05, |
| "loss": 0.0031, |
| "step": 9850 |
| }, |
| { |
| "epoch": 33.76712328767123, |
| "grad_norm": 0.04989850893616676, |
| "learning_rate": 5.52812337082173e-05, |
| "loss": 0.004, |
| "step": 9860 |
| }, |
| { |
| "epoch": 33.8013698630137, |
| "grad_norm": 0.05472975969314575, |
| "learning_rate": 5.519901550532871e-05, |
| "loss": 0.005, |
| "step": 9870 |
| }, |
| { |
| "epoch": 33.83561643835616, |
| "grad_norm": 0.04672018066048622, |
| "learning_rate": 5.511678308853026e-05, |
| "loss": 0.0032, |
| "step": 9880 |
| }, |
| { |
| "epoch": 33.86986301369863, |
| "grad_norm": 0.055184490978717804, |
| "learning_rate": 5.5034536682642224e-05, |
| "loss": 0.004, |
| "step": 9890 |
| }, |
| { |
| "epoch": 33.9041095890411, |
| "grad_norm": 0.04532390460371971, |
| "learning_rate": 5.495227651252315e-05, |
| "loss": 0.0034, |
| "step": 9900 |
| }, |
| { |
| "epoch": 33.93835616438356, |
| "grad_norm": 0.04610401391983032, |
| "learning_rate": 5.487000280306917e-05, |
| "loss": 0.0038, |
| "step": 9910 |
| }, |
| { |
| "epoch": 33.97260273972603, |
| "grad_norm": 0.06480266153812408, |
| "learning_rate": 5.478771577921351e-05, |
| "loss": 0.0043, |
| "step": 9920 |
| }, |
| { |
| "epoch": 34.00684931506849, |
| "grad_norm": 0.055622607469558716, |
| "learning_rate": 5.470541566592573e-05, |
| "loss": 0.0042, |
| "step": 9930 |
| }, |
| { |
| "epoch": 34.04109589041096, |
| "grad_norm": 0.05858009308576584, |
| "learning_rate": 5.462310268821118e-05, |
| "loss": 0.0047, |
| "step": 9940 |
| }, |
| { |
| "epoch": 34.07534246575342, |
| "grad_norm": 0.05842429772019386, |
| "learning_rate": 5.454077707111042e-05, |
| "loss": 0.0047, |
| "step": 9950 |
| }, |
| { |
| "epoch": 34.10958904109589, |
| "grad_norm": 0.055152349174022675, |
| "learning_rate": 5.445843903969854e-05, |
| "loss": 0.005, |
| "step": 9960 |
| }, |
| { |
| "epoch": 34.14383561643836, |
| "grad_norm": 0.05233810469508171, |
| "learning_rate": 5.4376088819084556e-05, |
| "loss": 0.004, |
| "step": 9970 |
| }, |
| { |
| "epoch": 34.178082191780824, |
| "grad_norm": 0.04537783935666084, |
| "learning_rate": 5.4293726634410855e-05, |
| "loss": 0.0037, |
| "step": 9980 |
| }, |
| { |
| "epoch": 34.21232876712329, |
| "grad_norm": 0.0472206212580204, |
| "learning_rate": 5.4211352710852495e-05, |
| "loss": 0.004, |
| "step": 9990 |
| }, |
| { |
| "epoch": 34.24657534246575, |
| "grad_norm": 0.04411351680755615, |
| "learning_rate": 5.4128967273616625e-05, |
| "loss": 0.0037, |
| "step": 10000 |
| }, |
| { |
| "epoch": 34.28082191780822, |
| "grad_norm": 0.05850313976407051, |
| "learning_rate": 5.404657054794189e-05, |
| "loss": 0.0054, |
| "step": 10010 |
| }, |
| { |
| "epoch": 34.31506849315068, |
| "grad_norm": 0.05117764323949814, |
| "learning_rate": 5.396416275909779e-05, |
| "loss": 0.0038, |
| "step": 10020 |
| }, |
| { |
| "epoch": 34.34931506849315, |
| "grad_norm": 0.05348801612854004, |
| "learning_rate": 5.3881744132384104e-05, |
| "loss": 0.0038, |
| "step": 10030 |
| }, |
| { |
| "epoch": 34.38356164383562, |
| "grad_norm": 0.05136050656437874, |
| "learning_rate": 5.379931489313016e-05, |
| "loss": 0.0043, |
| "step": 10040 |
| }, |
| { |
| "epoch": 34.417808219178085, |
| "grad_norm": 0.06988473981618881, |
| "learning_rate": 5.371687526669439e-05, |
| "loss": 0.0063, |
| "step": 10050 |
| }, |
| { |
| "epoch": 34.45205479452055, |
| "grad_norm": 0.06275316327810287, |
| "learning_rate": 5.363442547846356e-05, |
| "loss": 0.0059, |
| "step": 10060 |
| }, |
| { |
| "epoch": 34.486301369863014, |
| "grad_norm": 0.06286466866731644, |
| "learning_rate": 5.355196575385225e-05, |
| "loss": 0.0047, |
| "step": 10070 |
| }, |
| { |
| "epoch": 34.52054794520548, |
| "grad_norm": 0.06469148397445679, |
| "learning_rate": 5.3469496318302204e-05, |
| "loss": 0.0048, |
| "step": 10080 |
| }, |
| { |
| "epoch": 34.554794520547944, |
| "grad_norm": 0.04653120040893555, |
| "learning_rate": 5.3387017397281704e-05, |
| "loss": 0.0035, |
| "step": 10090 |
| }, |
| { |
| "epoch": 34.58904109589041, |
| "grad_norm": 0.04517116770148277, |
| "learning_rate": 5.330452921628497e-05, |
| "loss": 0.0038, |
| "step": 10100 |
| }, |
| { |
| "epoch": 34.62328767123287, |
| "grad_norm": 0.05882929638028145, |
| "learning_rate": 5.322203200083154e-05, |
| "loss": 0.0046, |
| "step": 10110 |
| }, |
| { |
| "epoch": 34.657534246575345, |
| "grad_norm": 0.06491271406412125, |
| "learning_rate": 5.313952597646568e-05, |
| "loss": 0.003, |
| "step": 10120 |
| }, |
| { |
| "epoch": 34.69178082191781, |
| "grad_norm": 0.05783591791987419, |
| "learning_rate": 5.305701136875566e-05, |
| "loss": 0.0051, |
| "step": 10130 |
| }, |
| { |
| "epoch": 34.726027397260275, |
| "grad_norm": 0.06943771988153458, |
| "learning_rate": 5.297448840329329e-05, |
| "loss": 0.0035, |
| "step": 10140 |
| }, |
| { |
| "epoch": 34.76027397260274, |
| "grad_norm": 0.07877009361982346, |
| "learning_rate": 5.2891957305693205e-05, |
| "loss": 0.0034, |
| "step": 10150 |
| }, |
| { |
| "epoch": 34.794520547945204, |
| "grad_norm": 0.06451980769634247, |
| "learning_rate": 5.280941830159227e-05, |
| "loss": 0.0054, |
| "step": 10160 |
| }, |
| { |
| "epoch": 34.82876712328767, |
| "grad_norm": 0.051155924797058105, |
| "learning_rate": 5.2726871616649e-05, |
| "loss": 0.0042, |
| "step": 10170 |
| }, |
| { |
| "epoch": 34.863013698630134, |
| "grad_norm": 0.08243589103221893, |
| "learning_rate": 5.264431747654284e-05, |
| "loss": 0.0049, |
| "step": 10180 |
| }, |
| { |
| "epoch": 34.897260273972606, |
| "grad_norm": 0.0775737464427948, |
| "learning_rate": 5.2561756106973656e-05, |
| "loss": 0.0049, |
| "step": 10190 |
| }, |
| { |
| "epoch": 34.93150684931507, |
| "grad_norm": 0.06622209399938583, |
| "learning_rate": 5.247918773366112e-05, |
| "loss": 0.0057, |
| "step": 10200 |
| }, |
| { |
| "epoch": 34.965753424657535, |
| "grad_norm": 0.0489589087665081, |
| "learning_rate": 5.2396612582343986e-05, |
| "loss": 0.0034, |
| "step": 10210 |
| }, |
| { |
| "epoch": 35.0, |
| "grad_norm": 0.03715719282627106, |
| "learning_rate": 5.231403087877955e-05, |
| "loss": 0.0037, |
| "step": 10220 |
| }, |
| { |
| "epoch": 35.034246575342465, |
| "grad_norm": 0.06789140403270721, |
| "learning_rate": 5.2231442848743064e-05, |
| "loss": 0.0037, |
| "step": 10230 |
| }, |
| { |
| "epoch": 35.06849315068493, |
| "grad_norm": 0.054343558847904205, |
| "learning_rate": 5.214884871802703e-05, |
| "loss": 0.0045, |
| "step": 10240 |
| }, |
| { |
| "epoch": 35.102739726027394, |
| "grad_norm": 0.051918212324380875, |
| "learning_rate": 5.2066248712440656e-05, |
| "loss": 0.005, |
| "step": 10250 |
| }, |
| { |
| "epoch": 35.136986301369866, |
| "grad_norm": 0.06282070279121399, |
| "learning_rate": 5.198364305780922e-05, |
| "loss": 0.0046, |
| "step": 10260 |
| }, |
| { |
| "epoch": 35.17123287671233, |
| "grad_norm": 0.048972342163324356, |
| "learning_rate": 5.1901031979973394e-05, |
| "loss": 0.0042, |
| "step": 10270 |
| }, |
| { |
| "epoch": 35.205479452054796, |
| "grad_norm": 0.04695377126336098, |
| "learning_rate": 5.1818415704788725e-05, |
| "loss": 0.0052, |
| "step": 10280 |
| }, |
| { |
| "epoch": 35.23972602739726, |
| "grad_norm": 0.059636637568473816, |
| "learning_rate": 5.1735794458124956e-05, |
| "loss": 0.0045, |
| "step": 10290 |
| }, |
| { |
| "epoch": 35.273972602739725, |
| "grad_norm": 0.049566976726055145, |
| "learning_rate": 5.165316846586541e-05, |
| "loss": 0.0034, |
| "step": 10300 |
| }, |
| { |
| "epoch": 35.30821917808219, |
| "grad_norm": 0.0541643388569355, |
| "learning_rate": 5.157053795390642e-05, |
| "loss": 0.0031, |
| "step": 10310 |
| }, |
| { |
| "epoch": 35.342465753424655, |
| "grad_norm": 0.06319770216941833, |
| "learning_rate": 5.148790314815663e-05, |
| "loss": 0.0048, |
| "step": 10320 |
| }, |
| { |
| "epoch": 35.37671232876713, |
| "grad_norm": 0.0485026016831398, |
| "learning_rate": 5.1405264274536445e-05, |
| "loss": 0.0053, |
| "step": 10330 |
| }, |
| { |
| "epoch": 35.41095890410959, |
| "grad_norm": 0.04801278933882713, |
| "learning_rate": 5.132262155897739e-05, |
| "loss": 0.0042, |
| "step": 10340 |
| }, |
| { |
| "epoch": 35.445205479452056, |
| "grad_norm": 0.05489344894886017, |
| "learning_rate": 5.123997522742151e-05, |
| "loss": 0.0044, |
| "step": 10350 |
| }, |
| { |
| "epoch": 35.47945205479452, |
| "grad_norm": 0.06271504610776901, |
| "learning_rate": 5.1157325505820694e-05, |
| "loss": 0.0044, |
| "step": 10360 |
| }, |
| { |
| "epoch": 35.513698630136986, |
| "grad_norm": 0.0559224933385849, |
| "learning_rate": 5.107467262013614e-05, |
| "loss": 0.0059, |
| "step": 10370 |
| }, |
| { |
| "epoch": 35.54794520547945, |
| "grad_norm": 0.04511038959026337, |
| "learning_rate": 5.0992016796337686e-05, |
| "loss": 0.004, |
| "step": 10380 |
| }, |
| { |
| "epoch": 35.582191780821915, |
| "grad_norm": 0.04959840700030327, |
| "learning_rate": 5.0909358260403186e-05, |
| "loss": 0.0049, |
| "step": 10390 |
| }, |
| { |
| "epoch": 35.61643835616438, |
| "grad_norm": 0.04910969361662865, |
| "learning_rate": 5.0826697238317935e-05, |
| "loss": 0.0036, |
| "step": 10400 |
| }, |
| { |
| "epoch": 35.65068493150685, |
| "grad_norm": 0.04376056417822838, |
| "learning_rate": 5.074403395607399e-05, |
| "loss": 0.0032, |
| "step": 10410 |
| }, |
| { |
| "epoch": 35.68493150684932, |
| "grad_norm": 0.03833574429154396, |
| "learning_rate": 5.066136863966963e-05, |
| "loss": 0.0038, |
| "step": 10420 |
| }, |
| { |
| "epoch": 35.71917808219178, |
| "grad_norm": 0.05588557571172714, |
| "learning_rate": 5.057870151510864e-05, |
| "loss": 0.0038, |
| "step": 10430 |
| }, |
| { |
| "epoch": 35.75342465753425, |
| "grad_norm": 0.05649462342262268, |
| "learning_rate": 5.0496032808399815e-05, |
| "loss": 0.0037, |
| "step": 10440 |
| }, |
| { |
| "epoch": 35.78767123287671, |
| "grad_norm": 0.05454027280211449, |
| "learning_rate": 5.041336274555625e-05, |
| "loss": 0.0029, |
| "step": 10450 |
| }, |
| { |
| "epoch": 35.821917808219176, |
| "grad_norm": 0.05826808884739876, |
| "learning_rate": 5.033069155259471e-05, |
| "loss": 0.004, |
| "step": 10460 |
| }, |
| { |
| "epoch": 35.85616438356164, |
| "grad_norm": 0.05111277848482132, |
| "learning_rate": 5.02480194555351e-05, |
| "loss": 0.0045, |
| "step": 10470 |
| }, |
| { |
| "epoch": 35.89041095890411, |
| "grad_norm": 0.045382965356111526, |
| "learning_rate": 5.016534668039976e-05, |
| "loss": 0.0035, |
| "step": 10480 |
| }, |
| { |
| "epoch": 35.92465753424658, |
| "grad_norm": 0.04012225195765495, |
| "learning_rate": 5.0082673453212914e-05, |
| "loss": 0.0049, |
| "step": 10490 |
| }, |
| { |
| "epoch": 35.95890410958904, |
| "grad_norm": 0.05032500624656677, |
| "learning_rate": 5e-05, |
| "loss": 0.0045, |
| "step": 10500 |
| }, |
| { |
| "epoch": 35.99315068493151, |
| "grad_norm": 0.054377779364585876, |
| "learning_rate": 4.991732654678709e-05, |
| "loss": 0.0046, |
| "step": 10510 |
| }, |
| { |
| "epoch": 36.02739726027397, |
| "grad_norm": 0.07059525698423386, |
| "learning_rate": 4.9834653319600246e-05, |
| "loss": 0.0037, |
| "step": 10520 |
| }, |
| { |
| "epoch": 36.06164383561644, |
| "grad_norm": 0.07613108307123184, |
| "learning_rate": 4.975198054446492e-05, |
| "loss": 0.0043, |
| "step": 10530 |
| }, |
| { |
| "epoch": 36.0958904109589, |
| "grad_norm": 0.050086334347724915, |
| "learning_rate": 4.96693084474053e-05, |
| "loss": 0.0049, |
| "step": 10540 |
| }, |
| { |
| "epoch": 36.13013698630137, |
| "grad_norm": 0.054679181426763535, |
| "learning_rate": 4.9586637254443756e-05, |
| "loss": 0.0041, |
| "step": 10550 |
| }, |
| { |
| "epoch": 36.16438356164384, |
| "grad_norm": 0.04521835222840309, |
| "learning_rate": 4.950396719160018e-05, |
| "loss": 0.0042, |
| "step": 10560 |
| }, |
| { |
| "epoch": 36.1986301369863, |
| "grad_norm": 0.054949574172496796, |
| "learning_rate": 4.942129848489137e-05, |
| "loss": 0.0046, |
| "step": 10570 |
| }, |
| { |
| "epoch": 36.23287671232877, |
| "grad_norm": 0.05560026690363884, |
| "learning_rate": 4.93386313603304e-05, |
| "loss": 0.0041, |
| "step": 10580 |
| }, |
| { |
| "epoch": 36.26712328767123, |
| "grad_norm": 0.04978262633085251, |
| "learning_rate": 4.925596604392603e-05, |
| "loss": 0.0043, |
| "step": 10590 |
| }, |
| { |
| "epoch": 36.3013698630137, |
| "grad_norm": 0.05357494577765465, |
| "learning_rate": 4.917330276168208e-05, |
| "loss": 0.005, |
| "step": 10600 |
| }, |
| { |
| "epoch": 36.33561643835616, |
| "grad_norm": 0.06006361544132233, |
| "learning_rate": 4.909064173959681e-05, |
| "loss": 0.0034, |
| "step": 10610 |
| }, |
| { |
| "epoch": 36.36986301369863, |
| "grad_norm": 0.06323786079883575, |
| "learning_rate": 4.9007983203662326e-05, |
| "loss": 0.0043, |
| "step": 10620 |
| }, |
| { |
| "epoch": 36.4041095890411, |
| "grad_norm": 0.054876018315553665, |
| "learning_rate": 4.892532737986387e-05, |
| "loss": 0.0033, |
| "step": 10630 |
| }, |
| { |
| "epoch": 36.43835616438356, |
| "grad_norm": 0.05244192108511925, |
| "learning_rate": 4.884267449417931e-05, |
| "loss": 0.004, |
| "step": 10640 |
| }, |
| { |
| "epoch": 36.47260273972603, |
| "grad_norm": 0.03981318697333336, |
| "learning_rate": 4.87600247725785e-05, |
| "loss": 0.0038, |
| "step": 10650 |
| }, |
| { |
| "epoch": 36.50684931506849, |
| "grad_norm": 0.07409898191690445, |
| "learning_rate": 4.867737844102261e-05, |
| "loss": 0.005, |
| "step": 10660 |
| }, |
| { |
| "epoch": 36.54109589041096, |
| "grad_norm": 0.052267491817474365, |
| "learning_rate": 4.8594735725463567e-05, |
| "loss": 0.0037, |
| "step": 10670 |
| }, |
| { |
| "epoch": 36.57534246575342, |
| "grad_norm": 0.06407027691602707, |
| "learning_rate": 4.851209685184338e-05, |
| "loss": 0.004, |
| "step": 10680 |
| }, |
| { |
| "epoch": 36.60958904109589, |
| "grad_norm": 0.07134547829627991, |
| "learning_rate": 4.8429462046093585e-05, |
| "loss": 0.0033, |
| "step": 10690 |
| }, |
| { |
| "epoch": 36.64383561643836, |
| "grad_norm": 0.07874727249145508, |
| "learning_rate": 4.834683153413459e-05, |
| "loss": 0.0041, |
| "step": 10700 |
| }, |
| { |
| "epoch": 36.678082191780824, |
| "grad_norm": 0.05885026976466179, |
| "learning_rate": 4.826420554187506e-05, |
| "loss": 0.0042, |
| "step": 10710 |
| }, |
| { |
| "epoch": 36.71232876712329, |
| "grad_norm": 0.04963897913694382, |
| "learning_rate": 4.818158429521129e-05, |
| "loss": 0.0042, |
| "step": 10720 |
| }, |
| { |
| "epoch": 36.74657534246575, |
| "grad_norm": 0.062294941395521164, |
| "learning_rate": 4.809896802002662e-05, |
| "loss": 0.0048, |
| "step": 10730 |
| }, |
| { |
| "epoch": 36.78082191780822, |
| "grad_norm": 0.04608921334147453, |
| "learning_rate": 4.801635694219079e-05, |
| "loss": 0.0032, |
| "step": 10740 |
| }, |
| { |
| "epoch": 36.81506849315068, |
| "grad_norm": 0.04494161158800125, |
| "learning_rate": 4.7933751287559335e-05, |
| "loss": 0.0046, |
| "step": 10750 |
| }, |
| { |
| "epoch": 36.84931506849315, |
| "grad_norm": 0.0627615675330162, |
| "learning_rate": 4.785115128197298e-05, |
| "loss": 0.0035, |
| "step": 10760 |
| }, |
| { |
| "epoch": 36.88356164383562, |
| "grad_norm": 0.0486239530146122, |
| "learning_rate": 4.776855715125694e-05, |
| "loss": 0.0043, |
| "step": 10770 |
| }, |
| { |
| "epoch": 36.917808219178085, |
| "grad_norm": 0.04762973263859749, |
| "learning_rate": 4.7685969121220456e-05, |
| "loss": 0.0034, |
| "step": 10780 |
| }, |
| { |
| "epoch": 36.95205479452055, |
| "grad_norm": 0.04642920196056366, |
| "learning_rate": 4.7603387417656026e-05, |
| "loss": 0.0041, |
| "step": 10790 |
| }, |
| { |
| "epoch": 36.986301369863014, |
| "grad_norm": 0.04814445227384567, |
| "learning_rate": 4.7520812266338885e-05, |
| "loss": 0.0049, |
| "step": 10800 |
| }, |
| { |
| "epoch": 37.02054794520548, |
| "grad_norm": 0.04671894758939743, |
| "learning_rate": 4.743824389302635e-05, |
| "loss": 0.0039, |
| "step": 10810 |
| }, |
| { |
| "epoch": 37.054794520547944, |
| "grad_norm": 0.03668942302465439, |
| "learning_rate": 4.735568252345718e-05, |
| "loss": 0.0042, |
| "step": 10820 |
| }, |
| { |
| "epoch": 37.08904109589041, |
| "grad_norm": 0.07752338796854019, |
| "learning_rate": 4.7273128383351015e-05, |
| "loss": 0.0037, |
| "step": 10830 |
| }, |
| { |
| "epoch": 37.12328767123287, |
| "grad_norm": 0.05838659033179283, |
| "learning_rate": 4.7190581698407725e-05, |
| "loss": 0.0042, |
| "step": 10840 |
| }, |
| { |
| "epoch": 37.157534246575345, |
| "grad_norm": 0.054939839988946915, |
| "learning_rate": 4.710804269430681e-05, |
| "loss": 0.0046, |
| "step": 10850 |
| }, |
| { |
| "epoch": 37.19178082191781, |
| "grad_norm": 0.06352076679468155, |
| "learning_rate": 4.702551159670672e-05, |
| "loss": 0.0059, |
| "step": 10860 |
| }, |
| { |
| "epoch": 37.226027397260275, |
| "grad_norm": 0.060442935675382614, |
| "learning_rate": 4.694298863124435e-05, |
| "loss": 0.0049, |
| "step": 10870 |
| }, |
| { |
| "epoch": 37.26027397260274, |
| "grad_norm": 0.0606268011033535, |
| "learning_rate": 4.6860474023534335e-05, |
| "loss": 0.0043, |
| "step": 10880 |
| }, |
| { |
| "epoch": 37.294520547945204, |
| "grad_norm": 0.10119619220495224, |
| "learning_rate": 4.677796799916845e-05, |
| "loss": 0.0051, |
| "step": 10890 |
| }, |
| { |
| "epoch": 37.32876712328767, |
| "grad_norm": 0.04673849418759346, |
| "learning_rate": 4.669547078371504e-05, |
| "loss": 0.0033, |
| "step": 10900 |
| }, |
| { |
| "epoch": 37.363013698630134, |
| "grad_norm": 0.04612864926457405, |
| "learning_rate": 4.66129826027183e-05, |
| "loss": 0.0034, |
| "step": 10910 |
| }, |
| { |
| "epoch": 37.397260273972606, |
| "grad_norm": 0.05315816029906273, |
| "learning_rate": 4.65305036816978e-05, |
| "loss": 0.0038, |
| "step": 10920 |
| }, |
| { |
| "epoch": 37.43150684931507, |
| "grad_norm": 0.04591621458530426, |
| "learning_rate": 4.6448034246147754e-05, |
| "loss": 0.005, |
| "step": 10930 |
| }, |
| { |
| "epoch": 37.465753424657535, |
| "grad_norm": 0.05279077589511871, |
| "learning_rate": 4.6365574521536445e-05, |
| "loss": 0.0038, |
| "step": 10940 |
| }, |
| { |
| "epoch": 37.5, |
| "grad_norm": 0.05146599933505058, |
| "learning_rate": 4.6283124733305624e-05, |
| "loss": 0.0043, |
| "step": 10950 |
| }, |
| { |
| "epoch": 37.534246575342465, |
| "grad_norm": 0.05392171069979668, |
| "learning_rate": 4.620068510686985e-05, |
| "loss": 0.0034, |
| "step": 10960 |
| }, |
| { |
| "epoch": 37.56849315068493, |
| "grad_norm": 0.05198705196380615, |
| "learning_rate": 4.611825586761591e-05, |
| "loss": 0.0041, |
| "step": 10970 |
| }, |
| { |
| "epoch": 37.602739726027394, |
| "grad_norm": 0.046402864158153534, |
| "learning_rate": 4.60358372409022e-05, |
| "loss": 0.003, |
| "step": 10980 |
| }, |
| { |
| "epoch": 37.636986301369866, |
| "grad_norm": 0.06605874747037888, |
| "learning_rate": 4.5953429452058135e-05, |
| "loss": 0.0053, |
| "step": 10990 |
| }, |
| { |
| "epoch": 37.67123287671233, |
| "grad_norm": 0.04339564964175224, |
| "learning_rate": 4.5871032726383386e-05, |
| "loss": 0.0034, |
| "step": 11000 |
| }, |
| { |
| "epoch": 37.705479452054796, |
| "grad_norm": 0.0516190268099308, |
| "learning_rate": 4.5788647289147516e-05, |
| "loss": 0.0036, |
| "step": 11010 |
| }, |
| { |
| "epoch": 37.73972602739726, |
| "grad_norm": 0.07312241941690445, |
| "learning_rate": 4.570627336558915e-05, |
| "loss": 0.0035, |
| "step": 11020 |
| }, |
| { |
| "epoch": 37.773972602739725, |
| "grad_norm": 0.03444405645132065, |
| "learning_rate": 4.562391118091544e-05, |
| "loss": 0.0029, |
| "step": 11030 |
| }, |
| { |
| "epoch": 37.80821917808219, |
| "grad_norm": 0.03673717379570007, |
| "learning_rate": 4.554156096030149e-05, |
| "loss": 0.0035, |
| "step": 11040 |
| }, |
| { |
| "epoch": 37.842465753424655, |
| "grad_norm": 0.04546872526407242, |
| "learning_rate": 4.545922292888959e-05, |
| "loss": 0.0045, |
| "step": 11050 |
| }, |
| { |
| "epoch": 37.87671232876713, |
| "grad_norm": 0.041294973343610764, |
| "learning_rate": 4.537689731178883e-05, |
| "loss": 0.0036, |
| "step": 11060 |
| }, |
| { |
| "epoch": 37.91095890410959, |
| "grad_norm": 0.04516911506652832, |
| "learning_rate": 4.529458433407429e-05, |
| "loss": 0.0054, |
| "step": 11070 |
| }, |
| { |
| "epoch": 37.945205479452056, |
| "grad_norm": 0.05957014858722687, |
| "learning_rate": 4.5212284220786494e-05, |
| "loss": 0.0032, |
| "step": 11080 |
| }, |
| { |
| "epoch": 37.97945205479452, |
| "grad_norm": 0.05336372181773186, |
| "learning_rate": 4.5129997196930845e-05, |
| "loss": 0.0047, |
| "step": 11090 |
| }, |
| { |
| "epoch": 38.013698630136986, |
| "grad_norm": 0.05886836349964142, |
| "learning_rate": 4.504772348747687e-05, |
| "loss": 0.0039, |
| "step": 11100 |
| }, |
| { |
| "epoch": 38.04794520547945, |
| "grad_norm": 0.05502059683203697, |
| "learning_rate": 4.496546331735778e-05, |
| "loss": 0.0043, |
| "step": 11110 |
| }, |
| { |
| "epoch": 38.082191780821915, |
| "grad_norm": 0.04685702919960022, |
| "learning_rate": 4.488321691146975e-05, |
| "loss": 0.0038, |
| "step": 11120 |
| }, |
| { |
| "epoch": 38.11643835616438, |
| "grad_norm": 0.0464017279446125, |
| "learning_rate": 4.480098449467132e-05, |
| "loss": 0.0062, |
| "step": 11130 |
| }, |
| { |
| "epoch": 38.15068493150685, |
| "grad_norm": 0.043533407151699066, |
| "learning_rate": 4.471876629178273e-05, |
| "loss": 0.0034, |
| "step": 11140 |
| }, |
| { |
| "epoch": 38.18493150684932, |
| "grad_norm": 0.04644559323787689, |
| "learning_rate": 4.463656252758542e-05, |
| "loss": 0.0035, |
| "step": 11150 |
| }, |
| { |
| "epoch": 38.21917808219178, |
| "grad_norm": 0.05499809607863426, |
| "learning_rate": 4.4554373426821374e-05, |
| "loss": 0.0036, |
| "step": 11160 |
| }, |
| { |
| "epoch": 38.25342465753425, |
| "grad_norm": 0.07476824522018433, |
| "learning_rate": 4.447219921419244e-05, |
| "loss": 0.005, |
| "step": 11170 |
| }, |
| { |
| "epoch": 38.28767123287671, |
| "grad_norm": 0.07308005541563034, |
| "learning_rate": 4.439004011435979e-05, |
| "loss": 0.0035, |
| "step": 11180 |
| }, |
| { |
| "epoch": 38.321917808219176, |
| "grad_norm": 0.045294955372810364, |
| "learning_rate": 4.430789635194324e-05, |
| "loss": 0.0031, |
| "step": 11190 |
| }, |
| { |
| "epoch": 38.35616438356164, |
| "grad_norm": 0.05321976915001869, |
| "learning_rate": 4.4225768151520694e-05, |
| "loss": 0.0049, |
| "step": 11200 |
| }, |
| { |
| "epoch": 38.39041095890411, |
| "grad_norm": 0.05076253041625023, |
| "learning_rate": 4.414365573762755e-05, |
| "loss": 0.0051, |
| "step": 11210 |
| }, |
| { |
| "epoch": 38.42465753424658, |
| "grad_norm": 0.054397087544202805, |
| "learning_rate": 4.406155933475599e-05, |
| "loss": 0.0053, |
| "step": 11220 |
| }, |
| { |
| "epoch": 38.45890410958904, |
| "grad_norm": 0.05751054361462593, |
| "learning_rate": 4.3979479167354477e-05, |
| "loss": 0.0035, |
| "step": 11230 |
| }, |
| { |
| "epoch": 38.49315068493151, |
| "grad_norm": 0.050182171165943146, |
| "learning_rate": 4.3897415459827e-05, |
| "loss": 0.0031, |
| "step": 11240 |
| }, |
| { |
| "epoch": 38.52739726027397, |
| "grad_norm": 0.04209424555301666, |
| "learning_rate": 4.381536843653262e-05, |
| "loss": 0.004, |
| "step": 11250 |
| }, |
| { |
| "epoch": 38.56164383561644, |
| "grad_norm": 0.04507875442504883, |
| "learning_rate": 4.373333832178478e-05, |
| "loss": 0.0036, |
| "step": 11260 |
| }, |
| { |
| "epoch": 38.5958904109589, |
| "grad_norm": 0.052286434918642044, |
| "learning_rate": 4.365132533985071e-05, |
| "loss": 0.0041, |
| "step": 11270 |
| }, |
| { |
| "epoch": 38.63013698630137, |
| "grad_norm": 0.04619704186916351, |
| "learning_rate": 4.3569329714950704e-05, |
| "loss": 0.0036, |
| "step": 11280 |
| }, |
| { |
| "epoch": 38.66438356164384, |
| "grad_norm": 0.0394359715282917, |
| "learning_rate": 4.348735167125771e-05, |
| "loss": 0.0031, |
| "step": 11290 |
| }, |
| { |
| "epoch": 38.6986301369863, |
| "grad_norm": 0.06122252345085144, |
| "learning_rate": 4.3405391432896555e-05, |
| "loss": 0.0032, |
| "step": 11300 |
| }, |
| { |
| "epoch": 38.73287671232877, |
| "grad_norm": 0.043462276458740234, |
| "learning_rate": 4.3323449223943416e-05, |
| "loss": 0.0031, |
| "step": 11310 |
| }, |
| { |
| "epoch": 38.76712328767123, |
| "grad_norm": 0.04255270957946777, |
| "learning_rate": 4.324152526842517e-05, |
| "loss": 0.0034, |
| "step": 11320 |
| }, |
| { |
| "epoch": 38.8013698630137, |
| "grad_norm": 0.042190827429294586, |
| "learning_rate": 4.315961979031875e-05, |
| "loss": 0.0043, |
| "step": 11330 |
| }, |
| { |
| "epoch": 38.83561643835616, |
| "grad_norm": 0.06325828284025192, |
| "learning_rate": 4.307773301355062e-05, |
| "loss": 0.0051, |
| "step": 11340 |
| }, |
| { |
| "epoch": 38.86986301369863, |
| "grad_norm": 0.04807128384709358, |
| "learning_rate": 4.2995865161996105e-05, |
| "loss": 0.0039, |
| "step": 11350 |
| }, |
| { |
| "epoch": 38.9041095890411, |
| "grad_norm": 0.05032104253768921, |
| "learning_rate": 4.291401645947879e-05, |
| "loss": 0.0038, |
| "step": 11360 |
| }, |
| { |
| "epoch": 38.93835616438356, |
| "grad_norm": 0.060034602880477905, |
| "learning_rate": 4.283218712976992e-05, |
| "loss": 0.0035, |
| "step": 11370 |
| }, |
| { |
| "epoch": 38.97260273972603, |
| "grad_norm": 0.04731849208474159, |
| "learning_rate": 4.275037739658771e-05, |
| "loss": 0.0036, |
| "step": 11380 |
| }, |
| { |
| "epoch": 39.00684931506849, |
| "grad_norm": 0.054858967661857605, |
| "learning_rate": 4.2668587483596864e-05, |
| "loss": 0.004, |
| "step": 11390 |
| }, |
| { |
| "epoch": 39.04109589041096, |
| "grad_norm": 0.06338762491941452, |
| "learning_rate": 4.2586817614407895e-05, |
| "loss": 0.0036, |
| "step": 11400 |
| }, |
| { |
| "epoch": 39.07534246575342, |
| "grad_norm": 0.03571184724569321, |
| "learning_rate": 4.250506801257653e-05, |
| "loss": 0.0036, |
| "step": 11410 |
| }, |
| { |
| "epoch": 39.10958904109589, |
| "grad_norm": 0.03335060179233551, |
| "learning_rate": 4.2423338901602985e-05, |
| "loss": 0.0033, |
| "step": 11420 |
| }, |
| { |
| "epoch": 39.14383561643836, |
| "grad_norm": 0.04027834162116051, |
| "learning_rate": 4.234163050493158e-05, |
| "loss": 0.0042, |
| "step": 11430 |
| }, |
| { |
| "epoch": 39.178082191780824, |
| "grad_norm": 0.049982279539108276, |
| "learning_rate": 4.2259943045949934e-05, |
| "loss": 0.0041, |
| "step": 11440 |
| }, |
| { |
| "epoch": 39.21232876712329, |
| "grad_norm": 0.06824607402086258, |
| "learning_rate": 4.2178276747988446e-05, |
| "loss": 0.004, |
| "step": 11450 |
| }, |
| { |
| "epoch": 39.24657534246575, |
| "grad_norm": 0.04790511727333069, |
| "learning_rate": 4.209663183431969e-05, |
| "loss": 0.0037, |
| "step": 11460 |
| }, |
| { |
| "epoch": 39.28082191780822, |
| "grad_norm": 0.07713089883327484, |
| "learning_rate": 4.201500852815768e-05, |
| "loss": 0.0037, |
| "step": 11470 |
| }, |
| { |
| "epoch": 39.31506849315068, |
| "grad_norm": 0.0667371153831482, |
| "learning_rate": 4.1933407052657456e-05, |
| "loss": 0.0048, |
| "step": 11480 |
| }, |
| { |
| "epoch": 39.34931506849315, |
| "grad_norm": 0.05451853573322296, |
| "learning_rate": 4.1851827630914305e-05, |
| "loss": 0.0034, |
| "step": 11490 |
| }, |
| { |
| "epoch": 39.38356164383562, |
| "grad_norm": 0.05602137744426727, |
| "learning_rate": 4.17702704859633e-05, |
| "loss": 0.0035, |
| "step": 11500 |
| }, |
| { |
| "epoch": 39.417808219178085, |
| "grad_norm": 0.07803839445114136, |
| "learning_rate": 4.1688735840778546e-05, |
| "loss": 0.0033, |
| "step": 11510 |
| }, |
| { |
| "epoch": 39.45205479452055, |
| "grad_norm": 0.04363902285695076, |
| "learning_rate": 4.160722391827262e-05, |
| "loss": 0.0034, |
| "step": 11520 |
| }, |
| { |
| "epoch": 39.486301369863014, |
| "grad_norm": 0.06316430121660233, |
| "learning_rate": 4.1525734941296026e-05, |
| "loss": 0.0037, |
| "step": 11530 |
| }, |
| { |
| "epoch": 39.52054794520548, |
| "grad_norm": 0.09381528198719025, |
| "learning_rate": 4.14442691326365e-05, |
| "loss": 0.005, |
| "step": 11540 |
| }, |
| { |
| "epoch": 39.554794520547944, |
| "grad_norm": 0.0672030970454216, |
| "learning_rate": 4.13628267150185e-05, |
| "loss": 0.0039, |
| "step": 11550 |
| }, |
| { |
| "epoch": 39.58904109589041, |
| "grad_norm": 0.05352475121617317, |
| "learning_rate": 4.1281407911102425e-05, |
| "loss": 0.0032, |
| "step": 11560 |
| }, |
| { |
| "epoch": 39.62328767123287, |
| "grad_norm": 0.05327161028981209, |
| "learning_rate": 4.120001294348421e-05, |
| "loss": 0.0033, |
| "step": 11570 |
| }, |
| { |
| "epoch": 39.657534246575345, |
| "grad_norm": 0.04990739747881889, |
| "learning_rate": 4.111864203469457e-05, |
| "loss": 0.0041, |
| "step": 11580 |
| }, |
| { |
| "epoch": 39.69178082191781, |
| "grad_norm": 0.04503406584262848, |
| "learning_rate": 4.103729540719847e-05, |
| "loss": 0.0034, |
| "step": 11590 |
| }, |
| { |
| "epoch": 39.726027397260275, |
| "grad_norm": 0.05482426658272743, |
| "learning_rate": 4.095597328339452e-05, |
| "loss": 0.0063, |
| "step": 11600 |
| }, |
| { |
| "epoch": 39.76027397260274, |
| "grad_norm": 0.047005269676446915, |
| "learning_rate": 4.087467588561424e-05, |
| "loss": 0.0028, |
| "step": 11610 |
| }, |
| { |
| "epoch": 39.794520547945204, |
| "grad_norm": 0.05959664657711983, |
| "learning_rate": 4.079340343612165e-05, |
| "loss": 0.0039, |
| "step": 11620 |
| }, |
| { |
| "epoch": 39.82876712328767, |
| "grad_norm": 0.06386947631835938, |
| "learning_rate": 4.07121561571125e-05, |
| "loss": 0.0027, |
| "step": 11630 |
| }, |
| { |
| "epoch": 39.863013698630134, |
| "grad_norm": 0.07454657554626465, |
| "learning_rate": 4.063093427071376e-05, |
| "loss": 0.004, |
| "step": 11640 |
| }, |
| { |
| "epoch": 39.897260273972606, |
| "grad_norm": 0.04651208966970444, |
| "learning_rate": 4.0549737998983e-05, |
| "loss": 0.0036, |
| "step": 11650 |
| }, |
| { |
| "epoch": 39.93150684931507, |
| "grad_norm": 0.04951368644833565, |
| "learning_rate": 4.046856756390767e-05, |
| "loss": 0.0042, |
| "step": 11660 |
| }, |
| { |
| "epoch": 39.965753424657535, |
| "grad_norm": 0.047568511217832565, |
| "learning_rate": 4.038742318740465e-05, |
| "loss": 0.003, |
| "step": 11670 |
| }, |
| { |
| "epoch": 40.0, |
| "grad_norm": 0.04516076669096947, |
| "learning_rate": 4.0306305091319595e-05, |
| "loss": 0.0043, |
| "step": 11680 |
| }, |
| { |
| "epoch": 40.034246575342465, |
| "grad_norm": 0.05222661420702934, |
| "learning_rate": 4.0225213497426276e-05, |
| "loss": 0.0049, |
| "step": 11690 |
| }, |
| { |
| "epoch": 40.06849315068493, |
| "grad_norm": 0.04708956927061081, |
| "learning_rate": 4.0144148627425993e-05, |
| "loss": 0.0036, |
| "step": 11700 |
| }, |
| { |
| "epoch": 40.102739726027394, |
| "grad_norm": 0.05811784416437149, |
| "learning_rate": 4.006311070294702e-05, |
| "loss": 0.0047, |
| "step": 11710 |
| }, |
| { |
| "epoch": 40.136986301369866, |
| "grad_norm": 0.05184715986251831, |
| "learning_rate": 3.9982099945543945e-05, |
| "loss": 0.0044, |
| "step": 11720 |
| }, |
| { |
| "epoch": 40.17123287671233, |
| "grad_norm": 0.05277574062347412, |
| "learning_rate": 3.9901116576697083e-05, |
| "loss": 0.0045, |
| "step": 11730 |
| }, |
| { |
| "epoch": 40.205479452054796, |
| "grad_norm": 0.038952380418777466, |
| "learning_rate": 3.982016081781189e-05, |
| "loss": 0.0032, |
| "step": 11740 |
| }, |
| { |
| "epoch": 40.23972602739726, |
| "grad_norm": 0.059717144817113876, |
| "learning_rate": 3.973923289021829e-05, |
| "loss": 0.0031, |
| "step": 11750 |
| }, |
| { |
| "epoch": 40.273972602739725, |
| "grad_norm": 0.06439585983753204, |
| "learning_rate": 3.965833301517017e-05, |
| "loss": 0.0047, |
| "step": 11760 |
| }, |
| { |
| "epoch": 40.30821917808219, |
| "grad_norm": 0.05810396373271942, |
| "learning_rate": 3.9577461413844684e-05, |
| "loss": 0.0044, |
| "step": 11770 |
| }, |
| { |
| "epoch": 40.342465753424655, |
| "grad_norm": 0.043367937207221985, |
| "learning_rate": 3.949661830734172e-05, |
| "loss": 0.003, |
| "step": 11780 |
| }, |
| { |
| "epoch": 40.37671232876713, |
| "grad_norm": 0.046338461339473724, |
| "learning_rate": 3.9415803916683224e-05, |
| "loss": 0.0045, |
| "step": 11790 |
| }, |
| { |
| "epoch": 40.41095890410959, |
| "grad_norm": 0.04636251553893089, |
| "learning_rate": 3.933501846281267e-05, |
| "loss": 0.0038, |
| "step": 11800 |
| }, |
| { |
| "epoch": 40.445205479452056, |
| "grad_norm": 0.03807734698057175, |
| "learning_rate": 3.925426216659438e-05, |
| "loss": 0.0028, |
| "step": 11810 |
| }, |
| { |
| "epoch": 40.47945205479452, |
| "grad_norm": 0.0459088459610939, |
| "learning_rate": 3.917353524881302e-05, |
| "loss": 0.0032, |
| "step": 11820 |
| }, |
| { |
| "epoch": 40.513698630136986, |
| "grad_norm": 0.032536957412958145, |
| "learning_rate": 3.9092837930172884e-05, |
| "loss": 0.0029, |
| "step": 11830 |
| }, |
| { |
| "epoch": 40.54794520547945, |
| "grad_norm": 0.03757496923208237, |
| "learning_rate": 3.901217043129735e-05, |
| "loss": 0.0031, |
| "step": 11840 |
| }, |
| { |
| "epoch": 40.582191780821915, |
| "grad_norm": 0.048789843916893005, |
| "learning_rate": 3.8931532972728285e-05, |
| "loss": 0.003, |
| "step": 11850 |
| }, |
| { |
| "epoch": 40.61643835616438, |
| "grad_norm": 0.07031738758087158, |
| "learning_rate": 3.8850925774925425e-05, |
| "loss": 0.0037, |
| "step": 11860 |
| }, |
| { |
| "epoch": 40.65068493150685, |
| "grad_norm": 0.05289865657687187, |
| "learning_rate": 3.877034905826577e-05, |
| "loss": 0.0043, |
| "step": 11870 |
| }, |
| { |
| "epoch": 40.68493150684932, |
| "grad_norm": 0.044006284326314926, |
| "learning_rate": 3.8689803043043e-05, |
| "loss": 0.0038, |
| "step": 11880 |
| }, |
| { |
| "epoch": 40.71917808219178, |
| "grad_norm": 0.041351962834596634, |
| "learning_rate": 3.860928794946682e-05, |
| "loss": 0.0033, |
| "step": 11890 |
| }, |
| { |
| "epoch": 40.75342465753425, |
| "grad_norm": 0.04072045534849167, |
| "learning_rate": 3.852880399766243e-05, |
| "loss": 0.0037, |
| "step": 11900 |
| }, |
| { |
| "epoch": 40.78767123287671, |
| "grad_norm": 0.06456096470355988, |
| "learning_rate": 3.844835140766988e-05, |
| "loss": 0.0038, |
| "step": 11910 |
| }, |
| { |
| "epoch": 40.821917808219176, |
| "grad_norm": 0.05244871601462364, |
| "learning_rate": 3.836793039944349e-05, |
| "loss": 0.0033, |
| "step": 11920 |
| }, |
| { |
| "epoch": 40.85616438356164, |
| "grad_norm": 0.04571353644132614, |
| "learning_rate": 3.828754119285123e-05, |
| "loss": 0.0031, |
| "step": 11930 |
| }, |
| { |
| "epoch": 40.89041095890411, |
| "grad_norm": 0.03916588053107262, |
| "learning_rate": 3.820718400767409e-05, |
| "loss": 0.0034, |
| "step": 11940 |
| }, |
| { |
| "epoch": 40.92465753424658, |
| "grad_norm": 0.03788131847977638, |
| "learning_rate": 3.812685906360557e-05, |
| "loss": 0.004, |
| "step": 11950 |
| }, |
| { |
| "epoch": 40.95890410958904, |
| "grad_norm": 0.0459442101418972, |
| "learning_rate": 3.8046566580251e-05, |
| "loss": 0.0031, |
| "step": 11960 |
| }, |
| { |
| "epoch": 40.99315068493151, |
| "grad_norm": 0.058368634432554245, |
| "learning_rate": 3.796630677712697e-05, |
| "loss": 0.0029, |
| "step": 11970 |
| }, |
| { |
| "epoch": 41.02739726027397, |
| "grad_norm": 0.04359531030058861, |
| "learning_rate": 3.788607987366069e-05, |
| "loss": 0.0039, |
| "step": 11980 |
| }, |
| { |
| "epoch": 41.06164383561644, |
| "grad_norm": 0.04517539590597153, |
| "learning_rate": 3.780588608918947e-05, |
| "loss": 0.0025, |
| "step": 11990 |
| }, |
| { |
| "epoch": 41.0958904109589, |
| "grad_norm": 0.0433451309800148, |
| "learning_rate": 3.772572564296005e-05, |
| "loss": 0.0034, |
| "step": 12000 |
| }, |
| { |
| "epoch": 41.13013698630137, |
| "grad_norm": 0.04753587022423744, |
| "learning_rate": 3.764559875412803e-05, |
| "loss": 0.0037, |
| "step": 12010 |
| }, |
| { |
| "epoch": 41.16438356164384, |
| "grad_norm": 0.06799773871898651, |
| "learning_rate": 3.756550564175727e-05, |
| "loss": 0.0033, |
| "step": 12020 |
| }, |
| { |
| "epoch": 41.1986301369863, |
| "grad_norm": 0.05588128790259361, |
| "learning_rate": 3.748544652481927e-05, |
| "loss": 0.0044, |
| "step": 12030 |
| }, |
| { |
| "epoch": 41.23287671232877, |
| "grad_norm": 0.06136851757764816, |
| "learning_rate": 3.74054216221926e-05, |
| "loss": 0.0029, |
| "step": 12040 |
| }, |
| { |
| "epoch": 41.26712328767123, |
| "grad_norm": 0.052555352449417114, |
| "learning_rate": 3.73254311526623e-05, |
| "loss": 0.0039, |
| "step": 12050 |
| }, |
| { |
| "epoch": 41.3013698630137, |
| "grad_norm": 0.04874827712774277, |
| "learning_rate": 3.7245475334919246e-05, |
| "loss": 0.0036, |
| "step": 12060 |
| }, |
| { |
| "epoch": 41.33561643835616, |
| "grad_norm": 0.047369617968797684, |
| "learning_rate": 3.716555438755961e-05, |
| "loss": 0.0034, |
| "step": 12070 |
| }, |
| { |
| "epoch": 41.36986301369863, |
| "grad_norm": 0.04781021550297737, |
| "learning_rate": 3.7085668529084184e-05, |
| "loss": 0.0035, |
| "step": 12080 |
| }, |
| { |
| "epoch": 41.4041095890411, |
| "grad_norm": 0.05383175238966942, |
| "learning_rate": 3.700581797789786e-05, |
| "loss": 0.0037, |
| "step": 12090 |
| }, |
| { |
| "epoch": 41.43835616438356, |
| "grad_norm": 0.03811519220471382, |
| "learning_rate": 3.6926002952309016e-05, |
| "loss": 0.0037, |
| "step": 12100 |
| }, |
| { |
| "epoch": 41.47260273972603, |
| "grad_norm": 0.03701299428939819, |
| "learning_rate": 3.684622367052887e-05, |
| "loss": 0.0049, |
| "step": 12110 |
| }, |
| { |
| "epoch": 41.50684931506849, |
| "grad_norm": 0.03665095567703247, |
| "learning_rate": 3.676648035067093e-05, |
| "loss": 0.0041, |
| "step": 12120 |
| }, |
| { |
| "epoch": 41.54109589041096, |
| "grad_norm": 0.043359316885471344, |
| "learning_rate": 3.6686773210750385e-05, |
| "loss": 0.0034, |
| "step": 12130 |
| }, |
| { |
| "epoch": 41.57534246575342, |
| "grad_norm": 0.03668743371963501, |
| "learning_rate": 3.6607102468683526e-05, |
| "loss": 0.0033, |
| "step": 12140 |
| }, |
| { |
| "epoch": 41.60958904109589, |
| "grad_norm": 0.033484235405921936, |
| "learning_rate": 3.65274683422871e-05, |
| "loss": 0.0044, |
| "step": 12150 |
| }, |
| { |
| "epoch": 41.64383561643836, |
| "grad_norm": 0.04815152660012245, |
| "learning_rate": 3.6447871049277796e-05, |
| "loss": 0.0038, |
| "step": 12160 |
| }, |
| { |
| "epoch": 41.678082191780824, |
| "grad_norm": 0.05247364938259125, |
| "learning_rate": 3.636831080727154e-05, |
| "loss": 0.0033, |
| "step": 12170 |
| }, |
| { |
| "epoch": 41.71232876712329, |
| "grad_norm": 0.051211703568696976, |
| "learning_rate": 3.628878783378302e-05, |
| "loss": 0.0035, |
| "step": 12180 |
| }, |
| { |
| "epoch": 41.74657534246575, |
| "grad_norm": 0.056743912398815155, |
| "learning_rate": 3.6209302346225006e-05, |
| "loss": 0.0044, |
| "step": 12190 |
| }, |
| { |
| "epoch": 41.78082191780822, |
| "grad_norm": 0.044049229472875595, |
| "learning_rate": 3.612985456190778e-05, |
| "loss": 0.0036, |
| "step": 12200 |
| }, |
| { |
| "epoch": 41.81506849315068, |
| "grad_norm": 0.04488535597920418, |
| "learning_rate": 3.605044469803854e-05, |
| "loss": 0.0036, |
| "step": 12210 |
| }, |
| { |
| "epoch": 41.84931506849315, |
| "grad_norm": 0.05490657687187195, |
| "learning_rate": 3.597107297172084e-05, |
| "loss": 0.0049, |
| "step": 12220 |
| }, |
| { |
| "epoch": 41.88356164383562, |
| "grad_norm": 0.05091523379087448, |
| "learning_rate": 3.5891739599953945e-05, |
| "loss": 0.0035, |
| "step": 12230 |
| }, |
| { |
| "epoch": 41.917808219178085, |
| "grad_norm": 0.04404463246464729, |
| "learning_rate": 3.581244479963225e-05, |
| "loss": 0.0034, |
| "step": 12240 |
| }, |
| { |
| "epoch": 41.95205479452055, |
| "grad_norm": 0.05935285985469818, |
| "learning_rate": 3.5733188787544745e-05, |
| "loss": 0.0044, |
| "step": 12250 |
| }, |
| { |
| "epoch": 41.986301369863014, |
| "grad_norm": 0.048552870750427246, |
| "learning_rate": 3.5653971780374295e-05, |
| "loss": 0.0038, |
| "step": 12260 |
| }, |
| { |
| "epoch": 42.02054794520548, |
| "grad_norm": 0.06793242692947388, |
| "learning_rate": 3.557479399469721e-05, |
| "loss": 0.0049, |
| "step": 12270 |
| }, |
| { |
| "epoch": 42.054794520547944, |
| "grad_norm": 0.05885540693998337, |
| "learning_rate": 3.5495655646982505e-05, |
| "loss": 0.004, |
| "step": 12280 |
| }, |
| { |
| "epoch": 42.08904109589041, |
| "grad_norm": 0.05235716328024864, |
| "learning_rate": 3.541655695359142e-05, |
| "loss": 0.0045, |
| "step": 12290 |
| }, |
| { |
| "epoch": 42.12328767123287, |
| "grad_norm": 0.05270376428961754, |
| "learning_rate": 3.533749813077677e-05, |
| "loss": 0.0039, |
| "step": 12300 |
| }, |
| { |
| "epoch": 42.157534246575345, |
| "grad_norm": 0.06084022298455238, |
| "learning_rate": 3.525847939468233e-05, |
| "loss": 0.0041, |
| "step": 12310 |
| }, |
| { |
| "epoch": 42.19178082191781, |
| "grad_norm": 0.04897018149495125, |
| "learning_rate": 3.517950096134232e-05, |
| "loss": 0.003, |
| "step": 12320 |
| }, |
| { |
| "epoch": 42.226027397260275, |
| "grad_norm": 0.04482729360461235, |
| "learning_rate": 3.5100563046680764e-05, |
| "loss": 0.0035, |
| "step": 12330 |
| }, |
| { |
| "epoch": 42.26027397260274, |
| "grad_norm": 0.03667287901043892, |
| "learning_rate": 3.5021665866510925e-05, |
| "loss": 0.0038, |
| "step": 12340 |
| }, |
| { |
| "epoch": 42.294520547945204, |
| "grad_norm": 0.03215208277106285, |
| "learning_rate": 3.494280963653463e-05, |
| "loss": 0.0042, |
| "step": 12350 |
| }, |
| { |
| "epoch": 42.32876712328767, |
| "grad_norm": 0.061448678374290466, |
| "learning_rate": 3.4863994572341843e-05, |
| "loss": 0.0042, |
| "step": 12360 |
| }, |
| { |
| "epoch": 42.363013698630134, |
| "grad_norm": 0.043668776750564575, |
| "learning_rate": 3.478522088940993e-05, |
| "loss": 0.0034, |
| "step": 12370 |
| }, |
| { |
| "epoch": 42.397260273972606, |
| "grad_norm": 0.05667470395565033, |
| "learning_rate": 3.470648880310313e-05, |
| "loss": 0.0048, |
| "step": 12380 |
| }, |
| { |
| "epoch": 42.43150684931507, |
| "grad_norm": 0.04313276335597038, |
| "learning_rate": 3.462779852867197e-05, |
| "loss": 0.0031, |
| "step": 12390 |
| }, |
| { |
| "epoch": 42.465753424657535, |
| "grad_norm": 0.04941844195127487, |
| "learning_rate": 3.4549150281252636e-05, |
| "loss": 0.0037, |
| "step": 12400 |
| }, |
| { |
| "epoch": 42.5, |
| "grad_norm": 0.04372037574648857, |
| "learning_rate": 3.447054427586644e-05, |
| "loss": 0.004, |
| "step": 12410 |
| }, |
| { |
| "epoch": 42.534246575342465, |
| "grad_norm": 0.03683243691921234, |
| "learning_rate": 3.439198072741921e-05, |
| "loss": 0.0032, |
| "step": 12420 |
| }, |
| { |
| "epoch": 42.56849315068493, |
| "grad_norm": 0.03871821239590645, |
| "learning_rate": 3.431345985070067e-05, |
| "loss": 0.0033, |
| "step": 12430 |
| }, |
| { |
| "epoch": 42.602739726027394, |
| "grad_norm": 0.04553896188735962, |
| "learning_rate": 3.423498186038393e-05, |
| "loss": 0.003, |
| "step": 12440 |
| }, |
| { |
| "epoch": 42.636986301369866, |
| "grad_norm": 0.042918987572193146, |
| "learning_rate": 3.4156546971024784e-05, |
| "loss": 0.0029, |
| "step": 12450 |
| }, |
| { |
| "epoch": 42.67123287671233, |
| "grad_norm": 0.04090527072548866, |
| "learning_rate": 3.407815539706124e-05, |
| "loss": 0.0036, |
| "step": 12460 |
| }, |
| { |
| "epoch": 42.705479452054796, |
| "grad_norm": 0.059398066252470016, |
| "learning_rate": 3.399980735281286e-05, |
| "loss": 0.0032, |
| "step": 12470 |
| }, |
| { |
| "epoch": 42.73972602739726, |
| "grad_norm": 0.050417881458997726, |
| "learning_rate": 3.392150305248024e-05, |
| "loss": 0.0033, |
| "step": 12480 |
| }, |
| { |
| "epoch": 42.773972602739725, |
| "grad_norm": 0.05215463042259216, |
| "learning_rate": 3.384324271014429e-05, |
| "loss": 0.004, |
| "step": 12490 |
| }, |
| { |
| "epoch": 42.80821917808219, |
| "grad_norm": 0.04774712771177292, |
| "learning_rate": 3.3765026539765834e-05, |
| "loss": 0.0041, |
| "step": 12500 |
| }, |
| { |
| "epoch": 42.842465753424655, |
| "grad_norm": 0.04074811935424805, |
| "learning_rate": 3.368685475518488e-05, |
| "loss": 0.0031, |
| "step": 12510 |
| }, |
| { |
| "epoch": 42.87671232876713, |
| "grad_norm": 0.04318001866340637, |
| "learning_rate": 3.360872757012011e-05, |
| "loss": 0.0034, |
| "step": 12520 |
| }, |
| { |
| "epoch": 42.91095890410959, |
| "grad_norm": 0.04521534591913223, |
| "learning_rate": 3.3530645198168295e-05, |
| "loss": 0.0039, |
| "step": 12530 |
| }, |
| { |
| "epoch": 42.945205479452056, |
| "grad_norm": 0.045407313853502274, |
| "learning_rate": 3.3452607852803584e-05, |
| "loss": 0.0046, |
| "step": 12540 |
| }, |
| { |
| "epoch": 42.97945205479452, |
| "grad_norm": 0.03356282040476799, |
| "learning_rate": 3.337461574737716e-05, |
| "loss": 0.0033, |
| "step": 12550 |
| }, |
| { |
| "epoch": 43.013698630136986, |
| "grad_norm": 0.03246859833598137, |
| "learning_rate": 3.329666909511645e-05, |
| "loss": 0.0036, |
| "step": 12560 |
| }, |
| { |
| "epoch": 43.04794520547945, |
| "grad_norm": 0.054344963282346725, |
| "learning_rate": 3.321876810912461e-05, |
| "loss": 0.0037, |
| "step": 12570 |
| }, |
| { |
| "epoch": 43.082191780821915, |
| "grad_norm": 0.04024481028318405, |
| "learning_rate": 3.3140913002379995e-05, |
| "loss": 0.0031, |
| "step": 12580 |
| }, |
| { |
| "epoch": 43.11643835616438, |
| "grad_norm": 0.036860208958387375, |
| "learning_rate": 3.3063103987735433e-05, |
| "loss": 0.0033, |
| "step": 12590 |
| }, |
| { |
| "epoch": 43.15068493150685, |
| "grad_norm": 0.036103636026382446, |
| "learning_rate": 3.298534127791785e-05, |
| "loss": 0.0028, |
| "step": 12600 |
| }, |
| { |
| "epoch": 43.18493150684932, |
| "grad_norm": 0.047203242778778076, |
| "learning_rate": 3.2907625085527503e-05, |
| "loss": 0.0034, |
| "step": 12610 |
| }, |
| { |
| "epoch": 43.21917808219178, |
| "grad_norm": 0.034795694053173065, |
| "learning_rate": 3.282995562303754e-05, |
| "loss": 0.0041, |
| "step": 12620 |
| }, |
| { |
| "epoch": 43.25342465753425, |
| "grad_norm": 0.041322045028209686, |
| "learning_rate": 3.275233310279321e-05, |
| "loss": 0.003, |
| "step": 12630 |
| }, |
| { |
| "epoch": 43.28767123287671, |
| "grad_norm": 0.03881575167179108, |
| "learning_rate": 3.267475773701161e-05, |
| "loss": 0.0036, |
| "step": 12640 |
| }, |
| { |
| "epoch": 43.321917808219176, |
| "grad_norm": 0.0376458577811718, |
| "learning_rate": 3.2597229737780774e-05, |
| "loss": 0.0042, |
| "step": 12650 |
| }, |
| { |
| "epoch": 43.35616438356164, |
| "grad_norm": 0.06743606925010681, |
| "learning_rate": 3.251974931705933e-05, |
| "loss": 0.0043, |
| "step": 12660 |
| }, |
| { |
| "epoch": 43.39041095890411, |
| "grad_norm": 0.04050120338797569, |
| "learning_rate": 3.244231668667578e-05, |
| "loss": 0.0031, |
| "step": 12670 |
| }, |
| { |
| "epoch": 43.42465753424658, |
| "grad_norm": 0.03748546540737152, |
| "learning_rate": 3.236493205832795e-05, |
| "loss": 0.0026, |
| "step": 12680 |
| }, |
| { |
| "epoch": 43.45890410958904, |
| "grad_norm": 0.026954837143421173, |
| "learning_rate": 3.228759564358248e-05, |
| "loss": 0.0042, |
| "step": 12690 |
| }, |
| { |
| "epoch": 43.49315068493151, |
| "grad_norm": 0.03144029900431633, |
| "learning_rate": 3.221030765387417e-05, |
| "loss": 0.0044, |
| "step": 12700 |
| }, |
| { |
| "epoch": 43.52739726027397, |
| "grad_norm": 0.03990132361650467, |
| "learning_rate": 3.2133068300505455e-05, |
| "loss": 0.0059, |
| "step": 12710 |
| }, |
| { |
| "epoch": 43.56164383561644, |
| "grad_norm": 0.03905215859413147, |
| "learning_rate": 3.205587779464576e-05, |
| "loss": 0.0041, |
| "step": 12720 |
| }, |
| { |
| "epoch": 43.5958904109589, |
| "grad_norm": 0.0459834523499012, |
| "learning_rate": 3.197873634733096e-05, |
| "loss": 0.0036, |
| "step": 12730 |
| }, |
| { |
| "epoch": 43.63013698630137, |
| "grad_norm": 0.04976179450750351, |
| "learning_rate": 3.190164416946285e-05, |
| "loss": 0.0059, |
| "step": 12740 |
| }, |
| { |
| "epoch": 43.66438356164384, |
| "grad_norm": 0.05985915660858154, |
| "learning_rate": 3.18246014718085e-05, |
| "loss": 0.004, |
| "step": 12750 |
| }, |
| { |
| "epoch": 43.6986301369863, |
| "grad_norm": 0.06645234674215317, |
| "learning_rate": 3.1747608464999725e-05, |
| "loss": 0.0047, |
| "step": 12760 |
| }, |
| { |
| "epoch": 43.73287671232877, |
| "grad_norm": 0.05306819826364517, |
| "learning_rate": 3.167066535953242e-05, |
| "loss": 0.0046, |
| "step": 12770 |
| }, |
| { |
| "epoch": 43.76712328767123, |
| "grad_norm": 0.0343354307115078, |
| "learning_rate": 3.1593772365766105e-05, |
| "loss": 0.0024, |
| "step": 12780 |
| }, |
| { |
| "epoch": 43.8013698630137, |
| "grad_norm": 0.06609547883272171, |
| "learning_rate": 3.1516929693923315e-05, |
| "loss": 0.0038, |
| "step": 12790 |
| }, |
| { |
| "epoch": 43.83561643835616, |
| "grad_norm": 0.030476383864879608, |
| "learning_rate": 3.144013755408895e-05, |
| "loss": 0.0046, |
| "step": 12800 |
| }, |
| { |
| "epoch": 43.86986301369863, |
| "grad_norm": 0.03590785339474678, |
| "learning_rate": 3.136339615620985e-05, |
| "loss": 0.0033, |
| "step": 12810 |
| }, |
| { |
| "epoch": 43.9041095890411, |
| "grad_norm": 0.0393015556037426, |
| "learning_rate": 3.128670571009399e-05, |
| "loss": 0.0033, |
| "step": 12820 |
| }, |
| { |
| "epoch": 43.93835616438356, |
| "grad_norm": 0.04334140568971634, |
| "learning_rate": 3.121006642541014e-05, |
| "loss": 0.0033, |
| "step": 12830 |
| }, |
| { |
| "epoch": 43.97260273972603, |
| "grad_norm": 0.04587549716234207, |
| "learning_rate": 3.113347851168721e-05, |
| "loss": 0.0049, |
| "step": 12840 |
| }, |
| { |
| "epoch": 44.00684931506849, |
| "grad_norm": 0.06654663383960724, |
| "learning_rate": 3.105694217831361e-05, |
| "loss": 0.0032, |
| "step": 12850 |
| }, |
| { |
| "epoch": 44.04109589041096, |
| "grad_norm": 0.049101341515779495, |
| "learning_rate": 3.098045763453678e-05, |
| "loss": 0.0036, |
| "step": 12860 |
| }, |
| { |
| "epoch": 44.07534246575342, |
| "grad_norm": 0.0578986294567585, |
| "learning_rate": 3.090402508946249e-05, |
| "loss": 0.0041, |
| "step": 12870 |
| }, |
| { |
| "epoch": 44.10958904109589, |
| "grad_norm": 0.05904621630907059, |
| "learning_rate": 3.082764475205442e-05, |
| "loss": 0.0043, |
| "step": 12880 |
| }, |
| { |
| "epoch": 44.14383561643836, |
| "grad_norm": 0.03861738368868828, |
| "learning_rate": 3.075131683113352e-05, |
| "loss": 0.0032, |
| "step": 12890 |
| }, |
| { |
| "epoch": 44.178082191780824, |
| "grad_norm": 0.032082729041576385, |
| "learning_rate": 3.0675041535377405e-05, |
| "loss": 0.003, |
| "step": 12900 |
| }, |
| { |
| "epoch": 44.21232876712329, |
| "grad_norm": 0.06508829444646835, |
| "learning_rate": 3.059881907331979e-05, |
| "loss": 0.0037, |
| "step": 12910 |
| }, |
| { |
| "epoch": 44.24657534246575, |
| "grad_norm": 0.04459870606660843, |
| "learning_rate": 3.052264965335e-05, |
| "loss": 0.0046, |
| "step": 12920 |
| }, |
| { |
| "epoch": 44.28082191780822, |
| "grad_norm": 0.05662244185805321, |
| "learning_rate": 3.0446533483712304e-05, |
| "loss": 0.0053, |
| "step": 12930 |
| }, |
| { |
| "epoch": 44.31506849315068, |
| "grad_norm": 0.05471611022949219, |
| "learning_rate": 3.0370470772505433e-05, |
| "loss": 0.004, |
| "step": 12940 |
| }, |
| { |
| "epoch": 44.34931506849315, |
| "grad_norm": 0.053697649389505386, |
| "learning_rate": 3.0294461727681932e-05, |
| "loss": 0.0034, |
| "step": 12950 |
| }, |
| { |
| "epoch": 44.38356164383562, |
| "grad_norm": 0.05044024437665939, |
| "learning_rate": 3.0218506557047598e-05, |
| "loss": 0.0047, |
| "step": 12960 |
| }, |
| { |
| "epoch": 44.417808219178085, |
| "grad_norm": 0.04237433522939682, |
| "learning_rate": 3.0142605468260978e-05, |
| "loss": 0.0026, |
| "step": 12970 |
| }, |
| { |
| "epoch": 44.45205479452055, |
| "grad_norm": 0.041244782507419586, |
| "learning_rate": 3.006675866883275e-05, |
| "loss": 0.003, |
| "step": 12980 |
| }, |
| { |
| "epoch": 44.486301369863014, |
| "grad_norm": 0.031918175518512726, |
| "learning_rate": 2.999096636612518e-05, |
| "loss": 0.0026, |
| "step": 12990 |
| }, |
| { |
| "epoch": 44.52054794520548, |
| "grad_norm": 0.03653609752655029, |
| "learning_rate": 2.991522876735154e-05, |
| "loss": 0.0026, |
| "step": 13000 |
| }, |
| { |
| "epoch": 44.554794520547944, |
| "grad_norm": 0.05688665062189102, |
| "learning_rate": 2.9839546079575497e-05, |
| "loss": 0.0044, |
| "step": 13010 |
| }, |
| { |
| "epoch": 44.58904109589041, |
| "grad_norm": 0.060732051730155945, |
| "learning_rate": 2.976391850971065e-05, |
| "loss": 0.0038, |
| "step": 13020 |
| }, |
| { |
| "epoch": 44.62328767123287, |
| "grad_norm": 0.047593869268894196, |
| "learning_rate": 2.9688346264519866e-05, |
| "loss": 0.0028, |
| "step": 13030 |
| }, |
| { |
| "epoch": 44.657534246575345, |
| "grad_norm": 0.03735283017158508, |
| "learning_rate": 2.9612829550614836e-05, |
| "loss": 0.0036, |
| "step": 13040 |
| }, |
| { |
| "epoch": 44.69178082191781, |
| "grad_norm": 0.056196749210357666, |
| "learning_rate": 2.9537368574455304e-05, |
| "loss": 0.0039, |
| "step": 13050 |
| }, |
| { |
| "epoch": 44.726027397260275, |
| "grad_norm": 0.05041581392288208, |
| "learning_rate": 2.9461963542348737e-05, |
| "loss": 0.0027, |
| "step": 13060 |
| }, |
| { |
| "epoch": 44.76027397260274, |
| "grad_norm": 0.04259680584073067, |
| "learning_rate": 2.9386614660449596e-05, |
| "loss": 0.0033, |
| "step": 13070 |
| }, |
| { |
| "epoch": 44.794520547945204, |
| "grad_norm": 0.03420880436897278, |
| "learning_rate": 2.931132213475884e-05, |
| "loss": 0.0033, |
| "step": 13080 |
| }, |
| { |
| "epoch": 44.82876712328767, |
| "grad_norm": 0.03196268528699875, |
| "learning_rate": 2.9236086171123404e-05, |
| "loss": 0.0039, |
| "step": 13090 |
| }, |
| { |
| "epoch": 44.863013698630134, |
| "grad_norm": 0.035604946315288544, |
| "learning_rate": 2.916090697523549e-05, |
| "loss": 0.0037, |
| "step": 13100 |
| }, |
| { |
| "epoch": 44.897260273972606, |
| "grad_norm": 0.048967644572257996, |
| "learning_rate": 2.9085784752632157e-05, |
| "loss": 0.0044, |
| "step": 13110 |
| }, |
| { |
| "epoch": 44.93150684931507, |
| "grad_norm": 0.0407651923596859, |
| "learning_rate": 2.9010719708694722e-05, |
| "loss": 0.0038, |
| "step": 13120 |
| }, |
| { |
| "epoch": 44.965753424657535, |
| "grad_norm": 0.03494444489479065, |
| "learning_rate": 2.8935712048648112e-05, |
| "loss": 0.0029, |
| "step": 13130 |
| }, |
| { |
| "epoch": 45.0, |
| "grad_norm": 0.026488201692700386, |
| "learning_rate": 2.8860761977560436e-05, |
| "loss": 0.0028, |
| "step": 13140 |
| }, |
| { |
| "epoch": 45.034246575342465, |
| "grad_norm": 0.0324229933321476, |
| "learning_rate": 2.878586970034232e-05, |
| "loss": 0.003, |
| "step": 13150 |
| }, |
| { |
| "epoch": 45.06849315068493, |
| "grad_norm": 0.035328809171915054, |
| "learning_rate": 2.8711035421746367e-05, |
| "loss": 0.0032, |
| "step": 13160 |
| }, |
| { |
| "epoch": 45.102739726027394, |
| "grad_norm": 0.040484555065631866, |
| "learning_rate": 2.8636259346366666e-05, |
| "loss": 0.004, |
| "step": 13170 |
| }, |
| { |
| "epoch": 45.136986301369866, |
| "grad_norm": 0.041628774255514145, |
| "learning_rate": 2.8561541678638142e-05, |
| "loss": 0.0046, |
| "step": 13180 |
| }, |
| { |
| "epoch": 45.17123287671233, |
| "grad_norm": 0.06758181750774384, |
| "learning_rate": 2.8486882622836026e-05, |
| "loss": 0.0034, |
| "step": 13190 |
| }, |
| { |
| "epoch": 45.205479452054796, |
| "grad_norm": 0.04015596583485603, |
| "learning_rate": 2.8412282383075363e-05, |
| "loss": 0.0037, |
| "step": 13200 |
| }, |
| { |
| "epoch": 45.23972602739726, |
| "grad_norm": 0.048500169068574905, |
| "learning_rate": 2.8337741163310317e-05, |
| "loss": 0.0035, |
| "step": 13210 |
| }, |
| { |
| "epoch": 45.273972602739725, |
| "grad_norm": 0.04756450653076172, |
| "learning_rate": 2.8263259167333777e-05, |
| "loss": 0.0038, |
| "step": 13220 |
| }, |
| { |
| "epoch": 45.30821917808219, |
| "grad_norm": 0.04274963214993477, |
| "learning_rate": 2.8188836598776662e-05, |
| "loss": 0.0034, |
| "step": 13230 |
| }, |
| { |
| "epoch": 45.342465753424655, |
| "grad_norm": 0.04680255800485611, |
| "learning_rate": 2.811447366110741e-05, |
| "loss": 0.0035, |
| "step": 13240 |
| }, |
| { |
| "epoch": 45.37671232876713, |
| "grad_norm": 0.04105055332183838, |
| "learning_rate": 2.804017055763149e-05, |
| "loss": 0.0035, |
| "step": 13250 |
| }, |
| { |
| "epoch": 45.41095890410959, |
| "grad_norm": 0.05548069253563881, |
| "learning_rate": 2.7965927491490705e-05, |
| "loss": 0.0033, |
| "step": 13260 |
| }, |
| { |
| "epoch": 45.445205479452056, |
| "grad_norm": 0.045111995190382004, |
| "learning_rate": 2.7891744665662823e-05, |
| "loss": 0.0039, |
| "step": 13270 |
| }, |
| { |
| "epoch": 45.47945205479452, |
| "grad_norm": 0.04081454873085022, |
| "learning_rate": 2.7817622282960815e-05, |
| "loss": 0.004, |
| "step": 13280 |
| }, |
| { |
| "epoch": 45.513698630136986, |
| "grad_norm": 0.04935455694794655, |
| "learning_rate": 2.774356054603243e-05, |
| "loss": 0.0049, |
| "step": 13290 |
| }, |
| { |
| "epoch": 45.54794520547945, |
| "grad_norm": 0.037979427725076675, |
| "learning_rate": 2.766955965735968e-05, |
| "loss": 0.0032, |
| "step": 13300 |
| }, |
| { |
| "epoch": 45.582191780821915, |
| "grad_norm": 0.03660598769783974, |
| "learning_rate": 2.7595619819258116e-05, |
| "loss": 0.0052, |
| "step": 13310 |
| }, |
| { |
| "epoch": 45.61643835616438, |
| "grad_norm": 0.048113249242305756, |
| "learning_rate": 2.7521741233876496e-05, |
| "loss": 0.0031, |
| "step": 13320 |
| }, |
| { |
| "epoch": 45.65068493150685, |
| "grad_norm": 0.03831634670495987, |
| "learning_rate": 2.7447924103195976e-05, |
| "loss": 0.0033, |
| "step": 13330 |
| }, |
| { |
| "epoch": 45.68493150684932, |
| "grad_norm": 0.03267529979348183, |
| "learning_rate": 2.7374168629029813e-05, |
| "loss": 0.0029, |
| "step": 13340 |
| }, |
| { |
| "epoch": 45.71917808219178, |
| "grad_norm": 0.03972616046667099, |
| "learning_rate": 2.7300475013022663e-05, |
| "loss": 0.0029, |
| "step": 13350 |
| }, |
| { |
| "epoch": 45.75342465753425, |
| "grad_norm": 0.033614449203014374, |
| "learning_rate": 2.7226843456650037e-05, |
| "loss": 0.0039, |
| "step": 13360 |
| }, |
| { |
| "epoch": 45.78767123287671, |
| "grad_norm": 0.04760490730404854, |
| "learning_rate": 2.7153274161217846e-05, |
| "loss": 0.0025, |
| "step": 13370 |
| }, |
| { |
| "epoch": 45.821917808219176, |
| "grad_norm": 0.04591522365808487, |
| "learning_rate": 2.707976732786166e-05, |
| "loss": 0.0034, |
| "step": 13380 |
| }, |
| { |
| "epoch": 45.85616438356164, |
| "grad_norm": 0.0396135076880455, |
| "learning_rate": 2.7006323157546386e-05, |
| "loss": 0.0036, |
| "step": 13390 |
| }, |
| { |
| "epoch": 45.89041095890411, |
| "grad_norm": 0.04728582501411438, |
| "learning_rate": 2.693294185106562e-05, |
| "loss": 0.0029, |
| "step": 13400 |
| }, |
| { |
| "epoch": 45.92465753424658, |
| "grad_norm": 0.032608333975076675, |
| "learning_rate": 2.6859623609040984e-05, |
| "loss": 0.0033, |
| "step": 13410 |
| }, |
| { |
| "epoch": 45.95890410958904, |
| "grad_norm": 0.04741152003407478, |
| "learning_rate": 2.6786368631921836e-05, |
| "loss": 0.0036, |
| "step": 13420 |
| }, |
| { |
| "epoch": 45.99315068493151, |
| "grad_norm": 0.04052167013287544, |
| "learning_rate": 2.67131771199844e-05, |
| "loss": 0.0026, |
| "step": 13430 |
| }, |
| { |
| "epoch": 46.02739726027397, |
| "grad_norm": 0.03956746309995651, |
| "learning_rate": 2.6640049273331515e-05, |
| "loss": 0.0042, |
| "step": 13440 |
| }, |
| { |
| "epoch": 46.06164383561644, |
| "grad_norm": 0.04623987898230553, |
| "learning_rate": 2.656698529189193e-05, |
| "loss": 0.0045, |
| "step": 13450 |
| }, |
| { |
| "epoch": 46.0958904109589, |
| "grad_norm": 0.043264828622341156, |
| "learning_rate": 2.6493985375419778e-05, |
| "loss": 0.0031, |
| "step": 13460 |
| }, |
| { |
| "epoch": 46.13013698630137, |
| "grad_norm": 0.04458721727132797, |
| "learning_rate": 2.642104972349403e-05, |
| "loss": 0.0035, |
| "step": 13470 |
| }, |
| { |
| "epoch": 46.16438356164384, |
| "grad_norm": 0.04978759214282036, |
| "learning_rate": 2.6348178535517966e-05, |
| "loss": 0.0041, |
| "step": 13480 |
| }, |
| { |
| "epoch": 46.1986301369863, |
| "grad_norm": 0.038638100028038025, |
| "learning_rate": 2.6275372010718635e-05, |
| "loss": 0.003, |
| "step": 13490 |
| }, |
| { |
| "epoch": 46.23287671232877, |
| "grad_norm": 0.03764040768146515, |
| "learning_rate": 2.6202630348146324e-05, |
| "loss": 0.0033, |
| "step": 13500 |
| }, |
| { |
| "epoch": 46.26712328767123, |
| "grad_norm": 0.040804632008075714, |
| "learning_rate": 2.612995374667394e-05, |
| "loss": 0.0034, |
| "step": 13510 |
| }, |
| { |
| "epoch": 46.3013698630137, |
| "grad_norm": 0.041531484574079514, |
| "learning_rate": 2.6057342404996522e-05, |
| "loss": 0.003, |
| "step": 13520 |
| }, |
| { |
| "epoch": 46.33561643835616, |
| "grad_norm": 0.061668556183576584, |
| "learning_rate": 2.5984796521630737e-05, |
| "loss": 0.0034, |
| "step": 13530 |
| }, |
| { |
| "epoch": 46.36986301369863, |
| "grad_norm": 0.049322471022605896, |
| "learning_rate": 2.591231629491423e-05, |
| "loss": 0.004, |
| "step": 13540 |
| }, |
| { |
| "epoch": 46.4041095890411, |
| "grad_norm": 0.03611714765429497, |
| "learning_rate": 2.5839901923005205e-05, |
| "loss": 0.0025, |
| "step": 13550 |
| }, |
| { |
| "epoch": 46.43835616438356, |
| "grad_norm": 0.02910764142870903, |
| "learning_rate": 2.5767553603881767e-05, |
| "loss": 0.003, |
| "step": 13560 |
| }, |
| { |
| "epoch": 46.47260273972603, |
| "grad_norm": 0.03090509958565235, |
| "learning_rate": 2.5695271535341443e-05, |
| "loss": 0.0028, |
| "step": 13570 |
| }, |
| { |
| "epoch": 46.50684931506849, |
| "grad_norm": 0.04449021816253662, |
| "learning_rate": 2.562305591500069e-05, |
| "loss": 0.0046, |
| "step": 13580 |
| }, |
| { |
| "epoch": 46.54109589041096, |
| "grad_norm": 0.029060401022434235, |
| "learning_rate": 2.555090694029421e-05, |
| "loss": 0.0041, |
| "step": 13590 |
| }, |
| { |
| "epoch": 46.57534246575342, |
| "grad_norm": 0.06126280874013901, |
| "learning_rate": 2.547882480847461e-05, |
| "loss": 0.004, |
| "step": 13600 |
| }, |
| { |
| "epoch": 46.60958904109589, |
| "grad_norm": 0.0393584705889225, |
| "learning_rate": 2.540680971661161e-05, |
| "loss": 0.0031, |
| "step": 13610 |
| }, |
| { |
| "epoch": 46.64383561643836, |
| "grad_norm": 0.043196532875299454, |
| "learning_rate": 2.5334861861591753e-05, |
| "loss": 0.0027, |
| "step": 13620 |
| }, |
| { |
| "epoch": 46.678082191780824, |
| "grad_norm": 0.0547700971364975, |
| "learning_rate": 2.526298144011775e-05, |
| "loss": 0.0038, |
| "step": 13630 |
| }, |
| { |
| "epoch": 46.71232876712329, |
| "grad_norm": 0.04245225712656975, |
| "learning_rate": 2.5191168648707887e-05, |
| "loss": 0.0034, |
| "step": 13640 |
| }, |
| { |
| "epoch": 46.74657534246575, |
| "grad_norm": 0.04763708636164665, |
| "learning_rate": 2.511942368369566e-05, |
| "loss": 0.0046, |
| "step": 13650 |
| }, |
| { |
| "epoch": 46.78082191780822, |
| "grad_norm": 0.04791140928864479, |
| "learning_rate": 2.5047746741228978e-05, |
| "loss": 0.0034, |
| "step": 13660 |
| }, |
| { |
| "epoch": 46.81506849315068, |
| "grad_norm": 0.03143971785902977, |
| "learning_rate": 2.4976138017269908e-05, |
| "loss": 0.0026, |
| "step": 13670 |
| }, |
| { |
| "epoch": 46.84931506849315, |
| "grad_norm": 0.02771608904004097, |
| "learning_rate": 2.490459770759398e-05, |
| "loss": 0.0023, |
| "step": 13680 |
| }, |
| { |
| "epoch": 46.88356164383562, |
| "grad_norm": 0.03285845369100571, |
| "learning_rate": 2.4833126007789653e-05, |
| "loss": 0.0038, |
| "step": 13690 |
| }, |
| { |
| "epoch": 46.917808219178085, |
| "grad_norm": 0.033361777663230896, |
| "learning_rate": 2.476172311325783e-05, |
| "loss": 0.0028, |
| "step": 13700 |
| }, |
| { |
| "epoch": 46.95205479452055, |
| "grad_norm": 0.03274751454591751, |
| "learning_rate": 2.4690389219211273e-05, |
| "loss": 0.0032, |
| "step": 13710 |
| }, |
| { |
| "epoch": 46.986301369863014, |
| "grad_norm": 0.04675106331706047, |
| "learning_rate": 2.4619124520674146e-05, |
| "loss": 0.0038, |
| "step": 13720 |
| }, |
| { |
| "epoch": 47.02054794520548, |
| "grad_norm": 0.039955805987119675, |
| "learning_rate": 2.4547929212481435e-05, |
| "loss": 0.0035, |
| "step": 13730 |
| }, |
| { |
| "epoch": 47.054794520547944, |
| "grad_norm": 0.04543524235486984, |
| "learning_rate": 2.447680348927837e-05, |
| "loss": 0.0041, |
| "step": 13740 |
| }, |
| { |
| "epoch": 47.08904109589041, |
| "grad_norm": 0.031714826822280884, |
| "learning_rate": 2.4405747545519963e-05, |
| "loss": 0.0032, |
| "step": 13750 |
| }, |
| { |
| "epoch": 47.12328767123287, |
| "grad_norm": 0.033325448632240295, |
| "learning_rate": 2.433476157547044e-05, |
| "loss": 0.003, |
| "step": 13760 |
| }, |
| { |
| "epoch": 47.157534246575345, |
| "grad_norm": 0.0679163858294487, |
| "learning_rate": 2.4263845773202736e-05, |
| "loss": 0.0029, |
| "step": 13770 |
| }, |
| { |
| "epoch": 47.19178082191781, |
| "grad_norm": 0.03381791710853577, |
| "learning_rate": 2.419300033259798e-05, |
| "loss": 0.0028, |
| "step": 13780 |
| }, |
| { |
| "epoch": 47.226027397260275, |
| "grad_norm": 0.055893249809741974, |
| "learning_rate": 2.4122225447344875e-05, |
| "loss": 0.0034, |
| "step": 13790 |
| }, |
| { |
| "epoch": 47.26027397260274, |
| "grad_norm": 0.04048413038253784, |
| "learning_rate": 2.405152131093926e-05, |
| "loss": 0.0042, |
| "step": 13800 |
| }, |
| { |
| "epoch": 47.294520547945204, |
| "grad_norm": 0.0625983402132988, |
| "learning_rate": 2.3980888116683515e-05, |
| "loss": 0.005, |
| "step": 13810 |
| }, |
| { |
| "epoch": 47.32876712328767, |
| "grad_norm": 0.04345778375864029, |
| "learning_rate": 2.3910326057686127e-05, |
| "loss": 0.0036, |
| "step": 13820 |
| }, |
| { |
| "epoch": 47.363013698630134, |
| "grad_norm": 0.04610544443130493, |
| "learning_rate": 2.3839835326861104e-05, |
| "loss": 0.0031, |
| "step": 13830 |
| }, |
| { |
| "epoch": 47.397260273972606, |
| "grad_norm": 0.042664166539907455, |
| "learning_rate": 2.3769416116927335e-05, |
| "loss": 0.0023, |
| "step": 13840 |
| }, |
| { |
| "epoch": 47.43150684931507, |
| "grad_norm": 0.0539637915790081, |
| "learning_rate": 2.3699068620408304e-05, |
| "loss": 0.0038, |
| "step": 13850 |
| }, |
| { |
| "epoch": 47.465753424657535, |
| "grad_norm": 0.027709294110536575, |
| "learning_rate": 2.362879302963135e-05, |
| "loss": 0.0026, |
| "step": 13860 |
| }, |
| { |
| "epoch": 47.5, |
| "grad_norm": 0.044280145317316055, |
| "learning_rate": 2.3558589536727277e-05, |
| "loss": 0.0032, |
| "step": 13870 |
| }, |
| { |
| "epoch": 47.534246575342465, |
| "grad_norm": 0.03847116231918335, |
| "learning_rate": 2.3488458333629777e-05, |
| "loss": 0.0036, |
| "step": 13880 |
| }, |
| { |
| "epoch": 47.56849315068493, |
| "grad_norm": 0.04217471554875374, |
| "learning_rate": 2.341839961207482e-05, |
| "loss": 0.0034, |
| "step": 13890 |
| }, |
| { |
| "epoch": 47.602739726027394, |
| "grad_norm": 0.037517059594392776, |
| "learning_rate": 2.3348413563600325e-05, |
| "loss": 0.0036, |
| "step": 13900 |
| }, |
| { |
| "epoch": 47.636986301369866, |
| "grad_norm": 0.035919900983572006, |
| "learning_rate": 2.3278500379545436e-05, |
| "loss": 0.0035, |
| "step": 13910 |
| }, |
| { |
| "epoch": 47.67123287671233, |
| "grad_norm": 0.03642423823475838, |
| "learning_rate": 2.3208660251050158e-05, |
| "loss": 0.0027, |
| "step": 13920 |
| }, |
| { |
| "epoch": 47.705479452054796, |
| "grad_norm": 0.04969945177435875, |
| "learning_rate": 2.3138893369054766e-05, |
| "loss": 0.003, |
| "step": 13930 |
| }, |
| { |
| "epoch": 47.73972602739726, |
| "grad_norm": 0.032009631395339966, |
| "learning_rate": 2.3069199924299174e-05, |
| "loss": 0.0034, |
| "step": 13940 |
| }, |
| { |
| "epoch": 47.773972602739725, |
| "grad_norm": 0.053595248609781265, |
| "learning_rate": 2.2999580107322653e-05, |
| "loss": 0.0037, |
| "step": 13950 |
| }, |
| { |
| "epoch": 47.80821917808219, |
| "grad_norm": 0.03953272104263306, |
| "learning_rate": 2.29300341084631e-05, |
| "loss": 0.0039, |
| "step": 13960 |
| }, |
| { |
| "epoch": 47.842465753424655, |
| "grad_norm": 0.02748740464448929, |
| "learning_rate": 2.2860562117856647e-05, |
| "loss": 0.0033, |
| "step": 13970 |
| }, |
| { |
| "epoch": 47.87671232876713, |
| "grad_norm": 0.048432186245918274, |
| "learning_rate": 2.279116432543705e-05, |
| "loss": 0.0037, |
| "step": 13980 |
| }, |
| { |
| "epoch": 47.91095890410959, |
| "grad_norm": 0.02375647984445095, |
| "learning_rate": 2.2721840920935196e-05, |
| "loss": 0.0031, |
| "step": 13990 |
| }, |
| { |
| "epoch": 47.945205479452056, |
| "grad_norm": 0.0455491729080677, |
| "learning_rate": 2.2652592093878666e-05, |
| "loss": 0.0033, |
| "step": 14000 |
| }, |
| { |
| "epoch": 47.97945205479452, |
| "grad_norm": 0.03882705047726631, |
| "learning_rate": 2.258341803359108e-05, |
| "loss": 0.0032, |
| "step": 14010 |
| }, |
| { |
| "epoch": 48.013698630136986, |
| "grad_norm": 0.04311921074986458, |
| "learning_rate": 2.251431892919171e-05, |
| "loss": 0.003, |
| "step": 14020 |
| }, |
| { |
| "epoch": 48.04794520547945, |
| "grad_norm": 0.03556806966662407, |
| "learning_rate": 2.2445294969594844e-05, |
| "loss": 0.0037, |
| "step": 14030 |
| }, |
| { |
| "epoch": 48.082191780821915, |
| "grad_norm": 0.04763215407729149, |
| "learning_rate": 2.237634634350934e-05, |
| "loss": 0.0027, |
| "step": 14040 |
| }, |
| { |
| "epoch": 48.11643835616438, |
| "grad_norm": 0.048188772052526474, |
| "learning_rate": 2.2307473239438154e-05, |
| "loss": 0.0037, |
| "step": 14050 |
| }, |
| { |
| "epoch": 48.15068493150685, |
| "grad_norm": 0.041279006749391556, |
| "learning_rate": 2.2238675845677663e-05, |
| "loss": 0.0025, |
| "step": 14060 |
| }, |
| { |
| "epoch": 48.18493150684932, |
| "grad_norm": 0.03583381325006485, |
| "learning_rate": 2.2169954350317374e-05, |
| "loss": 0.0028, |
| "step": 14070 |
| }, |
| { |
| "epoch": 48.21917808219178, |
| "grad_norm": 0.0358072929084301, |
| "learning_rate": 2.2101308941239203e-05, |
| "loss": 0.0028, |
| "step": 14080 |
| }, |
| { |
| "epoch": 48.25342465753425, |
| "grad_norm": 0.028081903234124184, |
| "learning_rate": 2.2032739806117058e-05, |
| "loss": 0.0034, |
| "step": 14090 |
| }, |
| { |
| "epoch": 48.28767123287671, |
| "grad_norm": 0.036171264946460724, |
| "learning_rate": 2.196424713241637e-05, |
| "loss": 0.0033, |
| "step": 14100 |
| }, |
| { |
| "epoch": 48.321917808219176, |
| "grad_norm": 0.034019824117422104, |
| "learning_rate": 2.1895831107393484e-05, |
| "loss": 0.0033, |
| "step": 14110 |
| }, |
| { |
| "epoch": 48.35616438356164, |
| "grad_norm": 0.04331124201416969, |
| "learning_rate": 2.182749191809518e-05, |
| "loss": 0.0028, |
| "step": 14120 |
| }, |
| { |
| "epoch": 48.39041095890411, |
| "grad_norm": 0.026034552603960037, |
| "learning_rate": 2.1759229751358217e-05, |
| "loss": 0.0031, |
| "step": 14130 |
| }, |
| { |
| "epoch": 48.42465753424658, |
| "grad_norm": 0.02854372002184391, |
| "learning_rate": 2.1691044793808734e-05, |
| "loss": 0.0028, |
| "step": 14140 |
| }, |
| { |
| "epoch": 48.45890410958904, |
| "grad_norm": 0.0506640300154686, |
| "learning_rate": 2.1622937231861822e-05, |
| "loss": 0.0028, |
| "step": 14150 |
| }, |
| { |
| "epoch": 48.49315068493151, |
| "grad_norm": 0.06169329211115837, |
| "learning_rate": 2.1554907251720945e-05, |
| "loss": 0.0043, |
| "step": 14160 |
| }, |
| { |
| "epoch": 48.52739726027397, |
| "grad_norm": 0.0488462932407856, |
| "learning_rate": 2.148695503937745e-05, |
| "loss": 0.0025, |
| "step": 14170 |
| }, |
| { |
| "epoch": 48.56164383561644, |
| "grad_norm": 0.05333937332034111, |
| "learning_rate": 2.1419080780610123e-05, |
| "loss": 0.0024, |
| "step": 14180 |
| }, |
| { |
| "epoch": 48.5958904109589, |
| "grad_norm": 0.03566636145114899, |
| "learning_rate": 2.1351284660984572e-05, |
| "loss": 0.0029, |
| "step": 14190 |
| }, |
| { |
| "epoch": 48.63013698630137, |
| "grad_norm": 0.04205214977264404, |
| "learning_rate": 2.128356686585282e-05, |
| "loss": 0.0028, |
| "step": 14200 |
| }, |
| { |
| "epoch": 48.66438356164384, |
| "grad_norm": 0.03965020179748535, |
| "learning_rate": 2.121592758035273e-05, |
| "loss": 0.0041, |
| "step": 14210 |
| }, |
| { |
| "epoch": 48.6986301369863, |
| "grad_norm": 0.037797197699546814, |
| "learning_rate": 2.1148366989407496e-05, |
| "loss": 0.0035, |
| "step": 14220 |
| }, |
| { |
| "epoch": 48.73287671232877, |
| "grad_norm": 0.04017401114106178, |
| "learning_rate": 2.1080885277725236e-05, |
| "loss": 0.0029, |
| "step": 14230 |
| }, |
| { |
| "epoch": 48.76712328767123, |
| "grad_norm": 0.05713287740945816, |
| "learning_rate": 2.1013482629798333e-05, |
| "loss": 0.0042, |
| "step": 14240 |
| }, |
| { |
| "epoch": 48.8013698630137, |
| "grad_norm": 0.04046434909105301, |
| "learning_rate": 2.094615922990309e-05, |
| "loss": 0.003, |
| "step": 14250 |
| }, |
| { |
| "epoch": 48.83561643835616, |
| "grad_norm": 0.03594409301877022, |
| "learning_rate": 2.0878915262099098e-05, |
| "loss": 0.0023, |
| "step": 14260 |
| }, |
| { |
| "epoch": 48.86986301369863, |
| "grad_norm": 0.038105227053165436, |
| "learning_rate": 2.0811750910228774e-05, |
| "loss": 0.0034, |
| "step": 14270 |
| }, |
| { |
| "epoch": 48.9041095890411, |
| "grad_norm": 0.06692781299352646, |
| "learning_rate": 2.0744666357916925e-05, |
| "loss": 0.0037, |
| "step": 14280 |
| }, |
| { |
| "epoch": 48.93835616438356, |
| "grad_norm": 0.04800930246710777, |
| "learning_rate": 2.067766178857013e-05, |
| "loss": 0.0032, |
| "step": 14290 |
| }, |
| { |
| "epoch": 48.97260273972603, |
| "grad_norm": 0.04606781154870987, |
| "learning_rate": 2.061073738537635e-05, |
| "loss": 0.0028, |
| "step": 14300 |
| }, |
| { |
| "epoch": 49.00684931506849, |
| "grad_norm": 0.03653561696410179, |
| "learning_rate": 2.0543893331304333e-05, |
| "loss": 0.003, |
| "step": 14310 |
| }, |
| { |
| "epoch": 49.04109589041096, |
| "grad_norm": 0.04066811501979828, |
| "learning_rate": 2.0477129809103147e-05, |
| "loss": 0.0036, |
| "step": 14320 |
| }, |
| { |
| "epoch": 49.07534246575342, |
| "grad_norm": 0.028658408671617508, |
| "learning_rate": 2.0410447001301753e-05, |
| "loss": 0.0026, |
| "step": 14330 |
| }, |
| { |
| "epoch": 49.10958904109589, |
| "grad_norm": 0.04720501974225044, |
| "learning_rate": 2.0343845090208368e-05, |
| "loss": 0.0029, |
| "step": 14340 |
| }, |
| { |
| "epoch": 49.14383561643836, |
| "grad_norm": 0.03682386875152588, |
| "learning_rate": 2.0277324257910106e-05, |
| "loss": 0.0033, |
| "step": 14350 |
| }, |
| { |
| "epoch": 49.178082191780824, |
| "grad_norm": 0.029543591663241386, |
| "learning_rate": 2.0210884686272368e-05, |
| "loss": 0.0031, |
| "step": 14360 |
| }, |
| { |
| "epoch": 49.21232876712329, |
| "grad_norm": 0.03623099625110626, |
| "learning_rate": 2.0144526556938387e-05, |
| "loss": 0.0028, |
| "step": 14370 |
| }, |
| { |
| "epoch": 49.24657534246575, |
| "grad_norm": 0.02651667222380638, |
| "learning_rate": 2.0078250051328784e-05, |
| "loss": 0.003, |
| "step": 14380 |
| }, |
| { |
| "epoch": 49.28082191780822, |
| "grad_norm": 0.031783703714609146, |
| "learning_rate": 2.0012055350640986e-05, |
| "loss": 0.0037, |
| "step": 14390 |
| }, |
| { |
| "epoch": 49.31506849315068, |
| "grad_norm": 0.03946804255247116, |
| "learning_rate": 1.9945942635848748e-05, |
| "loss": 0.0026, |
| "step": 14400 |
| }, |
| { |
| "epoch": 49.34931506849315, |
| "grad_norm": 0.028189141303300858, |
| "learning_rate": 1.9879912087701753e-05, |
| "loss": 0.0037, |
| "step": 14410 |
| }, |
| { |
| "epoch": 49.38356164383562, |
| "grad_norm": 0.037286076694726944, |
| "learning_rate": 1.981396388672496e-05, |
| "loss": 0.0028, |
| "step": 14420 |
| }, |
| { |
| "epoch": 49.417808219178085, |
| "grad_norm": 0.03162837401032448, |
| "learning_rate": 1.974809821321827e-05, |
| "loss": 0.0026, |
| "step": 14430 |
| }, |
| { |
| "epoch": 49.45205479452055, |
| "grad_norm": 0.05570969358086586, |
| "learning_rate": 1.9682315247255894e-05, |
| "loss": 0.0032, |
| "step": 14440 |
| }, |
| { |
| "epoch": 49.486301369863014, |
| "grad_norm": 0.0389660969376564, |
| "learning_rate": 1.9616615168685943e-05, |
| "loss": 0.0028, |
| "step": 14450 |
| }, |
| { |
| "epoch": 49.52054794520548, |
| "grad_norm": 0.03000059723854065, |
| "learning_rate": 1.9550998157129946e-05, |
| "loss": 0.0023, |
| "step": 14460 |
| }, |
| { |
| "epoch": 49.554794520547944, |
| "grad_norm": 0.03354468196630478, |
| "learning_rate": 1.9485464391982284e-05, |
| "loss": 0.0033, |
| "step": 14470 |
| }, |
| { |
| "epoch": 49.58904109589041, |
| "grad_norm": 0.037038616836071014, |
| "learning_rate": 1.942001405240979e-05, |
| "loss": 0.0039, |
| "step": 14480 |
| }, |
| { |
| "epoch": 49.62328767123287, |
| "grad_norm": 0.03484483063220978, |
| "learning_rate": 1.9354647317351188e-05, |
| "loss": 0.0029, |
| "step": 14490 |
| }, |
| { |
| "epoch": 49.657534246575345, |
| "grad_norm": 0.04799222946166992, |
| "learning_rate": 1.928936436551661e-05, |
| "loss": 0.0036, |
| "step": 14500 |
| }, |
| { |
| "epoch": 49.69178082191781, |
| "grad_norm": 0.0432819202542305, |
| "learning_rate": 1.9224165375387193e-05, |
| "loss": 0.0032, |
| "step": 14510 |
| }, |
| { |
| "epoch": 49.726027397260275, |
| "grad_norm": 0.039506372064352036, |
| "learning_rate": 1.9159050525214452e-05, |
| "loss": 0.0042, |
| "step": 14520 |
| }, |
| { |
| "epoch": 49.76027397260274, |
| "grad_norm": 0.03616689145565033, |
| "learning_rate": 1.909401999301993e-05, |
| "loss": 0.0022, |
| "step": 14530 |
| }, |
| { |
| "epoch": 49.794520547945204, |
| "grad_norm": 0.042690783739089966, |
| "learning_rate": 1.9029073956594606e-05, |
| "loss": 0.0033, |
| "step": 14540 |
| }, |
| { |
| "epoch": 49.82876712328767, |
| "grad_norm": 0.046874433755874634, |
| "learning_rate": 1.8964212593498442e-05, |
| "loss": 0.0032, |
| "step": 14550 |
| }, |
| { |
| "epoch": 49.863013698630134, |
| "grad_norm": 0.037254612892866135, |
| "learning_rate": 1.8899436081059975e-05, |
| "loss": 0.0028, |
| "step": 14560 |
| }, |
| { |
| "epoch": 49.897260273972606, |
| "grad_norm": 0.04147499427199364, |
| "learning_rate": 1.8834744596375666e-05, |
| "loss": 0.0038, |
| "step": 14570 |
| }, |
| { |
| "epoch": 49.93150684931507, |
| "grad_norm": 0.030911028385162354, |
| "learning_rate": 1.877013831630961e-05, |
| "loss": 0.0034, |
| "step": 14580 |
| }, |
| { |
| "epoch": 49.965753424657535, |
| "grad_norm": 0.026016168296337128, |
| "learning_rate": 1.8705617417492883e-05, |
| "loss": 0.0029, |
| "step": 14590 |
| }, |
| { |
| "epoch": 50.0, |
| "grad_norm": 0.026868095621466637, |
| "learning_rate": 1.8641182076323148e-05, |
| "loss": 0.0028, |
| "step": 14600 |
| }, |
| { |
| "epoch": 50.034246575342465, |
| "grad_norm": 0.039373837411403656, |
| "learning_rate": 1.85768324689642e-05, |
| "loss": 0.0034, |
| "step": 14610 |
| }, |
| { |
| "epoch": 50.06849315068493, |
| "grad_norm": 0.047938112169504166, |
| "learning_rate": 1.851256877134538e-05, |
| "loss": 0.004, |
| "step": 14620 |
| }, |
| { |
| "epoch": 50.102739726027394, |
| "grad_norm": 0.029682587832212448, |
| "learning_rate": 1.8448391159161204e-05, |
| "loss": 0.0029, |
| "step": 14630 |
| }, |
| { |
| "epoch": 50.136986301369866, |
| "grad_norm": 0.035953033715486526, |
| "learning_rate": 1.838429980787081e-05, |
| "loss": 0.0028, |
| "step": 14640 |
| }, |
| { |
| "epoch": 50.17123287671233, |
| "grad_norm": 0.04639003053307533, |
| "learning_rate": 1.8320294892697478e-05, |
| "loss": 0.0038, |
| "step": 14650 |
| }, |
| { |
| "epoch": 50.205479452054796, |
| "grad_norm": 0.03600938618183136, |
| "learning_rate": 1.8256376588628238e-05, |
| "loss": 0.0029, |
| "step": 14660 |
| }, |
| { |
| "epoch": 50.23972602739726, |
| "grad_norm": 0.03340703994035721, |
| "learning_rate": 1.8192545070413282e-05, |
| "loss": 0.0029, |
| "step": 14670 |
| }, |
| { |
| "epoch": 50.273972602739725, |
| "grad_norm": 0.05106693133711815, |
| "learning_rate": 1.8128800512565513e-05, |
| "loss": 0.0036, |
| "step": 14680 |
| }, |
| { |
| "epoch": 50.30821917808219, |
| "grad_norm": 0.05128314346075058, |
| "learning_rate": 1.8065143089360172e-05, |
| "loss": 0.0029, |
| "step": 14690 |
| }, |
| { |
| "epoch": 50.342465753424655, |
| "grad_norm": 0.04804209992289543, |
| "learning_rate": 1.800157297483417e-05, |
| "loss": 0.0026, |
| "step": 14700 |
| }, |
| { |
| "epoch": 50.37671232876713, |
| "grad_norm": 0.040086787194013596, |
| "learning_rate": 1.7938090342785817e-05, |
| "loss": 0.0021, |
| "step": 14710 |
| }, |
| { |
| "epoch": 50.41095890410959, |
| "grad_norm": 0.0328865684568882, |
| "learning_rate": 1.787469536677419e-05, |
| "loss": 0.0035, |
| "step": 14720 |
| }, |
| { |
| "epoch": 50.445205479452056, |
| "grad_norm": 0.02870897389948368, |
| "learning_rate": 1.7811388220118707e-05, |
| "loss": 0.0033, |
| "step": 14730 |
| }, |
| { |
| "epoch": 50.47945205479452, |
| "grad_norm": 0.03273727372288704, |
| "learning_rate": 1.774816907589873e-05, |
| "loss": 0.0026, |
| "step": 14740 |
| }, |
| { |
| "epoch": 50.513698630136986, |
| "grad_norm": 0.03714567795395851, |
| "learning_rate": 1.768503810695295e-05, |
| "loss": 0.0027, |
| "step": 14750 |
| }, |
| { |
| "epoch": 50.54794520547945, |
| "grad_norm": 0.051201384514570236, |
| "learning_rate": 1.7621995485879062e-05, |
| "loss": 0.0036, |
| "step": 14760 |
| }, |
| { |
| "epoch": 50.582191780821915, |
| "grad_norm": 0.04567592591047287, |
| "learning_rate": 1.755904138503316e-05, |
| "loss": 0.0036, |
| "step": 14770 |
| }, |
| { |
| "epoch": 50.61643835616438, |
| "grad_norm": 0.04741727560758591, |
| "learning_rate": 1.749617597652934e-05, |
| "loss": 0.0032, |
| "step": 14780 |
| }, |
| { |
| "epoch": 50.65068493150685, |
| "grad_norm": 0.02346430905163288, |
| "learning_rate": 1.743339943223926e-05, |
| "loss": 0.0026, |
| "step": 14790 |
| }, |
| { |
| "epoch": 50.68493150684932, |
| "grad_norm": 0.033810559660196304, |
| "learning_rate": 1.7370711923791567e-05, |
| "loss": 0.0033, |
| "step": 14800 |
| }, |
| { |
| "epoch": 50.71917808219178, |
| "grad_norm": 0.04429788514971733, |
| "learning_rate": 1.7308113622571544e-05, |
| "loss": 0.0043, |
| "step": 14810 |
| }, |
| { |
| "epoch": 50.75342465753425, |
| "grad_norm": 0.034124165773391724, |
| "learning_rate": 1.7245604699720535e-05, |
| "loss": 0.0023, |
| "step": 14820 |
| }, |
| { |
| "epoch": 50.78767123287671, |
| "grad_norm": 0.04748954623937607, |
| "learning_rate": 1.7183185326135543e-05, |
| "loss": 0.0029, |
| "step": 14830 |
| }, |
| { |
| "epoch": 50.821917808219176, |
| "grad_norm": 0.02713291347026825, |
| "learning_rate": 1.712085567246878e-05, |
| "loss": 0.0023, |
| "step": 14840 |
| }, |
| { |
| "epoch": 50.85616438356164, |
| "grad_norm": 0.036193542182445526, |
| "learning_rate": 1.70586159091271e-05, |
| "loss": 0.003, |
| "step": 14850 |
| }, |
| { |
| "epoch": 50.89041095890411, |
| "grad_norm": 0.027815349400043488, |
| "learning_rate": 1.699646620627168e-05, |
| "loss": 0.003, |
| "step": 14860 |
| }, |
| { |
| "epoch": 50.92465753424658, |
| "grad_norm": 0.02768586575984955, |
| "learning_rate": 1.6934406733817414e-05, |
| "loss": 0.0039, |
| "step": 14870 |
| }, |
| { |
| "epoch": 50.95890410958904, |
| "grad_norm": 0.0356876440346241, |
| "learning_rate": 1.6872437661432517e-05, |
| "loss": 0.0039, |
| "step": 14880 |
| }, |
| { |
| "epoch": 50.99315068493151, |
| "grad_norm": 0.030868323519825935, |
| "learning_rate": 1.6810559158538092e-05, |
| "loss": 0.0036, |
| "step": 14890 |
| }, |
| { |
| "epoch": 51.02739726027397, |
| "grad_norm": 0.04372343793511391, |
| "learning_rate": 1.6748771394307585e-05, |
| "loss": 0.0041, |
| "step": 14900 |
| }, |
| { |
| "epoch": 51.06164383561644, |
| "grad_norm": 0.026267332956194878, |
| "learning_rate": 1.6687074537666398e-05, |
| "loss": 0.0023, |
| "step": 14910 |
| }, |
| { |
| "epoch": 51.0958904109589, |
| "grad_norm": 0.035959236323833466, |
| "learning_rate": 1.662546875729138e-05, |
| "loss": 0.0028, |
| "step": 14920 |
| }, |
| { |
| "epoch": 51.13013698630137, |
| "grad_norm": 0.03822488337755203, |
| "learning_rate": 1.6563954221610355e-05, |
| "loss": 0.0043, |
| "step": 14930 |
| }, |
| { |
| "epoch": 51.16438356164384, |
| "grad_norm": 0.03191686421632767, |
| "learning_rate": 1.6502531098801753e-05, |
| "loss": 0.0039, |
| "step": 14940 |
| }, |
| { |
| "epoch": 51.1986301369863, |
| "grad_norm": 0.046950504183769226, |
| "learning_rate": 1.6441199556794033e-05, |
| "loss": 0.0036, |
| "step": 14950 |
| }, |
| { |
| "epoch": 51.23287671232877, |
| "grad_norm": 0.03460918739438057, |
| "learning_rate": 1.637995976326527e-05, |
| "loss": 0.0039, |
| "step": 14960 |
| }, |
| { |
| "epoch": 51.26712328767123, |
| "grad_norm": 0.045809391885995865, |
| "learning_rate": 1.631881188564275e-05, |
| "loss": 0.0049, |
| "step": 14970 |
| }, |
| { |
| "epoch": 51.3013698630137, |
| "grad_norm": 0.04050698131322861, |
| "learning_rate": 1.62577560911024e-05, |
| "loss": 0.0028, |
| "step": 14980 |
| }, |
| { |
| "epoch": 51.33561643835616, |
| "grad_norm": 0.02764354832470417, |
| "learning_rate": 1.6196792546568472e-05, |
| "loss": 0.0021, |
| "step": 14990 |
| }, |
| { |
| "epoch": 51.36986301369863, |
| "grad_norm": 0.035305511206388474, |
| "learning_rate": 1.6135921418712956e-05, |
| "loss": 0.0028, |
| "step": 15000 |
| }, |
| { |
| "epoch": 51.4041095890411, |
| "grad_norm": 0.030398089438676834, |
| "learning_rate": 1.6075142873955164e-05, |
| "loss": 0.0039, |
| "step": 15010 |
| }, |
| { |
| "epoch": 51.43835616438356, |
| "grad_norm": 0.03482901677489281, |
| "learning_rate": 1.6014457078461353e-05, |
| "loss": 0.0026, |
| "step": 15020 |
| }, |
| { |
| "epoch": 51.47260273972603, |
| "grad_norm": 0.035669729113578796, |
| "learning_rate": 1.5953864198144135e-05, |
| "loss": 0.0042, |
| "step": 15030 |
| }, |
| { |
| "epoch": 51.50684931506849, |
| "grad_norm": 0.03193089738488197, |
| "learning_rate": 1.5893364398662176e-05, |
| "loss": 0.0024, |
| "step": 15040 |
| }, |
| { |
| "epoch": 51.54109589041096, |
| "grad_norm": 0.027457116171717644, |
| "learning_rate": 1.583295784541958e-05, |
| "loss": 0.0038, |
| "step": 15050 |
| }, |
| { |
| "epoch": 51.57534246575342, |
| "grad_norm": 0.037244558334350586, |
| "learning_rate": 1.5772644703565565e-05, |
| "loss": 0.0032, |
| "step": 15060 |
| }, |
| { |
| "epoch": 51.60958904109589, |
| "grad_norm": 0.026973290368914604, |
| "learning_rate": 1.5712425137993973e-05, |
| "loss": 0.0044, |
| "step": 15070 |
| }, |
| { |
| "epoch": 51.64383561643836, |
| "grad_norm": 0.027435757219791412, |
| "learning_rate": 1.5652299313342773e-05, |
| "loss": 0.0032, |
| "step": 15080 |
| }, |
| { |
| "epoch": 51.678082191780824, |
| "grad_norm": 0.033582866191864014, |
| "learning_rate": 1.5592267393993716e-05, |
| "loss": 0.0035, |
| "step": 15090 |
| }, |
| { |
| "epoch": 51.71232876712329, |
| "grad_norm": 0.029963036999106407, |
| "learning_rate": 1.553232954407171e-05, |
| "loss": 0.0036, |
| "step": 15100 |
| }, |
| { |
| "epoch": 51.74657534246575, |
| "grad_norm": 0.029700949788093567, |
| "learning_rate": 1.5472485927444597e-05, |
| "loss": 0.0045, |
| "step": 15110 |
| }, |
| { |
| "epoch": 51.78082191780822, |
| "grad_norm": 0.021908050402998924, |
| "learning_rate": 1.5412736707722537e-05, |
| "loss": 0.0026, |
| "step": 15120 |
| }, |
| { |
| "epoch": 51.81506849315068, |
| "grad_norm": 0.04361306503415108, |
| "learning_rate": 1.5353082048257596e-05, |
| "loss": 0.0045, |
| "step": 15130 |
| }, |
| { |
| "epoch": 51.84931506849315, |
| "grad_norm": 0.047619931399822235, |
| "learning_rate": 1.5293522112143373e-05, |
| "loss": 0.0045, |
| "step": 15140 |
| }, |
| { |
| "epoch": 51.88356164383562, |
| "grad_norm": 0.04392608627676964, |
| "learning_rate": 1.5234057062214402e-05, |
| "loss": 0.0031, |
| "step": 15150 |
| }, |
| { |
| "epoch": 51.917808219178085, |
| "grad_norm": 0.03222033753991127, |
| "learning_rate": 1.517468706104589e-05, |
| "loss": 0.0032, |
| "step": 15160 |
| }, |
| { |
| "epoch": 51.95205479452055, |
| "grad_norm": 0.04997098818421364, |
| "learning_rate": 1.5115412270953167e-05, |
| "loss": 0.0036, |
| "step": 15170 |
| }, |
| { |
| "epoch": 51.986301369863014, |
| "grad_norm": 0.04557951167225838, |
| "learning_rate": 1.5056232853991209e-05, |
| "loss": 0.0036, |
| "step": 15180 |
| }, |
| { |
| "epoch": 52.02054794520548, |
| "grad_norm": 0.026306597515940666, |
| "learning_rate": 1.4997148971954344e-05, |
| "loss": 0.0033, |
| "step": 15190 |
| }, |
| { |
| "epoch": 52.054794520547944, |
| "grad_norm": 0.034577708691358566, |
| "learning_rate": 1.4938160786375572e-05, |
| "loss": 0.0026, |
| "step": 15200 |
| }, |
| { |
| "epoch": 52.08904109589041, |
| "grad_norm": 0.030455907806754112, |
| "learning_rate": 1.4879268458526379e-05, |
| "loss": 0.0029, |
| "step": 15210 |
| }, |
| { |
| "epoch": 52.12328767123287, |
| "grad_norm": 0.021716345101594925, |
| "learning_rate": 1.4820472149416154e-05, |
| "loss": 0.0017, |
| "step": 15220 |
| }, |
| { |
| "epoch": 52.157534246575345, |
| "grad_norm": 0.031647682189941406, |
| "learning_rate": 1.4761772019791748e-05, |
| "loss": 0.0024, |
| "step": 15230 |
| }, |
| { |
| "epoch": 52.19178082191781, |
| "grad_norm": 0.026279931887984276, |
| "learning_rate": 1.470316823013707e-05, |
| "loss": 0.0025, |
| "step": 15240 |
| }, |
| { |
| "epoch": 52.226027397260275, |
| "grad_norm": 0.03701292723417282, |
| "learning_rate": 1.4644660940672627e-05, |
| "loss": 0.0032, |
| "step": 15250 |
| }, |
| { |
| "epoch": 52.26027397260274, |
| "grad_norm": 0.03941259905695915, |
| "learning_rate": 1.4586250311355132e-05, |
| "loss": 0.0028, |
| "step": 15260 |
| }, |
| { |
| "epoch": 52.294520547945204, |
| "grad_norm": 0.037332188338041306, |
| "learning_rate": 1.4527936501877032e-05, |
| "loss": 0.0033, |
| "step": 15270 |
| }, |
| { |
| "epoch": 52.32876712328767, |
| "grad_norm": 0.027855148538947105, |
| "learning_rate": 1.4469719671666043e-05, |
| "loss": 0.0033, |
| "step": 15280 |
| }, |
| { |
| "epoch": 52.363013698630134, |
| "grad_norm": 0.030381757766008377, |
| "learning_rate": 1.4411599979884744e-05, |
| "loss": 0.0047, |
| "step": 15290 |
| }, |
| { |
| "epoch": 52.397260273972606, |
| "grad_norm": 0.030294453725218773, |
| "learning_rate": 1.435357758543015e-05, |
| "loss": 0.0024, |
| "step": 15300 |
| }, |
| { |
| "epoch": 52.43150684931507, |
| "grad_norm": 0.03383754566311836, |
| "learning_rate": 1.4295652646933277e-05, |
| "loss": 0.003, |
| "step": 15310 |
| }, |
| { |
| "epoch": 52.465753424657535, |
| "grad_norm": 0.03104538284242153, |
| "learning_rate": 1.4237825322758736e-05, |
| "loss": 0.0036, |
| "step": 15320 |
| }, |
| { |
| "epoch": 52.5, |
| "grad_norm": 0.034425560384988785, |
| "learning_rate": 1.4180095771004154e-05, |
| "loss": 0.0032, |
| "step": 15330 |
| }, |
| { |
| "epoch": 52.534246575342465, |
| "grad_norm": 0.028029220178723335, |
| "learning_rate": 1.412246414949997e-05, |
| "loss": 0.0024, |
| "step": 15340 |
| }, |
| { |
| "epoch": 52.56849315068493, |
| "grad_norm": 0.03750409930944443, |
| "learning_rate": 1.4064930615808808e-05, |
| "loss": 0.0032, |
| "step": 15350 |
| }, |
| { |
| "epoch": 52.602739726027394, |
| "grad_norm": 0.02901621349155903, |
| "learning_rate": 1.4007495327225162e-05, |
| "loss": 0.0032, |
| "step": 15360 |
| }, |
| { |
| "epoch": 52.636986301369866, |
| "grad_norm": 0.03773738816380501, |
| "learning_rate": 1.3950158440774957e-05, |
| "loss": 0.0032, |
| "step": 15370 |
| }, |
| { |
| "epoch": 52.67123287671233, |
| "grad_norm": 0.035674892365932465, |
| "learning_rate": 1.389292011321498e-05, |
| "loss": 0.0027, |
| "step": 15380 |
| }, |
| { |
| "epoch": 52.705479452054796, |
| "grad_norm": 0.03301068767905235, |
| "learning_rate": 1.383578050103268e-05, |
| "loss": 0.0037, |
| "step": 15390 |
| }, |
| { |
| "epoch": 52.73972602739726, |
| "grad_norm": 0.04170006886124611, |
| "learning_rate": 1.3778739760445552e-05, |
| "loss": 0.0029, |
| "step": 15400 |
| }, |
| { |
| "epoch": 52.773972602739725, |
| "grad_norm": 0.04138687998056412, |
| "learning_rate": 1.3721798047400813e-05, |
| "loss": 0.0037, |
| "step": 15410 |
| }, |
| { |
| "epoch": 52.80821917808219, |
| "grad_norm": 0.031535543501377106, |
| "learning_rate": 1.3664955517574968e-05, |
| "loss": 0.0032, |
| "step": 15420 |
| }, |
| { |
| "epoch": 52.842465753424655, |
| "grad_norm": 0.04763416200876236, |
| "learning_rate": 1.3608212326373249e-05, |
| "loss": 0.0031, |
| "step": 15430 |
| }, |
| { |
| "epoch": 52.87671232876713, |
| "grad_norm": 0.05117536336183548, |
| "learning_rate": 1.3551568628929434e-05, |
| "loss": 0.0045, |
| "step": 15440 |
| }, |
| { |
| "epoch": 52.91095890410959, |
| "grad_norm": 0.040727127343416214, |
| "learning_rate": 1.3495024580105192e-05, |
| "loss": 0.0026, |
| "step": 15450 |
| }, |
| { |
| "epoch": 52.945205479452056, |
| "grad_norm": 0.03748713806271553, |
| "learning_rate": 1.343858033448982e-05, |
| "loss": 0.0029, |
| "step": 15460 |
| }, |
| { |
| "epoch": 52.97945205479452, |
| "grad_norm": 0.026867395266890526, |
| "learning_rate": 1.3382236046399722e-05, |
| "loss": 0.0024, |
| "step": 15470 |
| }, |
| { |
| "epoch": 53.013698630136986, |
| "grad_norm": 0.03179669752717018, |
| "learning_rate": 1.3325991869878013e-05, |
| "loss": 0.0029, |
| "step": 15480 |
| }, |
| { |
| "epoch": 53.04794520547945, |
| "grad_norm": 0.0422259159386158, |
| "learning_rate": 1.3269847958694148e-05, |
| "loss": 0.0026, |
| "step": 15490 |
| }, |
| { |
| "epoch": 53.082191780821915, |
| "grad_norm": 0.030288219451904297, |
| "learning_rate": 1.3213804466343421e-05, |
| "loss": 0.0033, |
| "step": 15500 |
| }, |
| { |
| "epoch": 53.11643835616438, |
| "grad_norm": 0.03154587373137474, |
| "learning_rate": 1.3157861546046613e-05, |
| "loss": 0.0032, |
| "step": 15510 |
| }, |
| { |
| "epoch": 53.15068493150685, |
| "grad_norm": 0.0416083000600338, |
| "learning_rate": 1.3102019350749528e-05, |
| "loss": 0.0038, |
| "step": 15520 |
| }, |
| { |
| "epoch": 53.18493150684932, |
| "grad_norm": 0.030634721741080284, |
| "learning_rate": 1.3046278033122577e-05, |
| "loss": 0.0027, |
| "step": 15530 |
| }, |
| { |
| "epoch": 53.21917808219178, |
| "grad_norm": 0.03578125312924385, |
| "learning_rate": 1.299063774556042e-05, |
| "loss": 0.0035, |
| "step": 15540 |
| }, |
| { |
| "epoch": 53.25342465753425, |
| "grad_norm": 0.02459009923040867, |
| "learning_rate": 1.293509864018146e-05, |
| "loss": 0.0029, |
| "step": 15550 |
| }, |
| { |
| "epoch": 53.28767123287671, |
| "grad_norm": 0.034552790224552155, |
| "learning_rate": 1.2879660868827508e-05, |
| "loss": 0.0035, |
| "step": 15560 |
| }, |
| { |
| "epoch": 53.321917808219176, |
| "grad_norm": 0.027246346697211266, |
| "learning_rate": 1.2824324583063302e-05, |
| "loss": 0.0026, |
| "step": 15570 |
| }, |
| { |
| "epoch": 53.35616438356164, |
| "grad_norm": 0.03363962098956108, |
| "learning_rate": 1.2769089934176126e-05, |
| "loss": 0.0032, |
| "step": 15580 |
| }, |
| { |
| "epoch": 53.39041095890411, |
| "grad_norm": 0.03963744267821312, |
| "learning_rate": 1.2713957073175425e-05, |
| "loss": 0.0028, |
| "step": 15590 |
| }, |
| { |
| "epoch": 53.42465753424658, |
| "grad_norm": 0.02972957119345665, |
| "learning_rate": 1.2658926150792322e-05, |
| "loss": 0.0024, |
| "step": 15600 |
| }, |
| { |
| "epoch": 53.45890410958904, |
| "grad_norm": 0.04657153785228729, |
| "learning_rate": 1.2603997317479238e-05, |
| "loss": 0.0037, |
| "step": 15610 |
| }, |
| { |
| "epoch": 53.49315068493151, |
| "grad_norm": 0.022585947066545486, |
| "learning_rate": 1.2549170723409549e-05, |
| "loss": 0.0038, |
| "step": 15620 |
| }, |
| { |
| "epoch": 53.52739726027397, |
| "grad_norm": 0.027521610260009766, |
| "learning_rate": 1.2494446518477022e-05, |
| "loss": 0.0028, |
| "step": 15630 |
| }, |
| { |
| "epoch": 53.56164383561644, |
| "grad_norm": 0.02755691297352314, |
| "learning_rate": 1.243982485229559e-05, |
| "loss": 0.0044, |
| "step": 15640 |
| }, |
| { |
| "epoch": 53.5958904109589, |
| "grad_norm": 0.027248825877904892, |
| "learning_rate": 1.2385305874198776e-05, |
| "loss": 0.0041, |
| "step": 15650 |
| }, |
| { |
| "epoch": 53.63013698630137, |
| "grad_norm": 0.026916412636637688, |
| "learning_rate": 1.233088973323937e-05, |
| "loss": 0.0023, |
| "step": 15660 |
| }, |
| { |
| "epoch": 53.66438356164384, |
| "grad_norm": 0.026474550366401672, |
| "learning_rate": 1.2276576578189064e-05, |
| "loss": 0.003, |
| "step": 15670 |
| }, |
| { |
| "epoch": 53.6986301369863, |
| "grad_norm": 0.0351148284971714, |
| "learning_rate": 1.2222366557537911e-05, |
| "loss": 0.0034, |
| "step": 15680 |
| }, |
| { |
| "epoch": 53.73287671232877, |
| "grad_norm": 0.03675288334488869, |
| "learning_rate": 1.2168259819494066e-05, |
| "loss": 0.0033, |
| "step": 15690 |
| }, |
| { |
| "epoch": 53.76712328767123, |
| "grad_norm": 0.027912992984056473, |
| "learning_rate": 1.2114256511983274e-05, |
| "loss": 0.0037, |
| "step": 15700 |
| }, |
| { |
| "epoch": 53.8013698630137, |
| "grad_norm": 0.026471871882677078, |
| "learning_rate": 1.2060356782648503e-05, |
| "loss": 0.0042, |
| "step": 15710 |
| }, |
| { |
| "epoch": 53.83561643835616, |
| "grad_norm": 0.0362430065870285, |
| "learning_rate": 1.2006560778849578e-05, |
| "loss": 0.0029, |
| "step": 15720 |
| }, |
| { |
| "epoch": 53.86986301369863, |
| "grad_norm": 0.03203180804848671, |
| "learning_rate": 1.1952868647662696e-05, |
| "loss": 0.0029, |
| "step": 15730 |
| }, |
| { |
| "epoch": 53.9041095890411, |
| "grad_norm": 0.04257775843143463, |
| "learning_rate": 1.1899280535880119e-05, |
| "loss": 0.0028, |
| "step": 15740 |
| }, |
| { |
| "epoch": 53.93835616438356, |
| "grad_norm": 0.03652791678905487, |
| "learning_rate": 1.1845796590009683e-05, |
| "loss": 0.0032, |
| "step": 15750 |
| }, |
| { |
| "epoch": 53.97260273972603, |
| "grad_norm": 0.02293156087398529, |
| "learning_rate": 1.1792416956274444e-05, |
| "loss": 0.0024, |
| "step": 15760 |
| }, |
| { |
| "epoch": 54.00684931506849, |
| "grad_norm": 0.038182858377695084, |
| "learning_rate": 1.1739141780612306e-05, |
| "loss": 0.0032, |
| "step": 15770 |
| }, |
| { |
| "epoch": 54.04109589041096, |
| "grad_norm": 0.04348289966583252, |
| "learning_rate": 1.1685971208675539e-05, |
| "loss": 0.004, |
| "step": 15780 |
| }, |
| { |
| "epoch": 54.07534246575342, |
| "grad_norm": 0.03379713371396065, |
| "learning_rate": 1.1632905385830484e-05, |
| "loss": 0.0021, |
| "step": 15790 |
| }, |
| { |
| "epoch": 54.10958904109589, |
| "grad_norm": 0.03406383469700813, |
| "learning_rate": 1.157994445715706e-05, |
| "loss": 0.0028, |
| "step": 15800 |
| }, |
| { |
| "epoch": 54.14383561643836, |
| "grad_norm": 0.047310084104537964, |
| "learning_rate": 1.1527088567448407e-05, |
| "loss": 0.0038, |
| "step": 15810 |
| }, |
| { |
| "epoch": 54.178082191780824, |
| "grad_norm": 0.04336090013384819, |
| "learning_rate": 1.1474337861210543e-05, |
| "loss": 0.003, |
| "step": 15820 |
| }, |
| { |
| "epoch": 54.21232876712329, |
| "grad_norm": 0.040994029492139816, |
| "learning_rate": 1.1421692482661856e-05, |
| "loss": 0.0037, |
| "step": 15830 |
| }, |
| { |
| "epoch": 54.24657534246575, |
| "grad_norm": 0.037672173231840134, |
| "learning_rate": 1.1369152575732822e-05, |
| "loss": 0.004, |
| "step": 15840 |
| }, |
| { |
| "epoch": 54.28082191780822, |
| "grad_norm": 0.03158077225089073, |
| "learning_rate": 1.1316718284065537e-05, |
| "loss": 0.0027, |
| "step": 15850 |
| }, |
| { |
| "epoch": 54.31506849315068, |
| "grad_norm": 0.03162799030542374, |
| "learning_rate": 1.1264389751013326e-05, |
| "loss": 0.0023, |
| "step": 15860 |
| }, |
| { |
| "epoch": 54.34931506849315, |
| "grad_norm": 0.032345883548259735, |
| "learning_rate": 1.1212167119640438e-05, |
| "loss": 0.0028, |
| "step": 15870 |
| }, |
| { |
| "epoch": 54.38356164383562, |
| "grad_norm": 0.027058375999331474, |
| "learning_rate": 1.1160050532721528e-05, |
| "loss": 0.0033, |
| "step": 15880 |
| }, |
| { |
| "epoch": 54.417808219178085, |
| "grad_norm": 0.03421015664935112, |
| "learning_rate": 1.1108040132741354e-05, |
| "loss": 0.003, |
| "step": 15890 |
| }, |
| { |
| "epoch": 54.45205479452055, |
| "grad_norm": 0.04903862252831459, |
| "learning_rate": 1.1056136061894384e-05, |
| "loss": 0.003, |
| "step": 15900 |
| }, |
| { |
| "epoch": 54.486301369863014, |
| "grad_norm": 0.033128440380096436, |
| "learning_rate": 1.100433846208434e-05, |
| "loss": 0.0027, |
| "step": 15910 |
| }, |
| { |
| "epoch": 54.52054794520548, |
| "grad_norm": 0.02966536581516266, |
| "learning_rate": 1.095264747492391e-05, |
| "loss": 0.0029, |
| "step": 15920 |
| }, |
| { |
| "epoch": 54.554794520547944, |
| "grad_norm": 0.02673506550490856, |
| "learning_rate": 1.090106324173426e-05, |
| "loss": 0.0027, |
| "step": 15930 |
| }, |
| { |
| "epoch": 54.58904109589041, |
| "grad_norm": 0.02429800108075142, |
| "learning_rate": 1.0849585903544706e-05, |
| "loss": 0.0024, |
| "step": 15940 |
| }, |
| { |
| "epoch": 54.62328767123287, |
| "grad_norm": 0.02877028100192547, |
| "learning_rate": 1.0798215601092354e-05, |
| "loss": 0.002, |
| "step": 15950 |
| }, |
| { |
| "epoch": 54.657534246575345, |
| "grad_norm": 0.020034709945321083, |
| "learning_rate": 1.0746952474821614e-05, |
| "loss": 0.0028, |
| "step": 15960 |
| }, |
| { |
| "epoch": 54.69178082191781, |
| "grad_norm": 0.03347136452794075, |
| "learning_rate": 1.069579666488395e-05, |
| "loss": 0.0037, |
| "step": 15970 |
| }, |
| { |
| "epoch": 54.726027397260275, |
| "grad_norm": 0.04249145835638046, |
| "learning_rate": 1.0644748311137376e-05, |
| "loss": 0.004, |
| "step": 15980 |
| }, |
| { |
| "epoch": 54.76027397260274, |
| "grad_norm": 0.03087807074189186, |
| "learning_rate": 1.059380755314613e-05, |
| "loss": 0.0031, |
| "step": 15990 |
| }, |
| { |
| "epoch": 54.794520547945204, |
| "grad_norm": 0.054326847195625305, |
| "learning_rate": 1.0542974530180327e-05, |
| "loss": 0.0028, |
| "step": 16000 |
| }, |
| { |
| "epoch": 54.82876712328767, |
| "grad_norm": 0.027016691863536835, |
| "learning_rate": 1.049224938121548e-05, |
| "loss": 0.0036, |
| "step": 16010 |
| }, |
| { |
| "epoch": 54.863013698630134, |
| "grad_norm": 0.025528740137815475, |
| "learning_rate": 1.0441632244932237e-05, |
| "loss": 0.0022, |
| "step": 16020 |
| }, |
| { |
| "epoch": 54.897260273972606, |
| "grad_norm": 0.023421315476298332, |
| "learning_rate": 1.0391123259715906e-05, |
| "loss": 0.0025, |
| "step": 16030 |
| }, |
| { |
| "epoch": 54.93150684931507, |
| "grad_norm": 0.03620325028896332, |
| "learning_rate": 1.0340722563656107e-05, |
| "loss": 0.0026, |
| "step": 16040 |
| }, |
| { |
| "epoch": 54.965753424657535, |
| "grad_norm": 0.036633092910051346, |
| "learning_rate": 1.0290430294546449e-05, |
| "loss": 0.0027, |
| "step": 16050 |
| }, |
| { |
| "epoch": 55.0, |
| "grad_norm": 0.04909895360469818, |
| "learning_rate": 1.0240246589884044e-05, |
| "loss": 0.0029, |
| "step": 16060 |
| }, |
| { |
| "epoch": 55.034246575342465, |
| "grad_norm": 0.02480817213654518, |
| "learning_rate": 1.0190171586869258e-05, |
| "loss": 0.0031, |
| "step": 16070 |
| }, |
| { |
| "epoch": 55.06849315068493, |
| "grad_norm": 0.03510754182934761, |
| "learning_rate": 1.0140205422405214e-05, |
| "loss": 0.0036, |
| "step": 16080 |
| }, |
| { |
| "epoch": 55.102739726027394, |
| "grad_norm": 0.02947348728775978, |
| "learning_rate": 1.009034823309749e-05, |
| "loss": 0.0026, |
| "step": 16090 |
| }, |
| { |
| "epoch": 55.136986301369866, |
| "grad_norm": 0.023402495309710503, |
| "learning_rate": 1.0040600155253765e-05, |
| "loss": 0.002, |
| "step": 16100 |
| }, |
| { |
| "epoch": 55.17123287671233, |
| "grad_norm": 0.04765753448009491, |
| "learning_rate": 9.990961324883358e-06, |
| "loss": 0.0046, |
| "step": 16110 |
| }, |
| { |
| "epoch": 55.205479452054796, |
| "grad_norm": 0.034412067383527756, |
| "learning_rate": 9.941431877696955e-06, |
| "loss": 0.0031, |
| "step": 16120 |
| }, |
| { |
| "epoch": 55.23972602739726, |
| "grad_norm": 0.0393243208527565, |
| "learning_rate": 9.892011949106172e-06, |
| "loss": 0.0033, |
| "step": 16130 |
| }, |
| { |
| "epoch": 55.273972602739725, |
| "grad_norm": 0.0417923778295517, |
| "learning_rate": 9.842701674223187e-06, |
| "loss": 0.0036, |
| "step": 16140 |
| }, |
| { |
| "epoch": 55.30821917808219, |
| "grad_norm": 0.03421523794531822, |
| "learning_rate": 9.793501187860432e-06, |
| "loss": 0.0032, |
| "step": 16150 |
| }, |
| { |
| "epoch": 55.342465753424655, |
| "grad_norm": 0.02373417280614376, |
| "learning_rate": 9.744410624530148e-06, |
| "loss": 0.002, |
| "step": 16160 |
| }, |
| { |
| "epoch": 55.37671232876713, |
| "grad_norm": 0.03909434750676155, |
| "learning_rate": 9.695430118444048e-06, |
| "loss": 0.0028, |
| "step": 16170 |
| }, |
| { |
| "epoch": 55.41095890410959, |
| "grad_norm": 0.031819798052310944, |
| "learning_rate": 9.646559803512994e-06, |
| "loss": 0.0029, |
| "step": 16180 |
| }, |
| { |
| "epoch": 55.445205479452056, |
| "grad_norm": 0.024081602692604065, |
| "learning_rate": 9.597799813346525e-06, |
| "loss": 0.0026, |
| "step": 16190 |
| }, |
| { |
| "epoch": 55.47945205479452, |
| "grad_norm": 0.02767317369580269, |
| "learning_rate": 9.549150281252633e-06, |
| "loss": 0.0042, |
| "step": 16200 |
| }, |
| { |
| "epoch": 55.513698630136986, |
| "grad_norm": 0.03075326606631279, |
| "learning_rate": 9.500611340237258e-06, |
| "loss": 0.0044, |
| "step": 16210 |
| }, |
| { |
| "epoch": 55.54794520547945, |
| "grad_norm": 0.022155923768877983, |
| "learning_rate": 9.452183123004e-06, |
| "loss": 0.0025, |
| "step": 16220 |
| }, |
| { |
| "epoch": 55.582191780821915, |
| "grad_norm": 0.027297066524624825, |
| "learning_rate": 9.403865761953779e-06, |
| "loss": 0.0033, |
| "step": 16230 |
| }, |
| { |
| "epoch": 55.61643835616438, |
| "grad_norm": 0.04104280099272728, |
| "learning_rate": 9.355659389184396e-06, |
| "loss": 0.0037, |
| "step": 16240 |
| }, |
| { |
| "epoch": 55.65068493150685, |
| "grad_norm": 0.02392885647714138, |
| "learning_rate": 9.307564136490254e-06, |
| "loss": 0.0023, |
| "step": 16250 |
| }, |
| { |
| "epoch": 55.68493150684932, |
| "grad_norm": 0.040937427431344986, |
| "learning_rate": 9.259580135361929e-06, |
| "loss": 0.003, |
| "step": 16260 |
| }, |
| { |
| "epoch": 55.71917808219178, |
| "grad_norm": 0.025108788162469864, |
| "learning_rate": 9.211707516985829e-06, |
| "loss": 0.0043, |
| "step": 16270 |
| }, |
| { |
| "epoch": 55.75342465753425, |
| "grad_norm": 0.03981754183769226, |
| "learning_rate": 9.163946412243896e-06, |
| "loss": 0.0024, |
| "step": 16280 |
| }, |
| { |
| "epoch": 55.78767123287671, |
| "grad_norm": 0.027174200862646103, |
| "learning_rate": 9.116296951713133e-06, |
| "loss": 0.0035, |
| "step": 16290 |
| }, |
| { |
| "epoch": 55.821917808219176, |
| "grad_norm": 0.03212342411279678, |
| "learning_rate": 9.068759265665384e-06, |
| "loss": 0.0048, |
| "step": 16300 |
| }, |
| { |
| "epoch": 55.85616438356164, |
| "grad_norm": 0.022607458755373955, |
| "learning_rate": 9.02133348406684e-06, |
| "loss": 0.0017, |
| "step": 16310 |
| }, |
| { |
| "epoch": 55.89041095890411, |
| "grad_norm": 0.0344407893717289, |
| "learning_rate": 8.974019736577777e-06, |
| "loss": 0.003, |
| "step": 16320 |
| }, |
| { |
| "epoch": 55.92465753424658, |
| "grad_norm": 0.03668694943189621, |
| "learning_rate": 8.92681815255219e-06, |
| "loss": 0.0025, |
| "step": 16330 |
| }, |
| { |
| "epoch": 55.95890410958904, |
| "grad_norm": 0.03393989056348801, |
| "learning_rate": 8.879728861037384e-06, |
| "loss": 0.004, |
| "step": 16340 |
| }, |
| { |
| "epoch": 55.99315068493151, |
| "grad_norm": 0.03064884804189205, |
| "learning_rate": 8.832751990773714e-06, |
| "loss": 0.0029, |
| "step": 16350 |
| }, |
| { |
| "epoch": 56.02739726027397, |
| "grad_norm": 0.024468744173645973, |
| "learning_rate": 8.785887670194138e-06, |
| "loss": 0.002, |
| "step": 16360 |
| }, |
| { |
| "epoch": 56.06164383561644, |
| "grad_norm": 0.02453785017132759, |
| "learning_rate": 8.739136027423894e-06, |
| "loss": 0.0029, |
| "step": 16370 |
| }, |
| { |
| "epoch": 56.0958904109589, |
| "grad_norm": 0.019738109782338142, |
| "learning_rate": 8.692497190280224e-06, |
| "loss": 0.0029, |
| "step": 16380 |
| }, |
| { |
| "epoch": 56.13013698630137, |
| "grad_norm": 0.02169376239180565, |
| "learning_rate": 8.645971286271904e-06, |
| "loss": 0.0036, |
| "step": 16390 |
| }, |
| { |
| "epoch": 56.16438356164384, |
| "grad_norm": 0.02179548889398575, |
| "learning_rate": 8.599558442598998e-06, |
| "loss": 0.0023, |
| "step": 16400 |
| }, |
| { |
| "epoch": 56.1986301369863, |
| "grad_norm": 0.025071581825613976, |
| "learning_rate": 8.55325878615244e-06, |
| "loss": 0.0022, |
| "step": 16410 |
| }, |
| { |
| "epoch": 56.23287671232877, |
| "grad_norm": 0.027042483910918236, |
| "learning_rate": 8.507072443513702e-06, |
| "loss": 0.0035, |
| "step": 16420 |
| }, |
| { |
| "epoch": 56.26712328767123, |
| "grad_norm": 0.02304648794233799, |
| "learning_rate": 8.460999540954517e-06, |
| "loss": 0.0024, |
| "step": 16430 |
| }, |
| { |
| "epoch": 56.3013698630137, |
| "grad_norm": 0.02609025314450264, |
| "learning_rate": 8.415040204436426e-06, |
| "loss": 0.0034, |
| "step": 16440 |
| }, |
| { |
| "epoch": 56.33561643835616, |
| "grad_norm": 0.029359575361013412, |
| "learning_rate": 8.369194559610482e-06, |
| "loss": 0.0022, |
| "step": 16450 |
| }, |
| { |
| "epoch": 56.36986301369863, |
| "grad_norm": 0.031987905502319336, |
| "learning_rate": 8.323462731816961e-06, |
| "loss": 0.0022, |
| "step": 16460 |
| }, |
| { |
| "epoch": 56.4041095890411, |
| "grad_norm": 0.04589890316128731, |
| "learning_rate": 8.277844846084898e-06, |
| "loss": 0.0036, |
| "step": 16470 |
| }, |
| { |
| "epoch": 56.43835616438356, |
| "grad_norm": 0.019137799739837646, |
| "learning_rate": 8.232341027131885e-06, |
| "loss": 0.0029, |
| "step": 16480 |
| }, |
| { |
| "epoch": 56.47260273972603, |
| "grad_norm": 0.02907939814031124, |
| "learning_rate": 8.186951399363613e-06, |
| "loss": 0.0024, |
| "step": 16490 |
| }, |
| { |
| "epoch": 56.50684931506849, |
| "grad_norm": 0.030412210151553154, |
| "learning_rate": 8.141676086873572e-06, |
| "loss": 0.0033, |
| "step": 16500 |
| }, |
| { |
| "epoch": 56.54109589041096, |
| "grad_norm": 0.03413422778248787, |
| "learning_rate": 8.096515213442762e-06, |
| "loss": 0.0026, |
| "step": 16510 |
| }, |
| { |
| "epoch": 56.57534246575342, |
| "grad_norm": 0.01989554800093174, |
| "learning_rate": 8.051468902539272e-06, |
| "loss": 0.0042, |
| "step": 16520 |
| }, |
| { |
| "epoch": 56.60958904109589, |
| "grad_norm": 0.021041251718997955, |
| "learning_rate": 8.00653727731801e-06, |
| "loss": 0.0032, |
| "step": 16530 |
| }, |
| { |
| "epoch": 56.64383561643836, |
| "grad_norm": 0.036524537950754166, |
| "learning_rate": 7.96172046062032e-06, |
| "loss": 0.0034, |
| "step": 16540 |
| }, |
| { |
| "epoch": 56.678082191780824, |
| "grad_norm": 0.03467152640223503, |
| "learning_rate": 7.917018574973645e-06, |
| "loss": 0.0038, |
| "step": 16550 |
| }, |
| { |
| "epoch": 56.71232876712329, |
| "grad_norm": 0.03052785061299801, |
| "learning_rate": 7.872431742591268e-06, |
| "loss": 0.0025, |
| "step": 16560 |
| }, |
| { |
| "epoch": 56.74657534246575, |
| "grad_norm": 0.04574688896536827, |
| "learning_rate": 7.827960085371855e-06, |
| "loss": 0.0036, |
| "step": 16570 |
| }, |
| { |
| "epoch": 56.78082191780822, |
| "grad_norm": 0.04330058395862579, |
| "learning_rate": 7.783603724899257e-06, |
| "loss": 0.0031, |
| "step": 16580 |
| }, |
| { |
| "epoch": 56.81506849315068, |
| "grad_norm": 0.024963831529021263, |
| "learning_rate": 7.739362782442021e-06, |
| "loss": 0.004, |
| "step": 16590 |
| }, |
| { |
| "epoch": 56.84931506849315, |
| "grad_norm": 0.02104303240776062, |
| "learning_rate": 7.695237378953223e-06, |
| "loss": 0.0022, |
| "step": 16600 |
| }, |
| { |
| "epoch": 56.88356164383562, |
| "grad_norm": 0.04167972132563591, |
| "learning_rate": 7.651227635070041e-06, |
| "loss": 0.0036, |
| "step": 16610 |
| }, |
| { |
| "epoch": 56.917808219178085, |
| "grad_norm": 0.03367381915450096, |
| "learning_rate": 7.607333671113409e-06, |
| "loss": 0.0029, |
| "step": 16620 |
| }, |
| { |
| "epoch": 56.95205479452055, |
| "grad_norm": 0.03414791822433472, |
| "learning_rate": 7.56355560708778e-06, |
| "loss": 0.0039, |
| "step": 16630 |
| }, |
| { |
| "epoch": 56.986301369863014, |
| "grad_norm": 0.027016418054699898, |
| "learning_rate": 7.519893562680663e-06, |
| "loss": 0.0034, |
| "step": 16640 |
| }, |
| { |
| "epoch": 57.02054794520548, |
| "grad_norm": 0.024846762418746948, |
| "learning_rate": 7.476347657262456e-06, |
| "loss": 0.0027, |
| "step": 16650 |
| }, |
| { |
| "epoch": 57.054794520547944, |
| "grad_norm": 0.018531423062086105, |
| "learning_rate": 7.432918009885997e-06, |
| "loss": 0.0025, |
| "step": 16660 |
| }, |
| { |
| "epoch": 57.08904109589041, |
| "grad_norm": 0.03298579528927803, |
| "learning_rate": 7.389604739286271e-06, |
| "loss": 0.0045, |
| "step": 16670 |
| }, |
| { |
| "epoch": 57.12328767123287, |
| "grad_norm": 0.030705546960234642, |
| "learning_rate": 7.3464079638801365e-06, |
| "loss": 0.0024, |
| "step": 16680 |
| }, |
| { |
| "epoch": 57.157534246575345, |
| "grad_norm": 0.03241143003106117, |
| "learning_rate": 7.30332780176588e-06, |
| "loss": 0.0031, |
| "step": 16690 |
| }, |
| { |
| "epoch": 57.19178082191781, |
| "grad_norm": 0.025262603536248207, |
| "learning_rate": 7.260364370723044e-06, |
| "loss": 0.0031, |
| "step": 16700 |
| }, |
| { |
| "epoch": 57.226027397260275, |
| "grad_norm": 0.028922202065587044, |
| "learning_rate": 7.217517788212025e-06, |
| "loss": 0.0026, |
| "step": 16710 |
| }, |
| { |
| "epoch": 57.26027397260274, |
| "grad_norm": 0.028081277385354042, |
| "learning_rate": 7.174788171373731e-06, |
| "loss": 0.0045, |
| "step": 16720 |
| }, |
| { |
| "epoch": 57.294520547945204, |
| "grad_norm": 0.0246548093855381, |
| "learning_rate": 7.132175637029293e-06, |
| "loss": 0.0022, |
| "step": 16730 |
| }, |
| { |
| "epoch": 57.32876712328767, |
| "grad_norm": 0.02839839644730091, |
| "learning_rate": 7.089680301679752e-06, |
| "loss": 0.0039, |
| "step": 16740 |
| }, |
| { |
| "epoch": 57.363013698630134, |
| "grad_norm": 0.016727443784475327, |
| "learning_rate": 7.047302281505736e-06, |
| "loss": 0.0023, |
| "step": 16750 |
| }, |
| { |
| "epoch": 57.397260273972606, |
| "grad_norm": 0.022300872951745987, |
| "learning_rate": 7.005041692367154e-06, |
| "loss": 0.0035, |
| "step": 16760 |
| }, |
| { |
| "epoch": 57.43150684931507, |
| "grad_norm": 0.025143135339021683, |
| "learning_rate": 6.962898649802823e-06, |
| "loss": 0.0033, |
| "step": 16770 |
| }, |
| { |
| "epoch": 57.465753424657535, |
| "grad_norm": 0.034999918192625046, |
| "learning_rate": 6.92087326903022e-06, |
| "loss": 0.0029, |
| "step": 16780 |
| }, |
| { |
| "epoch": 57.5, |
| "grad_norm": 0.04293488711118698, |
| "learning_rate": 6.878965664945108e-06, |
| "loss": 0.0028, |
| "step": 16790 |
| }, |
| { |
| "epoch": 57.534246575342465, |
| "grad_norm": 0.022644592449069023, |
| "learning_rate": 6.837175952121306e-06, |
| "loss": 0.0025, |
| "step": 16800 |
| }, |
| { |
| "epoch": 57.56849315068493, |
| "grad_norm": 0.019077636301517487, |
| "learning_rate": 6.795504244810285e-06, |
| "loss": 0.0032, |
| "step": 16810 |
| }, |
| { |
| "epoch": 57.602739726027394, |
| "grad_norm": 0.024243632331490517, |
| "learning_rate": 6.753950656940905e-06, |
| "loss": 0.003, |
| "step": 16820 |
| }, |
| { |
| "epoch": 57.636986301369866, |
| "grad_norm": 0.04237434267997742, |
| "learning_rate": 6.712515302119077e-06, |
| "loss": 0.0034, |
| "step": 16830 |
| }, |
| { |
| "epoch": 57.67123287671233, |
| "grad_norm": 0.019914429634809494, |
| "learning_rate": 6.671198293627479e-06, |
| "loss": 0.0024, |
| "step": 16840 |
| }, |
| { |
| "epoch": 57.705479452054796, |
| "grad_norm": 0.015781041234731674, |
| "learning_rate": 6.629999744425236e-06, |
| "loss": 0.0031, |
| "step": 16850 |
| }, |
| { |
| "epoch": 57.73972602739726, |
| "grad_norm": 0.018441669642925262, |
| "learning_rate": 6.588919767147639e-06, |
| "loss": 0.003, |
| "step": 16860 |
| }, |
| { |
| "epoch": 57.773972602739725, |
| "grad_norm": 0.017452578991651535, |
| "learning_rate": 6.5479584741057255e-06, |
| "loss": 0.0022, |
| "step": 16870 |
| }, |
| { |
| "epoch": 57.80821917808219, |
| "grad_norm": 0.03509654104709625, |
| "learning_rate": 6.5071159772861436e-06, |
| "loss": 0.0029, |
| "step": 16880 |
| }, |
| { |
| "epoch": 57.842465753424655, |
| "grad_norm": 0.019614599645137787, |
| "learning_rate": 6.466392388350695e-06, |
| "loss": 0.0025, |
| "step": 16890 |
| }, |
| { |
| "epoch": 57.87671232876713, |
| "grad_norm": 0.022444887086749077, |
| "learning_rate": 6.425787818636131e-06, |
| "loss": 0.003, |
| "step": 16900 |
| }, |
| { |
| "epoch": 57.91095890410959, |
| "grad_norm": 0.017676763236522675, |
| "learning_rate": 6.385302379153818e-06, |
| "loss": 0.0023, |
| "step": 16910 |
| }, |
| { |
| "epoch": 57.945205479452056, |
| "grad_norm": 0.017485516145825386, |
| "learning_rate": 6.344936180589351e-06, |
| "loss": 0.0028, |
| "step": 16920 |
| }, |
| { |
| "epoch": 57.97945205479452, |
| "grad_norm": 0.019938629120588303, |
| "learning_rate": 6.304689333302416e-06, |
| "loss": 0.0025, |
| "step": 16930 |
| }, |
| { |
| "epoch": 58.013698630136986, |
| "grad_norm": 0.03070191666483879, |
| "learning_rate": 6.264561947326331e-06, |
| "loss": 0.0027, |
| "step": 16940 |
| }, |
| { |
| "epoch": 58.04794520547945, |
| "grad_norm": 0.021726680919528008, |
| "learning_rate": 6.22455413236786e-06, |
| "loss": 0.0034, |
| "step": 16950 |
| }, |
| { |
| "epoch": 58.082191780821915, |
| "grad_norm": 0.03220411017537117, |
| "learning_rate": 6.184665997806832e-06, |
| "loss": 0.0021, |
| "step": 16960 |
| }, |
| { |
| "epoch": 58.11643835616438, |
| "grad_norm": 0.032874926924705505, |
| "learning_rate": 6.144897652695864e-06, |
| "loss": 0.0034, |
| "step": 16970 |
| }, |
| { |
| "epoch": 58.15068493150685, |
| "grad_norm": 0.022236965596675873, |
| "learning_rate": 6.1052492057601275e-06, |
| "loss": 0.0026, |
| "step": 16980 |
| }, |
| { |
| "epoch": 58.18493150684932, |
| "grad_norm": 0.05407319590449333, |
| "learning_rate": 6.0657207653969315e-06, |
| "loss": 0.0037, |
| "step": 16990 |
| }, |
| { |
| "epoch": 58.21917808219178, |
| "grad_norm": 0.04553509131073952, |
| "learning_rate": 6.026312439675552e-06, |
| "loss": 0.0033, |
| "step": 17000 |
| }, |
| { |
| "epoch": 58.25342465753425, |
| "grad_norm": 0.028858445584774017, |
| "learning_rate": 5.9870243363368275e-06, |
| "loss": 0.0023, |
| "step": 17010 |
| }, |
| { |
| "epoch": 58.28767123287671, |
| "grad_norm": 0.02405349723994732, |
| "learning_rate": 5.947856562792925e-06, |
| "loss": 0.0037, |
| "step": 17020 |
| }, |
| { |
| "epoch": 58.321917808219176, |
| "grad_norm": 0.030620204284787178, |
| "learning_rate": 5.908809226127054e-06, |
| "loss": 0.0034, |
| "step": 17030 |
| }, |
| { |
| "epoch": 58.35616438356164, |
| "grad_norm": 0.03459925949573517, |
| "learning_rate": 5.869882433093155e-06, |
| "loss": 0.003, |
| "step": 17040 |
| }, |
| { |
| "epoch": 58.39041095890411, |
| "grad_norm": 0.03197428211569786, |
| "learning_rate": 5.831076290115573e-06, |
| "loss": 0.0027, |
| "step": 17050 |
| }, |
| { |
| "epoch": 58.42465753424658, |
| "grad_norm": 0.022777985781431198, |
| "learning_rate": 5.79239090328883e-06, |
| "loss": 0.0038, |
| "step": 17060 |
| }, |
| { |
| "epoch": 58.45890410958904, |
| "grad_norm": 0.025515226647257805, |
| "learning_rate": 5.753826378377286e-06, |
| "loss": 0.0026, |
| "step": 17070 |
| }, |
| { |
| "epoch": 58.49315068493151, |
| "grad_norm": 0.01934850960969925, |
| "learning_rate": 5.715382820814885e-06, |
| "loss": 0.0026, |
| "step": 17080 |
| }, |
| { |
| "epoch": 58.52739726027397, |
| "grad_norm": 0.02293264865875244, |
| "learning_rate": 5.67706033570487e-06, |
| "loss": 0.003, |
| "step": 17090 |
| }, |
| { |
| "epoch": 58.56164383561644, |
| "grad_norm": 0.02049412578344345, |
| "learning_rate": 5.6388590278194096e-06, |
| "loss": 0.002, |
| "step": 17100 |
| }, |
| { |
| "epoch": 58.5958904109589, |
| "grad_norm": 0.015052354894578457, |
| "learning_rate": 5.600779001599455e-06, |
| "loss": 0.0021, |
| "step": 17110 |
| }, |
| { |
| "epoch": 58.63013698630137, |
| "grad_norm": 0.04019205644726753, |
| "learning_rate": 5.562820361154314e-06, |
| "loss": 0.0032, |
| "step": 17120 |
| }, |
| { |
| "epoch": 58.66438356164384, |
| "grad_norm": 0.03583867847919464, |
| "learning_rate": 5.524983210261481e-06, |
| "loss": 0.0034, |
| "step": 17130 |
| }, |
| { |
| "epoch": 58.6986301369863, |
| "grad_norm": 0.028554566204547882, |
| "learning_rate": 5.48726765236629e-06, |
| "loss": 0.0034, |
| "step": 17140 |
| }, |
| { |
| "epoch": 58.73287671232877, |
| "grad_norm": 0.017591096460819244, |
| "learning_rate": 5.449673790581611e-06, |
| "loss": 0.0025, |
| "step": 17150 |
| }, |
| { |
| "epoch": 58.76712328767123, |
| "grad_norm": 0.03151347115635872, |
| "learning_rate": 5.412201727687644e-06, |
| "loss": 0.0028, |
| "step": 17160 |
| }, |
| { |
| "epoch": 58.8013698630137, |
| "grad_norm": 0.019181104376912117, |
| "learning_rate": 5.374851566131561e-06, |
| "loss": 0.0023, |
| "step": 17170 |
| }, |
| { |
| "epoch": 58.83561643835616, |
| "grad_norm": 0.03802407905459404, |
| "learning_rate": 5.337623408027293e-06, |
| "loss": 0.0031, |
| "step": 17180 |
| }, |
| { |
| "epoch": 58.86986301369863, |
| "grad_norm": 0.03747876361012459, |
| "learning_rate": 5.300517355155215e-06, |
| "loss": 0.0025, |
| "step": 17190 |
| }, |
| { |
| "epoch": 58.9041095890411, |
| "grad_norm": 0.0215825904160738, |
| "learning_rate": 5.263533508961827e-06, |
| "loss": 0.0039, |
| "step": 17200 |
| }, |
| { |
| "epoch": 58.93835616438356, |
| "grad_norm": 0.016658857464790344, |
| "learning_rate": 5.226671970559577e-06, |
| "loss": 0.0021, |
| "step": 17210 |
| }, |
| { |
| "epoch": 58.97260273972603, |
| "grad_norm": 0.018328074365854263, |
| "learning_rate": 5.1899328407264855e-06, |
| "loss": 0.0023, |
| "step": 17220 |
| }, |
| { |
| "epoch": 59.00684931506849, |
| "grad_norm": 0.025236770510673523, |
| "learning_rate": 5.153316219905946e-06, |
| "loss": 0.0028, |
| "step": 17230 |
| }, |
| { |
| "epoch": 59.04109589041096, |
| "grad_norm": 0.016753623262047768, |
| "learning_rate": 5.116822208206396e-06, |
| "loss": 0.0025, |
| "step": 17240 |
| }, |
| { |
| "epoch": 59.07534246575342, |
| "grad_norm": 0.020218942314386368, |
| "learning_rate": 5.080450905401057e-06, |
| "loss": 0.0025, |
| "step": 17250 |
| }, |
| { |
| "epoch": 59.10958904109589, |
| "grad_norm": 0.03711342811584473, |
| "learning_rate": 5.044202410927706e-06, |
| "loss": 0.0034, |
| "step": 17260 |
| }, |
| { |
| "epoch": 59.14383561643836, |
| "grad_norm": 0.039715610444545746, |
| "learning_rate": 5.008076823888319e-06, |
| "loss": 0.003, |
| "step": 17270 |
| }, |
| { |
| "epoch": 59.178082191780824, |
| "grad_norm": 0.03945466876029968, |
| "learning_rate": 4.972074243048897e-06, |
| "loss": 0.0026, |
| "step": 17280 |
| }, |
| { |
| "epoch": 59.21232876712329, |
| "grad_norm": 0.017290910705924034, |
| "learning_rate": 4.936194766839103e-06, |
| "loss": 0.0037, |
| "step": 17290 |
| }, |
| { |
| "epoch": 59.24657534246575, |
| "grad_norm": 0.018677933141589165, |
| "learning_rate": 4.900438493352055e-06, |
| "loss": 0.0023, |
| "step": 17300 |
| }, |
| { |
| "epoch": 59.28082191780822, |
| "grad_norm": 0.023664385080337524, |
| "learning_rate": 4.864805520344051e-06, |
| "loss": 0.0033, |
| "step": 17310 |
| }, |
| { |
| "epoch": 59.31506849315068, |
| "grad_norm": 0.02739626169204712, |
| "learning_rate": 4.829295945234258e-06, |
| "loss": 0.0041, |
| "step": 17320 |
| }, |
| { |
| "epoch": 59.34931506849315, |
| "grad_norm": 0.023249467834830284, |
| "learning_rate": 4.7939098651045235e-06, |
| "loss": 0.0036, |
| "step": 17330 |
| }, |
| { |
| "epoch": 59.38356164383562, |
| "grad_norm": 0.02616041526198387, |
| "learning_rate": 4.758647376699032e-06, |
| "loss": 0.0028, |
| "step": 17340 |
| }, |
| { |
| "epoch": 59.417808219178085, |
| "grad_norm": 0.04718932509422302, |
| "learning_rate": 4.723508576424062e-06, |
| "loss": 0.0029, |
| "step": 17350 |
| }, |
| { |
| "epoch": 59.45205479452055, |
| "grad_norm": 0.02695685438811779, |
| "learning_rate": 4.688493560347773e-06, |
| "loss": 0.003, |
| "step": 17360 |
| }, |
| { |
| "epoch": 59.486301369863014, |
| "grad_norm": 0.02369818091392517, |
| "learning_rate": 4.653602424199876e-06, |
| "loss": 0.0031, |
| "step": 17370 |
| }, |
| { |
| "epoch": 59.52054794520548, |
| "grad_norm": 0.028814973309636116, |
| "learning_rate": 4.618835263371396e-06, |
| "loss": 0.0028, |
| "step": 17380 |
| }, |
| { |
| "epoch": 59.554794520547944, |
| "grad_norm": 0.02037746086716652, |
| "learning_rate": 4.5841921729144424e-06, |
| "loss": 0.0023, |
| "step": 17390 |
| }, |
| { |
| "epoch": 59.58904109589041, |
| "grad_norm": 0.020026013255119324, |
| "learning_rate": 4.549673247541875e-06, |
| "loss": 0.0023, |
| "step": 17400 |
| }, |
| { |
| "epoch": 59.62328767123287, |
| "grad_norm": 0.023346390575170517, |
| "learning_rate": 4.515278581627141e-06, |
| "loss": 0.0027, |
| "step": 17410 |
| }, |
| { |
| "epoch": 59.657534246575345, |
| "grad_norm": 0.02217704802751541, |
| "learning_rate": 4.48100826920394e-06, |
| "loss": 0.0027, |
| "step": 17420 |
| }, |
| { |
| "epoch": 59.69178082191781, |
| "grad_norm": 0.012821310199797153, |
| "learning_rate": 4.446862403965984e-06, |
| "loss": 0.002, |
| "step": 17430 |
| }, |
| { |
| "epoch": 59.726027397260275, |
| "grad_norm": 0.021954253315925598, |
| "learning_rate": 4.412841079266777e-06, |
| "loss": 0.0034, |
| "step": 17440 |
| }, |
| { |
| "epoch": 59.76027397260274, |
| "grad_norm": 0.02877684310078621, |
| "learning_rate": 4.378944388119311e-06, |
| "loss": 0.0027, |
| "step": 17450 |
| }, |
| { |
| "epoch": 59.794520547945204, |
| "grad_norm": 0.0313013419508934, |
| "learning_rate": 4.3451724231958644e-06, |
| "loss": 0.0022, |
| "step": 17460 |
| }, |
| { |
| "epoch": 59.82876712328767, |
| "grad_norm": 0.03267759829759598, |
| "learning_rate": 4.311525276827682e-06, |
| "loss": 0.0033, |
| "step": 17470 |
| }, |
| { |
| "epoch": 59.863013698630134, |
| "grad_norm": 0.026436539366841316, |
| "learning_rate": 4.27800304100478e-06, |
| "loss": 0.0025, |
| "step": 17480 |
| }, |
| { |
| "epoch": 59.897260273972606, |
| "grad_norm": 0.03464627265930176, |
| "learning_rate": 4.244605807375679e-06, |
| "loss": 0.003, |
| "step": 17490 |
| }, |
| { |
| "epoch": 59.93150684931507, |
| "grad_norm": 0.019924577325582504, |
| "learning_rate": 4.2113336672471245e-06, |
| "loss": 0.0028, |
| "step": 17500 |
| }, |
| { |
| "epoch": 59.965753424657535, |
| "grad_norm": 0.02339211106300354, |
| "learning_rate": 4.178186711583904e-06, |
| "loss": 0.0046, |
| "step": 17510 |
| }, |
| { |
| "epoch": 60.0, |
| "grad_norm": 0.02530355006456375, |
| "learning_rate": 4.145165031008508e-06, |
| "loss": 0.0031, |
| "step": 17520 |
| }, |
| { |
| "epoch": 60.034246575342465, |
| "grad_norm": 0.012683791108429432, |
| "learning_rate": 4.112268715800943e-06, |
| "loss": 0.0021, |
| "step": 17530 |
| }, |
| { |
| "epoch": 60.06849315068493, |
| "grad_norm": 0.03863707557320595, |
| "learning_rate": 4.079497855898501e-06, |
| "loss": 0.0044, |
| "step": 17540 |
| }, |
| { |
| "epoch": 60.102739726027394, |
| "grad_norm": 0.027493145316839218, |
| "learning_rate": 4.046852540895446e-06, |
| "loss": 0.0031, |
| "step": 17550 |
| }, |
| { |
| "epoch": 60.136986301369866, |
| "grad_norm": 0.041166216135025024, |
| "learning_rate": 4.01433286004283e-06, |
| "loss": 0.0042, |
| "step": 17560 |
| }, |
| { |
| "epoch": 60.17123287671233, |
| "grad_norm": 0.021608727052807808, |
| "learning_rate": 3.981938902248222e-06, |
| "loss": 0.0024, |
| "step": 17570 |
| }, |
| { |
| "epoch": 60.205479452054796, |
| "grad_norm": 0.026905011385679245, |
| "learning_rate": 3.949670756075447e-06, |
| "loss": 0.0023, |
| "step": 17580 |
| }, |
| { |
| "epoch": 60.23972602739726, |
| "grad_norm": 0.02121490240097046, |
| "learning_rate": 3.917528509744412e-06, |
| "loss": 0.0032, |
| "step": 17590 |
| }, |
| { |
| "epoch": 60.273972602739725, |
| "grad_norm": 0.04649144038558006, |
| "learning_rate": 3.885512251130763e-06, |
| "loss": 0.0049, |
| "step": 17600 |
| }, |
| { |
| "epoch": 60.30821917808219, |
| "grad_norm": 0.03161786124110222, |
| "learning_rate": 3.8536220677657495e-06, |
| "loss": 0.0025, |
| "step": 17610 |
| }, |
| { |
| "epoch": 60.342465753424655, |
| "grad_norm": 0.02871571108698845, |
| "learning_rate": 3.821858046835913e-06, |
| "loss": 0.0029, |
| "step": 17620 |
| }, |
| { |
| "epoch": 60.37671232876713, |
| "grad_norm": 0.01877393200993538, |
| "learning_rate": 3.790220275182854e-06, |
| "loss": 0.0031, |
| "step": 17630 |
| }, |
| { |
| "epoch": 60.41095890410959, |
| "grad_norm": 0.028997721150517464, |
| "learning_rate": 3.75870883930306e-06, |
| "loss": 0.0021, |
| "step": 17640 |
| }, |
| { |
| "epoch": 60.445205479452056, |
| "grad_norm": 0.019930049777030945, |
| "learning_rate": 3.7273238253475785e-06, |
| "loss": 0.0032, |
| "step": 17650 |
| }, |
| { |
| "epoch": 60.47945205479452, |
| "grad_norm": 0.025048483163118362, |
| "learning_rate": 3.696065319121833e-06, |
| "loss": 0.0029, |
| "step": 17660 |
| }, |
| { |
| "epoch": 60.513698630136986, |
| "grad_norm": 0.028792202472686768, |
| "learning_rate": 3.664933406085402e-06, |
| "loss": 0.0039, |
| "step": 17670 |
| }, |
| { |
| "epoch": 60.54794520547945, |
| "grad_norm": 0.020279956981539726, |
| "learning_rate": 3.6339281713517303e-06, |
| "loss": 0.0029, |
| "step": 17680 |
| }, |
| { |
| "epoch": 60.582191780821915, |
| "grad_norm": 0.02139876037836075, |
| "learning_rate": 3.60304969968796e-06, |
| "loss": 0.0035, |
| "step": 17690 |
| }, |
| { |
| "epoch": 60.61643835616438, |
| "grad_norm": 0.02807740494608879, |
| "learning_rate": 3.5722980755146517e-06, |
| "loss": 0.0034, |
| "step": 17700 |
| }, |
| { |
| "epoch": 60.65068493150685, |
| "grad_norm": 0.03502603620290756, |
| "learning_rate": 3.541673382905558e-06, |
| "loss": 0.0032, |
| "step": 17710 |
| }, |
| { |
| "epoch": 60.68493150684932, |
| "grad_norm": 0.032354071736335754, |
| "learning_rate": 3.511175705587433e-06, |
| "loss": 0.004, |
| "step": 17720 |
| }, |
| { |
| "epoch": 60.71917808219178, |
| "grad_norm": 0.019513679668307304, |
| "learning_rate": 3.4808051269397512e-06, |
| "loss": 0.0035, |
| "step": 17730 |
| }, |
| { |
| "epoch": 60.75342465753425, |
| "grad_norm": 0.03239310160279274, |
| "learning_rate": 3.4505617299945336e-06, |
| "loss": 0.0034, |
| "step": 17740 |
| }, |
| { |
| "epoch": 60.78767123287671, |
| "grad_norm": 0.019614113494753838, |
| "learning_rate": 3.420445597436056e-06, |
| "loss": 0.0026, |
| "step": 17750 |
| }, |
| { |
| "epoch": 60.821917808219176, |
| "grad_norm": 0.016276845708489418, |
| "learning_rate": 3.390456811600673e-06, |
| "loss": 0.004, |
| "step": 17760 |
| }, |
| { |
| "epoch": 60.85616438356164, |
| "grad_norm": 0.032702166587114334, |
| "learning_rate": 3.360595454476595e-06, |
| "loss": 0.0032, |
| "step": 17770 |
| }, |
| { |
| "epoch": 60.89041095890411, |
| "grad_norm": 0.020163316279649734, |
| "learning_rate": 3.3308616077036115e-06, |
| "loss": 0.0026, |
| "step": 17780 |
| }, |
| { |
| "epoch": 60.92465753424658, |
| "grad_norm": 0.02356639876961708, |
| "learning_rate": 3.301255352572946e-06, |
| "loss": 0.0026, |
| "step": 17790 |
| }, |
| { |
| "epoch": 60.95890410958904, |
| "grad_norm": 0.0164373517036438, |
| "learning_rate": 3.271776770026963e-06, |
| "loss": 0.0028, |
| "step": 17800 |
| }, |
| { |
| "epoch": 60.99315068493151, |
| "grad_norm": 0.01919223740696907, |
| "learning_rate": 3.2424259406589664e-06, |
| "loss": 0.0035, |
| "step": 17810 |
| }, |
| { |
| "epoch": 61.02739726027397, |
| "grad_norm": 0.059007029980421066, |
| "learning_rate": 3.213202944713023e-06, |
| "loss": 0.0046, |
| "step": 17820 |
| }, |
| { |
| "epoch": 61.06164383561644, |
| "grad_norm": 0.03275210037827492, |
| "learning_rate": 3.1841078620836683e-06, |
| "loss": 0.0026, |
| "step": 17830 |
| }, |
| { |
| "epoch": 61.0958904109589, |
| "grad_norm": 0.013377784751355648, |
| "learning_rate": 3.155140772315773e-06, |
| "loss": 0.0028, |
| "step": 17840 |
| }, |
| { |
| "epoch": 61.13013698630137, |
| "grad_norm": 0.016845788806676865, |
| "learning_rate": 3.126301754604233e-06, |
| "loss": 0.004, |
| "step": 17850 |
| }, |
| { |
| "epoch": 61.16438356164384, |
| "grad_norm": 0.02182050235569477, |
| "learning_rate": 3.0975908877938277e-06, |
| "loss": 0.0039, |
| "step": 17860 |
| }, |
| { |
| "epoch": 61.1986301369863, |
| "grad_norm": 0.02985861524939537, |
| "learning_rate": 3.0690082503789742e-06, |
| "loss": 0.003, |
| "step": 17870 |
| }, |
| { |
| "epoch": 61.23287671232877, |
| "grad_norm": 0.014906318858265877, |
| "learning_rate": 3.040553920503503e-06, |
| "loss": 0.0024, |
| "step": 17880 |
| }, |
| { |
| "epoch": 61.26712328767123, |
| "grad_norm": 0.013918554410338402, |
| "learning_rate": 3.0122279759604745e-06, |
| "loss": 0.0026, |
| "step": 17890 |
| }, |
| { |
| "epoch": 61.3013698630137, |
| "grad_norm": 0.04059552773833275, |
| "learning_rate": 2.9840304941919415e-06, |
| "loss": 0.0029, |
| "step": 17900 |
| }, |
| { |
| "epoch": 61.33561643835616, |
| "grad_norm": 0.01856929622590542, |
| "learning_rate": 2.9559615522887273e-06, |
| "loss": 0.0032, |
| "step": 17910 |
| }, |
| { |
| "epoch": 61.36986301369863, |
| "grad_norm": 0.021468475461006165, |
| "learning_rate": 2.928021226990263e-06, |
| "loss": 0.0027, |
| "step": 17920 |
| }, |
| { |
| "epoch": 61.4041095890411, |
| "grad_norm": 0.031402189284563065, |
| "learning_rate": 2.9002095946843277e-06, |
| "loss": 0.0022, |
| "step": 17930 |
| }, |
| { |
| "epoch": 61.43835616438356, |
| "grad_norm": 0.018215125426650047, |
| "learning_rate": 2.8725267314068495e-06, |
| "loss": 0.0033, |
| "step": 17940 |
| }, |
| { |
| "epoch": 61.47260273972603, |
| "grad_norm": 0.019874971359968185, |
| "learning_rate": 2.844972712841737e-06, |
| "loss": 0.0022, |
| "step": 17950 |
| }, |
| { |
| "epoch": 61.50684931506849, |
| "grad_norm": 0.017870550975203514, |
| "learning_rate": 2.817547614320615e-06, |
| "loss": 0.003, |
| "step": 17960 |
| }, |
| { |
| "epoch": 61.54109589041096, |
| "grad_norm": 0.011960655450820923, |
| "learning_rate": 2.790251510822661e-06, |
| "loss": 0.0025, |
| "step": 17970 |
| }, |
| { |
| "epoch": 61.57534246575342, |
| "grad_norm": 0.011642039753496647, |
| "learning_rate": 2.7630844769743757e-06, |
| "loss": 0.0023, |
| "step": 17980 |
| }, |
| { |
| "epoch": 61.60958904109589, |
| "grad_norm": 0.03464807942509651, |
| "learning_rate": 2.73604658704939e-06, |
| "loss": 0.0031, |
| "step": 17990 |
| }, |
| { |
| "epoch": 61.64383561643836, |
| "grad_norm": 0.017682120203971863, |
| "learning_rate": 2.7091379149682685e-06, |
| "loss": 0.0036, |
| "step": 18000 |
| }, |
| { |
| "epoch": 61.678082191780824, |
| "grad_norm": 0.019320419058203697, |
| "learning_rate": 2.682358534298285e-06, |
| "loss": 0.0027, |
| "step": 18010 |
| }, |
| { |
| "epoch": 61.71232876712329, |
| "grad_norm": 0.01240335963666439, |
| "learning_rate": 2.6557085182532582e-06, |
| "loss": 0.0044, |
| "step": 18020 |
| }, |
| { |
| "epoch": 61.74657534246575, |
| "grad_norm": 0.01913302391767502, |
| "learning_rate": 2.6291879396933004e-06, |
| "loss": 0.0031, |
| "step": 18030 |
| }, |
| { |
| "epoch": 61.78082191780822, |
| "grad_norm": 0.025125738233327866, |
| "learning_rate": 2.602796871124663e-06, |
| "loss": 0.0032, |
| "step": 18040 |
| }, |
| { |
| "epoch": 61.81506849315068, |
| "grad_norm": 0.03669289872050285, |
| "learning_rate": 2.57653538469953e-06, |
| "loss": 0.0035, |
| "step": 18050 |
| }, |
| { |
| "epoch": 61.84931506849315, |
| "grad_norm": 0.012292813509702682, |
| "learning_rate": 2.5504035522157854e-06, |
| "loss": 0.003, |
| "step": 18060 |
| }, |
| { |
| "epoch": 61.88356164383562, |
| "grad_norm": 0.02633073925971985, |
| "learning_rate": 2.5244014451168863e-06, |
| "loss": 0.0022, |
| "step": 18070 |
| }, |
| { |
| "epoch": 61.917808219178085, |
| "grad_norm": 0.014214101247489452, |
| "learning_rate": 2.4985291344915674e-06, |
| "loss": 0.0024, |
| "step": 18080 |
| }, |
| { |
| "epoch": 61.95205479452055, |
| "grad_norm": 0.016861189156770706, |
| "learning_rate": 2.4727866910737583e-06, |
| "loss": 0.003, |
| "step": 18090 |
| }, |
| { |
| "epoch": 61.986301369863014, |
| "grad_norm": 0.02316778339445591, |
| "learning_rate": 2.4471741852423237e-06, |
| "loss": 0.0023, |
| "step": 18100 |
| }, |
| { |
| "epoch": 62.02054794520548, |
| "grad_norm": 0.013381525874137878, |
| "learning_rate": 2.421691687020855e-06, |
| "loss": 0.002, |
| "step": 18110 |
| }, |
| { |
| "epoch": 62.054794520547944, |
| "grad_norm": 0.020613618195056915, |
| "learning_rate": 2.3963392660775575e-06, |
| "loss": 0.0035, |
| "step": 18120 |
| }, |
| { |
| "epoch": 62.08904109589041, |
| "grad_norm": 0.032079510390758514, |
| "learning_rate": 2.371116991724953e-06, |
| "loss": 0.0028, |
| "step": 18130 |
| }, |
| { |
| "epoch": 62.12328767123287, |
| "grad_norm": 0.02837025187909603, |
| "learning_rate": 2.3460249329197824e-06, |
| "loss": 0.003, |
| "step": 18140 |
| }, |
| { |
| "epoch": 62.157534246575345, |
| "grad_norm": 0.02057802490890026, |
| "learning_rate": 2.321063158262793e-06, |
| "loss": 0.0027, |
| "step": 18150 |
| }, |
| { |
| "epoch": 62.19178082191781, |
| "grad_norm": 0.02426522970199585, |
| "learning_rate": 2.296231735998511e-06, |
| "loss": 0.0023, |
| "step": 18160 |
| }, |
| { |
| "epoch": 62.226027397260275, |
| "grad_norm": 0.041218966245651245, |
| "learning_rate": 2.271530734015104e-06, |
| "loss": 0.0037, |
| "step": 18170 |
| }, |
| { |
| "epoch": 62.26027397260274, |
| "grad_norm": 0.023262323811650276, |
| "learning_rate": 2.2469602198441573e-06, |
| "loss": 0.0022, |
| "step": 18180 |
| }, |
| { |
| "epoch": 62.294520547945204, |
| "grad_norm": 0.022797662764787674, |
| "learning_rate": 2.222520260660521e-06, |
| "loss": 0.0032, |
| "step": 18190 |
| }, |
| { |
| "epoch": 62.32876712328767, |
| "grad_norm": 0.018522756174206734, |
| "learning_rate": 2.1982109232821178e-06, |
| "loss": 0.0031, |
| "step": 18200 |
| }, |
| { |
| "epoch": 62.363013698630134, |
| "grad_norm": 0.028718652203679085, |
| "learning_rate": 2.174032274169746e-06, |
| "loss": 0.0024, |
| "step": 18210 |
| }, |
| { |
| "epoch": 62.397260273972606, |
| "grad_norm": 0.031062902882695198, |
| "learning_rate": 2.149984379426906e-06, |
| "loss": 0.003, |
| "step": 18220 |
| }, |
| { |
| "epoch": 62.43150684931507, |
| "grad_norm": 0.01933165453374386, |
| "learning_rate": 2.1260673047996227e-06, |
| "loss": 0.0024, |
| "step": 18230 |
| }, |
| { |
| "epoch": 62.465753424657535, |
| "grad_norm": 0.034511059522628784, |
| "learning_rate": 2.102281115676258e-06, |
| "loss": 0.0025, |
| "step": 18240 |
| }, |
| { |
| "epoch": 62.5, |
| "grad_norm": 0.029437880963087082, |
| "learning_rate": 2.0786258770873647e-06, |
| "loss": 0.0024, |
| "step": 18250 |
| }, |
| { |
| "epoch": 62.534246575342465, |
| "grad_norm": 0.008569435216486454, |
| "learning_rate": 2.0551016537054493e-06, |
| "loss": 0.0016, |
| "step": 18260 |
| }, |
| { |
| "epoch": 62.56849315068493, |
| "grad_norm": 0.025799725204706192, |
| "learning_rate": 2.0317085098448372e-06, |
| "loss": 0.0025, |
| "step": 18270 |
| }, |
| { |
| "epoch": 62.602739726027394, |
| "grad_norm": 0.011235746555030346, |
| "learning_rate": 2.008446509461498e-06, |
| "loss": 0.002, |
| "step": 18280 |
| }, |
| { |
| "epoch": 62.636986301369866, |
| "grad_norm": 0.022236861288547516, |
| "learning_rate": 1.985315716152847e-06, |
| "loss": 0.0034, |
| "step": 18290 |
| }, |
| { |
| "epoch": 62.67123287671233, |
| "grad_norm": 0.022197648882865906, |
| "learning_rate": 1.962316193157593e-06, |
| "loss": 0.0038, |
| "step": 18300 |
| }, |
| { |
| "epoch": 62.705479452054796, |
| "grad_norm": 0.01615208201110363, |
| "learning_rate": 1.939448003355554e-06, |
| "loss": 0.0035, |
| "step": 18310 |
| }, |
| { |
| "epoch": 62.73972602739726, |
| "grad_norm": 0.035238660871982574, |
| "learning_rate": 1.91671120926748e-06, |
| "loss": 0.0022, |
| "step": 18320 |
| }, |
| { |
| "epoch": 62.773972602739725, |
| "grad_norm": 0.024143965914845467, |
| "learning_rate": 1.8941058730549132e-06, |
| "loss": 0.0027, |
| "step": 18330 |
| }, |
| { |
| "epoch": 62.80821917808219, |
| "grad_norm": 0.016463087871670723, |
| "learning_rate": 1.8716320565199618e-06, |
| "loss": 0.0025, |
| "step": 18340 |
| }, |
| { |
| "epoch": 62.842465753424655, |
| "grad_norm": 0.020290328189730644, |
| "learning_rate": 1.849289821105199e-06, |
| "loss": 0.0033, |
| "step": 18350 |
| }, |
| { |
| "epoch": 62.87671232876713, |
| "grad_norm": 0.04143396392464638, |
| "learning_rate": 1.8270792278934302e-06, |
| "loss": 0.0029, |
| "step": 18360 |
| }, |
| { |
| "epoch": 62.91095890410959, |
| "grad_norm": 0.016841132193803787, |
| "learning_rate": 1.8050003376075707e-06, |
| "loss": 0.0022, |
| "step": 18370 |
| }, |
| { |
| "epoch": 62.945205479452056, |
| "grad_norm": 0.02761237323284149, |
| "learning_rate": 1.7830532106104747e-06, |
| "loss": 0.0039, |
| "step": 18380 |
| }, |
| { |
| "epoch": 62.97945205479452, |
| "grad_norm": 0.01891123317182064, |
| "learning_rate": 1.7612379069047335e-06, |
| "loss": 0.0019, |
| "step": 18390 |
| }, |
| { |
| "epoch": 63.013698630136986, |
| "grad_norm": 0.03597179055213928, |
| "learning_rate": 1.7395544861325718e-06, |
| "loss": 0.0032, |
| "step": 18400 |
| }, |
| { |
| "epoch": 63.04794520547945, |
| "grad_norm": 0.03108893521130085, |
| "learning_rate": 1.7180030075756136e-06, |
| "loss": 0.0027, |
| "step": 18410 |
| }, |
| { |
| "epoch": 63.082191780821915, |
| "grad_norm": 0.021627260372042656, |
| "learning_rate": 1.696583530154794e-06, |
| "loss": 0.0032, |
| "step": 18420 |
| }, |
| { |
| "epoch": 63.11643835616438, |
| "grad_norm": 0.016512632369995117, |
| "learning_rate": 1.6752961124301415e-06, |
| "loss": 0.0033, |
| "step": 18430 |
| }, |
| { |
| "epoch": 63.15068493150685, |
| "grad_norm": 0.016389215365052223, |
| "learning_rate": 1.6541408126006463e-06, |
| "loss": 0.0027, |
| "step": 18440 |
| }, |
| { |
| "epoch": 63.18493150684932, |
| "grad_norm": 0.02537810057401657, |
| "learning_rate": 1.6331176885040878e-06, |
| "loss": 0.0021, |
| "step": 18450 |
| }, |
| { |
| "epoch": 63.21917808219178, |
| "grad_norm": 0.014781366102397442, |
| "learning_rate": 1.6122267976168781e-06, |
| "loss": 0.0038, |
| "step": 18460 |
| }, |
| { |
| "epoch": 63.25342465753425, |
| "grad_norm": 0.024314258247613907, |
| "learning_rate": 1.5914681970539192e-06, |
| "loss": 0.003, |
| "step": 18470 |
| }, |
| { |
| "epoch": 63.28767123287671, |
| "grad_norm": 0.0542307011783123, |
| "learning_rate": 1.5708419435684462e-06, |
| "loss": 0.0055, |
| "step": 18480 |
| }, |
| { |
| "epoch": 63.321917808219176, |
| "grad_norm": 0.01515932660549879, |
| "learning_rate": 1.550348093551829e-06, |
| "loss": 0.0029, |
| "step": 18490 |
| }, |
| { |
| "epoch": 63.35616438356164, |
| "grad_norm": 0.01770406775176525, |
| "learning_rate": 1.5299867030334814e-06, |
| "loss": 0.003, |
| "step": 18500 |
| }, |
| { |
| "epoch": 63.39041095890411, |
| "grad_norm": 0.016681650653481483, |
| "learning_rate": 1.5097578276806633e-06, |
| "loss": 0.0025, |
| "step": 18510 |
| }, |
| { |
| "epoch": 63.42465753424658, |
| "grad_norm": 0.012945062480866909, |
| "learning_rate": 1.4896615227983468e-06, |
| "loss": 0.0037, |
| "step": 18520 |
| }, |
| { |
| "epoch": 63.45890410958904, |
| "grad_norm": 0.027772676199674606, |
| "learning_rate": 1.4696978433290653e-06, |
| "loss": 0.0037, |
| "step": 18530 |
| }, |
| { |
| "epoch": 63.49315068493151, |
| "grad_norm": 0.01776418834924698, |
| "learning_rate": 1.4498668438527597e-06, |
| "loss": 0.0024, |
| "step": 18540 |
| }, |
| { |
| "epoch": 63.52739726027397, |
| "grad_norm": 0.012397520244121552, |
| "learning_rate": 1.4301685785866214e-06, |
| "loss": 0.0032, |
| "step": 18550 |
| }, |
| { |
| "epoch": 63.56164383561644, |
| "grad_norm": 0.017262982204556465, |
| "learning_rate": 1.4106031013849496e-06, |
| "loss": 0.002, |
| "step": 18560 |
| }, |
| { |
| "epoch": 63.5958904109589, |
| "grad_norm": 0.010316437110304832, |
| "learning_rate": 1.3911704657390113e-06, |
| "loss": 0.002, |
| "step": 18570 |
| }, |
| { |
| "epoch": 63.63013698630137, |
| "grad_norm": 0.020639773458242416, |
| "learning_rate": 1.3718707247769135e-06, |
| "loss": 0.0026, |
| "step": 18580 |
| }, |
| { |
| "epoch": 63.66438356164384, |
| "grad_norm": 0.023104477673768997, |
| "learning_rate": 1.3527039312633827e-06, |
| "loss": 0.0036, |
| "step": 18590 |
| }, |
| { |
| "epoch": 63.6986301369863, |
| "grad_norm": 0.02620391547679901, |
| "learning_rate": 1.333670137599713e-06, |
| "loss": 0.0044, |
| "step": 18600 |
| }, |
| { |
| "epoch": 63.73287671232877, |
| "grad_norm": 0.013353808782994747, |
| "learning_rate": 1.3147693958235618e-06, |
| "loss": 0.0038, |
| "step": 18610 |
| }, |
| { |
| "epoch": 63.76712328767123, |
| "grad_norm": 0.027655024081468582, |
| "learning_rate": 1.2960017576088446e-06, |
| "loss": 0.0032, |
| "step": 18620 |
| }, |
| { |
| "epoch": 63.8013698630137, |
| "grad_norm": 0.014509730041027069, |
| "learning_rate": 1.2773672742655784e-06, |
| "loss": 0.0027, |
| "step": 18630 |
| }, |
| { |
| "epoch": 63.83561643835616, |
| "grad_norm": 0.042286377400159836, |
| "learning_rate": 1.2588659967397e-06, |
| "loss": 0.0031, |
| "step": 18640 |
| }, |
| { |
| "epoch": 63.86986301369863, |
| "grad_norm": 0.025881841778755188, |
| "learning_rate": 1.2404979756130142e-06, |
| "loss": 0.0025, |
| "step": 18650 |
| }, |
| { |
| "epoch": 63.9041095890411, |
| "grad_norm": 0.03276398405432701, |
| "learning_rate": 1.222263261102985e-06, |
| "loss": 0.0037, |
| "step": 18660 |
| }, |
| { |
| "epoch": 63.93835616438356, |
| "grad_norm": 0.03172963857650757, |
| "learning_rate": 1.2041619030626284e-06, |
| "loss": 0.0036, |
| "step": 18670 |
| }, |
| { |
| "epoch": 63.97260273972603, |
| "grad_norm": 0.02655917964875698, |
| "learning_rate": 1.1861939509803687e-06, |
| "loss": 0.0034, |
| "step": 18680 |
| }, |
| { |
| "epoch": 64.0068493150685, |
| "grad_norm": 0.033081576228141785, |
| "learning_rate": 1.1683594539798893e-06, |
| "loss": 0.0025, |
| "step": 18690 |
| }, |
| { |
| "epoch": 64.04109589041096, |
| "grad_norm": 0.017231125384569168, |
| "learning_rate": 1.1506584608200367e-06, |
| "loss": 0.0033, |
| "step": 18700 |
| }, |
| { |
| "epoch": 64.07534246575342, |
| "grad_norm": 0.027252597734332085, |
| "learning_rate": 1.1330910198946442e-06, |
| "loss": 0.0034, |
| "step": 18710 |
| }, |
| { |
| "epoch": 64.10958904109589, |
| "grad_norm": 0.009583823382854462, |
| "learning_rate": 1.1156571792324211e-06, |
| "loss": 0.0033, |
| "step": 18720 |
| }, |
| { |
| "epoch": 64.14383561643835, |
| "grad_norm": 0.014770534820854664, |
| "learning_rate": 1.0983569864968346e-06, |
| "loss": 0.0032, |
| "step": 18730 |
| }, |
| { |
| "epoch": 64.17808219178082, |
| "grad_norm": 0.017628680914640427, |
| "learning_rate": 1.0811904889859336e-06, |
| "loss": 0.0033, |
| "step": 18740 |
| }, |
| { |
| "epoch": 64.21232876712328, |
| "grad_norm": 0.02731098234653473, |
| "learning_rate": 1.064157733632276e-06, |
| "loss": 0.0034, |
| "step": 18750 |
| }, |
| { |
| "epoch": 64.24657534246575, |
| "grad_norm": 0.029084838926792145, |
| "learning_rate": 1.0472587670027678e-06, |
| "loss": 0.0036, |
| "step": 18760 |
| }, |
| { |
| "epoch": 64.28082191780823, |
| "grad_norm": 0.015242715366184711, |
| "learning_rate": 1.030493635298535e-06, |
| "loss": 0.0032, |
| "step": 18770 |
| }, |
| { |
| "epoch": 64.31506849315069, |
| "grad_norm": 0.010613277554512024, |
| "learning_rate": 1.0138623843548078e-06, |
| "loss": 0.0029, |
| "step": 18780 |
| }, |
| { |
| "epoch": 64.34931506849315, |
| "grad_norm": 0.031174929812550545, |
| "learning_rate": 9.97365059640787e-07, |
| "loss": 0.0034, |
| "step": 18790 |
| }, |
| { |
| "epoch": 64.38356164383562, |
| "grad_norm": 0.011623591184616089, |
| "learning_rate": 9.810017062595322e-07, |
| "loss": 0.002, |
| "step": 18800 |
| }, |
| { |
| "epoch": 64.41780821917808, |
| "grad_norm": 0.017375310882925987, |
| "learning_rate": 9.647723689478305e-07, |
| "loss": 0.0022, |
| "step": 18810 |
| }, |
| { |
| "epoch": 64.45205479452055, |
| "grad_norm": 0.014030944555997849, |
| "learning_rate": 9.486770920760668e-07, |
| "loss": 0.0021, |
| "step": 18820 |
| }, |
| { |
| "epoch": 64.48630136986301, |
| "grad_norm": 0.025326306000351906, |
| "learning_rate": 9.327159196481138e-07, |
| "loss": 0.0037, |
| "step": 18830 |
| }, |
| { |
| "epoch": 64.52054794520548, |
| "grad_norm": 0.03436637669801712, |
| "learning_rate": 9.168888953011989e-07, |
| "loss": 0.0042, |
| "step": 18840 |
| }, |
| { |
| "epoch": 64.55479452054794, |
| "grad_norm": 0.01274149864912033, |
| "learning_rate": 9.011960623058202e-07, |
| "loss": 0.0029, |
| "step": 18850 |
| }, |
| { |
| "epoch": 64.58904109589041, |
| "grad_norm": 0.012670198455452919, |
| "learning_rate": 8.856374635655695e-07, |
| "loss": 0.0025, |
| "step": 18860 |
| }, |
| { |
| "epoch": 64.62328767123287, |
| "grad_norm": 0.021424753591418266, |
| "learning_rate": 8.702131416170656e-07, |
| "loss": 0.0025, |
| "step": 18870 |
| }, |
| { |
| "epoch": 64.65753424657534, |
| "grad_norm": 0.015697501599788666, |
| "learning_rate": 8.549231386298151e-07, |
| "loss": 0.0026, |
| "step": 18880 |
| }, |
| { |
| "epoch": 64.6917808219178, |
| "grad_norm": 0.028277793899178505, |
| "learning_rate": 8.397674964061075e-07, |
| "loss": 0.0032, |
| "step": 18890 |
| }, |
| { |
| "epoch": 64.72602739726027, |
| "grad_norm": 0.02876514196395874, |
| "learning_rate": 8.247462563808817e-07, |
| "loss": 0.0021, |
| "step": 18900 |
| }, |
| { |
| "epoch": 64.76027397260275, |
| "grad_norm": 0.0304822139441967, |
| "learning_rate": 8.098594596216424e-07, |
| "loss": 0.003, |
| "step": 18910 |
| }, |
| { |
| "epoch": 64.79452054794521, |
| "grad_norm": 0.029384471476078033, |
| "learning_rate": 7.951071468283167e-07, |
| "loss": 0.0023, |
| "step": 18920 |
| }, |
| { |
| "epoch": 64.82876712328768, |
| "grad_norm": 0.01294246967881918, |
| "learning_rate": 7.804893583331696e-07, |
| "loss": 0.0028, |
| "step": 18930 |
| }, |
| { |
| "epoch": 64.86301369863014, |
| "grad_norm": 0.022310776636004448, |
| "learning_rate": 7.66006134100672e-07, |
| "loss": 0.0021, |
| "step": 18940 |
| }, |
| { |
| "epoch": 64.8972602739726, |
| "grad_norm": 0.012138908728957176, |
| "learning_rate": 7.516575137274162e-07, |
| "loss": 0.002, |
| "step": 18950 |
| }, |
| { |
| "epoch": 64.93150684931507, |
| "grad_norm": 0.01461056899279356, |
| "learning_rate": 7.374435364419674e-07, |
| "loss": 0.0021, |
| "step": 18960 |
| }, |
| { |
| "epoch": 64.96575342465754, |
| "grad_norm": 0.019607581198215485, |
| "learning_rate": 7.233642411048014e-07, |
| "loss": 0.0022, |
| "step": 18970 |
| }, |
| { |
| "epoch": 65.0, |
| "grad_norm": 0.009313903748989105, |
| "learning_rate": 7.094196662081831e-07, |
| "loss": 0.0027, |
| "step": 18980 |
| }, |
| { |
| "epoch": 65.03424657534246, |
| "grad_norm": 0.010933526791632175, |
| "learning_rate": 6.956098498760389e-07, |
| "loss": 0.0028, |
| "step": 18990 |
| }, |
| { |
| "epoch": 65.06849315068493, |
| "grad_norm": 0.03836876153945923, |
| "learning_rate": 6.819348298638839e-07, |
| "loss": 0.003, |
| "step": 19000 |
| }, |
| { |
| "epoch": 65.1027397260274, |
| "grad_norm": 0.020728355273604393, |
| "learning_rate": 6.683946435586952e-07, |
| "loss": 0.0024, |
| "step": 19010 |
| }, |
| { |
| "epoch": 65.13698630136986, |
| "grad_norm": 0.02146265283226967, |
| "learning_rate": 6.549893279788277e-07, |
| "loss": 0.0019, |
| "step": 19020 |
| }, |
| { |
| "epoch": 65.17123287671232, |
| "grad_norm": 0.020000096410512924, |
| "learning_rate": 6.417189197739093e-07, |
| "loss": 0.0029, |
| "step": 19030 |
| }, |
| { |
| "epoch": 65.20547945205479, |
| "grad_norm": 0.02361264079809189, |
| "learning_rate": 6.285834552247128e-07, |
| "loss": 0.0024, |
| "step": 19040 |
| }, |
| { |
| "epoch": 65.23972602739725, |
| "grad_norm": 0.03703237324953079, |
| "learning_rate": 6.15582970243117e-07, |
| "loss": 0.0046, |
| "step": 19050 |
| }, |
| { |
| "epoch": 65.27397260273973, |
| "grad_norm": 0.028757376596331596, |
| "learning_rate": 6.027175003719354e-07, |
| "loss": 0.0026, |
| "step": 19060 |
| }, |
| { |
| "epoch": 65.3082191780822, |
| "grad_norm": 0.009351376444101334, |
| "learning_rate": 5.899870807848762e-07, |
| "loss": 0.0032, |
| "step": 19070 |
| }, |
| { |
| "epoch": 65.34246575342466, |
| "grad_norm": 0.019105447456240654, |
| "learning_rate": 5.773917462864264e-07, |
| "loss": 0.0026, |
| "step": 19080 |
| }, |
| { |
| "epoch": 65.37671232876713, |
| "grad_norm": 0.00916428305208683, |
| "learning_rate": 5.64931531311741e-07, |
| "loss": 0.0029, |
| "step": 19090 |
| }, |
| { |
| "epoch": 65.41095890410959, |
| "grad_norm": 0.022843752056360245, |
| "learning_rate": 5.526064699265753e-07, |
| "loss": 0.0026, |
| "step": 19100 |
| }, |
| { |
| "epoch": 65.44520547945206, |
| "grad_norm": 0.014886964112520218, |
| "learning_rate": 5.404165958271811e-07, |
| "loss": 0.0026, |
| "step": 19110 |
| }, |
| { |
| "epoch": 65.47945205479452, |
| "grad_norm": 0.016025792807340622, |
| "learning_rate": 5.283619423401998e-07, |
| "loss": 0.0029, |
| "step": 19120 |
| }, |
| { |
| "epoch": 65.51369863013699, |
| "grad_norm": 0.023562895134091377, |
| "learning_rate": 5.164425424226016e-07, |
| "loss": 0.0031, |
| "step": 19130 |
| }, |
| { |
| "epoch": 65.54794520547945, |
| "grad_norm": 0.018180225044488907, |
| "learning_rate": 5.046584286615697e-07, |
| "loss": 0.0025, |
| "step": 19140 |
| }, |
| { |
| "epoch": 65.58219178082192, |
| "grad_norm": 0.01071920245885849, |
| "learning_rate": 4.930096332744105e-07, |
| "loss": 0.0026, |
| "step": 19150 |
| }, |
| { |
| "epoch": 65.61643835616438, |
| "grad_norm": 0.014726242981851101, |
| "learning_rate": 4.814961881085045e-07, |
| "loss": 0.003, |
| "step": 19160 |
| }, |
| { |
| "epoch": 65.65068493150685, |
| "grad_norm": 0.032696496695280075, |
| "learning_rate": 4.701181246411501e-07, |
| "loss": 0.0044, |
| "step": 19170 |
| }, |
| { |
| "epoch": 65.68493150684931, |
| "grad_norm": 0.0267886221408844, |
| "learning_rate": 4.5887547397955864e-07, |
| "loss": 0.0027, |
| "step": 19180 |
| }, |
| { |
| "epoch": 65.71917808219177, |
| "grad_norm": 0.029734879732131958, |
| "learning_rate": 4.4776826686069305e-07, |
| "loss": 0.0032, |
| "step": 19190 |
| }, |
| { |
| "epoch": 65.75342465753425, |
| "grad_norm": 0.01782161183655262, |
| "learning_rate": 4.367965336512403e-07, |
| "loss": 0.0022, |
| "step": 19200 |
| }, |
| { |
| "epoch": 65.78767123287672, |
| "grad_norm": 0.01377611793577671, |
| "learning_rate": 4.259603043475002e-07, |
| "loss": 0.0041, |
| "step": 19210 |
| }, |
| { |
| "epoch": 65.82191780821918, |
| "grad_norm": 0.019605727866292, |
| "learning_rate": 4.1525960857530243e-07, |
| "loss": 0.0026, |
| "step": 19220 |
| }, |
| { |
| "epoch": 65.85616438356165, |
| "grad_norm": 0.021362723782658577, |
| "learning_rate": 4.0469447558995065e-07, |
| "loss": 0.0031, |
| "step": 19230 |
| }, |
| { |
| "epoch": 65.89041095890411, |
| "grad_norm": 0.010247836820781231, |
| "learning_rate": 3.9426493427611177e-07, |
| "loss": 0.003, |
| "step": 19240 |
| }, |
| { |
| "epoch": 65.92465753424658, |
| "grad_norm": 0.016217583790421486, |
| "learning_rate": 3.839710131477492e-07, |
| "loss": 0.0039, |
| "step": 19250 |
| }, |
| { |
| "epoch": 65.95890410958904, |
| "grad_norm": 0.010646031238138676, |
| "learning_rate": 3.738127403480507e-07, |
| "loss": 0.0029, |
| "step": 19260 |
| }, |
| { |
| "epoch": 65.9931506849315, |
| "grad_norm": 0.013961025513708591, |
| "learning_rate": 3.637901436493507e-07, |
| "loss": 0.0031, |
| "step": 19270 |
| }, |
| { |
| "epoch": 66.02739726027397, |
| "grad_norm": 0.009408863261342049, |
| "learning_rate": 3.5390325045304706e-07, |
| "loss": 0.0034, |
| "step": 19280 |
| }, |
| { |
| "epoch": 66.06164383561644, |
| "grad_norm": 0.019813723862171173, |
| "learning_rate": 3.441520877895288e-07, |
| "loss": 0.0028, |
| "step": 19290 |
| }, |
| { |
| "epoch": 66.0958904109589, |
| "grad_norm": 0.015585197135806084, |
| "learning_rate": 3.3453668231809286e-07, |
| "loss": 0.0034, |
| "step": 19300 |
| }, |
| { |
| "epoch": 66.13013698630137, |
| "grad_norm": 0.014616301283240318, |
| "learning_rate": 3.250570603268943e-07, |
| "loss": 0.0036, |
| "step": 19310 |
| }, |
| { |
| "epoch": 66.16438356164383, |
| "grad_norm": 0.017365731298923492, |
| "learning_rate": 3.157132477328628e-07, |
| "loss": 0.0024, |
| "step": 19320 |
| }, |
| { |
| "epoch": 66.1986301369863, |
| "grad_norm": 0.023117030039429665, |
| "learning_rate": 3.0650527008162513e-07, |
| "loss": 0.0031, |
| "step": 19330 |
| }, |
| { |
| "epoch": 66.23287671232876, |
| "grad_norm": 0.012441856786608696, |
| "learning_rate": 2.9743315254743833e-07, |
| "loss": 0.0028, |
| "step": 19340 |
| }, |
| { |
| "epoch": 66.26712328767124, |
| "grad_norm": 0.013054047711193562, |
| "learning_rate": 2.8849691993311777e-07, |
| "loss": 0.0024, |
| "step": 19350 |
| }, |
| { |
| "epoch": 66.3013698630137, |
| "grad_norm": 0.040562987327575684, |
| "learning_rate": 2.796965966699927e-07, |
| "loss": 0.003, |
| "step": 19360 |
| }, |
| { |
| "epoch": 66.33561643835617, |
| "grad_norm": 0.02737599052488804, |
| "learning_rate": 2.7103220681780615e-07, |
| "loss": 0.0035, |
| "step": 19370 |
| }, |
| { |
| "epoch": 66.36986301369863, |
| "grad_norm": 0.021345140412449837, |
| "learning_rate": 2.625037740646763e-07, |
| "loss": 0.0031, |
| "step": 19380 |
| }, |
| { |
| "epoch": 66.4041095890411, |
| "grad_norm": 0.019200876355171204, |
| "learning_rate": 2.5411132172700194e-07, |
| "loss": 0.0031, |
| "step": 19390 |
| }, |
| { |
| "epoch": 66.43835616438356, |
| "grad_norm": 0.010418230667710304, |
| "learning_rate": 2.458548727494292e-07, |
| "loss": 0.0025, |
| "step": 19400 |
| }, |
| { |
| "epoch": 66.47260273972603, |
| "grad_norm": 0.013329196721315384, |
| "learning_rate": 2.3773444970477955e-07, |
| "loss": 0.0031, |
| "step": 19410 |
| }, |
| { |
| "epoch": 66.5068493150685, |
| "grad_norm": 0.012985438108444214, |
| "learning_rate": 2.2975007479397738e-07, |
| "loss": 0.003, |
| "step": 19420 |
| }, |
| { |
| "epoch": 66.54109589041096, |
| "grad_norm": 0.022846786305308342, |
| "learning_rate": 2.219017698460002e-07, |
| "loss": 0.0041, |
| "step": 19430 |
| }, |
| { |
| "epoch": 66.57534246575342, |
| "grad_norm": 0.02029629796743393, |
| "learning_rate": 2.1418955631781202e-07, |
| "loss": 0.0028, |
| "step": 19440 |
| }, |
| { |
| "epoch": 66.60958904109589, |
| "grad_norm": 0.016922811046242714, |
| "learning_rate": 2.0661345529430775e-07, |
| "loss": 0.0033, |
| "step": 19450 |
| }, |
| { |
| "epoch": 66.64383561643835, |
| "grad_norm": 0.023142823949456215, |
| "learning_rate": 1.9917348748826335e-07, |
| "loss": 0.0028, |
| "step": 19460 |
| }, |
| { |
| "epoch": 66.67808219178082, |
| "grad_norm": 0.013606252148747444, |
| "learning_rate": 1.918696732402636e-07, |
| "loss": 0.0021, |
| "step": 19470 |
| }, |
| { |
| "epoch": 66.71232876712328, |
| "grad_norm": 0.015818113461136818, |
| "learning_rate": 1.847020325186577e-07, |
| "loss": 0.0036, |
| "step": 19480 |
| }, |
| { |
| "epoch": 66.74657534246575, |
| "grad_norm": 0.010209470987319946, |
| "learning_rate": 1.776705849195037e-07, |
| "loss": 0.0026, |
| "step": 19490 |
| }, |
| { |
| "epoch": 66.78082191780823, |
| "grad_norm": 0.010678197257220745, |
| "learning_rate": 1.7077534966650766e-07, |
| "loss": 0.0031, |
| "step": 19500 |
| }, |
| { |
| "epoch": 66.81506849315069, |
| "grad_norm": 0.017493901774287224, |
| "learning_rate": 1.6401634561098444e-07, |
| "loss": 0.0032, |
| "step": 19510 |
| }, |
| { |
| "epoch": 66.84931506849315, |
| "grad_norm": 0.029743684455752373, |
| "learning_rate": 1.5739359123178587e-07, |
| "loss": 0.0038, |
| "step": 19520 |
| }, |
| { |
| "epoch": 66.88356164383562, |
| "grad_norm": 0.020673219114542007, |
| "learning_rate": 1.5090710463527836e-07, |
| "loss": 0.0028, |
| "step": 19530 |
| }, |
| { |
| "epoch": 66.91780821917808, |
| "grad_norm": 0.023049013689160347, |
| "learning_rate": 1.4455690355525964e-07, |
| "loss": 0.0028, |
| "step": 19540 |
| }, |
| { |
| "epoch": 66.95205479452055, |
| "grad_norm": 0.026155423372983932, |
| "learning_rate": 1.383430053529422e-07, |
| "loss": 0.0032, |
| "step": 19550 |
| }, |
| { |
| "epoch": 66.98630136986301, |
| "grad_norm": 0.032859109342098236, |
| "learning_rate": 1.3226542701689215e-07, |
| "loss": 0.0032, |
| "step": 19560 |
| }, |
| { |
| "epoch": 67.02054794520548, |
| "grad_norm": 0.0173659510910511, |
| "learning_rate": 1.2632418516296262e-07, |
| "loss": 0.0038, |
| "step": 19570 |
| }, |
| { |
| "epoch": 67.05479452054794, |
| "grad_norm": 0.03304281830787659, |
| "learning_rate": 1.2051929603428825e-07, |
| "loss": 0.0021, |
| "step": 19580 |
| }, |
| { |
| "epoch": 67.08904109589041, |
| "grad_norm": 0.010505401529371738, |
| "learning_rate": 1.1485077550122402e-07, |
| "loss": 0.0019, |
| "step": 19590 |
| }, |
| { |
| "epoch": 67.12328767123287, |
| "grad_norm": 0.019299061968922615, |
| "learning_rate": 1.0931863906127327e-07, |
| "loss": 0.003, |
| "step": 19600 |
| }, |
| { |
| "epoch": 67.15753424657534, |
| "grad_norm": 0.010144400410354137, |
| "learning_rate": 1.0392290183909304e-07, |
| "loss": 0.0029, |
| "step": 19610 |
| }, |
| { |
| "epoch": 67.1917808219178, |
| "grad_norm": 0.04509196802973747, |
| "learning_rate": 9.866357858642205e-08, |
| "loss": 0.0043, |
| "step": 19620 |
| }, |
| { |
| "epoch": 67.22602739726027, |
| "grad_norm": 0.01752866618335247, |
| "learning_rate": 9.354068368204739e-08, |
| "loss": 0.0017, |
| "step": 19630 |
| }, |
| { |
| "epoch": 67.26027397260275, |
| "grad_norm": 0.03391791135072708, |
| "learning_rate": 8.855423113177664e-08, |
| "loss": 0.0036, |
| "step": 19640 |
| }, |
| { |
| "epoch": 67.29452054794521, |
| "grad_norm": 0.027711069211363792, |
| "learning_rate": 8.37042345683714e-08, |
| "loss": 0.0034, |
| "step": 19650 |
| }, |
| { |
| "epoch": 67.32876712328768, |
| "grad_norm": 0.03244870901107788, |
| "learning_rate": 7.899070725153613e-08, |
| "loss": 0.0033, |
| "step": 19660 |
| }, |
| { |
| "epoch": 67.36301369863014, |
| "grad_norm": 0.009590478613972664, |
| "learning_rate": 7.44136620678848e-08, |
| "loss": 0.0029, |
| "step": 19670 |
| }, |
| { |
| "epoch": 67.3972602739726, |
| "grad_norm": 0.02920118160545826, |
| "learning_rate": 6.997311153086883e-08, |
| "loss": 0.0035, |
| "step": 19680 |
| }, |
| { |
| "epoch": 67.43150684931507, |
| "grad_norm": 0.015596888959407806, |
| "learning_rate": 6.566906778079917e-08, |
| "loss": 0.0024, |
| "step": 19690 |
| }, |
| { |
| "epoch": 67.46575342465754, |
| "grad_norm": 0.022850381210446358, |
| "learning_rate": 6.150154258476315e-08, |
| "loss": 0.0023, |
| "step": 19700 |
| }, |
| { |
| "epoch": 67.5, |
| "grad_norm": 0.028931666165590286, |
| "learning_rate": 5.747054733660773e-08, |
| "loss": 0.0028, |
| "step": 19710 |
| }, |
| { |
| "epoch": 67.53424657534246, |
| "grad_norm": 0.01714816689491272, |
| "learning_rate": 5.3576093056922906e-08, |
| "loss": 0.0037, |
| "step": 19720 |
| }, |
| { |
| "epoch": 67.56849315068493, |
| "grad_norm": 0.02712251991033554, |
| "learning_rate": 4.981819039300284e-08, |
| "loss": 0.0026, |
| "step": 19730 |
| }, |
| { |
| "epoch": 67.6027397260274, |
| "grad_norm": 0.01652734912931919, |
| "learning_rate": 4.619684961881254e-08, |
| "loss": 0.0023, |
| "step": 19740 |
| }, |
| { |
| "epoch": 67.63698630136986, |
| "grad_norm": 0.022933751344680786, |
| "learning_rate": 4.2712080634949024e-08, |
| "loss": 0.0025, |
| "step": 19750 |
| }, |
| { |
| "epoch": 67.67123287671232, |
| "grad_norm": 0.024571429938077927, |
| "learning_rate": 3.936389296864129e-08, |
| "loss": 0.0049, |
| "step": 19760 |
| }, |
| { |
| "epoch": 67.70547945205479, |
| "grad_norm": 0.019797664135694504, |
| "learning_rate": 3.615229577371149e-08, |
| "loss": 0.0023, |
| "step": 19770 |
| }, |
| { |
| "epoch": 67.73972602739725, |
| "grad_norm": 0.009142549708485603, |
| "learning_rate": 3.3077297830541584e-08, |
| "loss": 0.0021, |
| "step": 19780 |
| }, |
| { |
| "epoch": 67.77397260273973, |
| "grad_norm": 0.012059146538376808, |
| "learning_rate": 3.01389075460512e-08, |
| "loss": 0.0031, |
| "step": 19790 |
| }, |
| { |
| "epoch": 67.8082191780822, |
| "grad_norm": 0.020258858799934387, |
| "learning_rate": 2.7337132953697554e-08, |
| "loss": 0.003, |
| "step": 19800 |
| }, |
| { |
| "epoch": 67.84246575342466, |
| "grad_norm": 0.021773777902126312, |
| "learning_rate": 2.467198171342e-08, |
| "loss": 0.0026, |
| "step": 19810 |
| }, |
| { |
| "epoch": 67.87671232876713, |
| "grad_norm": 0.014160028658807278, |
| "learning_rate": 2.214346111164556e-08, |
| "loss": 0.0024, |
| "step": 19820 |
| }, |
| { |
| "epoch": 67.91095890410959, |
| "grad_norm": 0.021640092134475708, |
| "learning_rate": 1.9751578061244504e-08, |
| "loss": 0.0021, |
| "step": 19830 |
| }, |
| { |
| "epoch": 67.94520547945206, |
| "grad_norm": 0.01956215500831604, |
| "learning_rate": 1.749633910153592e-08, |
| "loss": 0.0029, |
| "step": 19840 |
| }, |
| { |
| "epoch": 67.97945205479452, |
| "grad_norm": 0.017366180196404457, |
| "learning_rate": 1.5377750398265502e-08, |
| "loss": 0.0028, |
| "step": 19850 |
| }, |
| { |
| "epoch": 68.01369863013699, |
| "grad_norm": 0.01383445505052805, |
| "learning_rate": 1.3395817743561134e-08, |
| "loss": 0.003, |
| "step": 19860 |
| }, |
| { |
| "epoch": 68.04794520547945, |
| "grad_norm": 0.023640619590878487, |
| "learning_rate": 1.1550546555960662e-08, |
| "loss": 0.0042, |
| "step": 19870 |
| }, |
| { |
| "epoch": 68.08219178082192, |
| "grad_norm": 0.029182305559515953, |
| "learning_rate": 9.841941880361916e-09, |
| "loss": 0.0023, |
| "step": 19880 |
| }, |
| { |
| "epoch": 68.11643835616438, |
| "grad_norm": 0.026307787746191025, |
| "learning_rate": 8.270008388022721e-09, |
| "loss": 0.0025, |
| "step": 19890 |
| }, |
| { |
| "epoch": 68.15068493150685, |
| "grad_norm": 0.017121490091085434, |
| "learning_rate": 6.834750376549792e-09, |
| "loss": 0.0038, |
| "step": 19900 |
| }, |
| { |
| "epoch": 68.18493150684931, |
| "grad_norm": 0.023319967091083527, |
| "learning_rate": 5.536171769887632e-09, |
| "loss": 0.0033, |
| "step": 19910 |
| }, |
| { |
| "epoch": 68.21917808219177, |
| "grad_norm": 0.022718269377946854, |
| "learning_rate": 4.3742761183018784e-09, |
| "loss": 0.0027, |
| "step": 19920 |
| }, |
| { |
| "epoch": 68.25342465753425, |
| "grad_norm": 0.009096325375139713, |
| "learning_rate": 3.349066598362649e-09, |
| "loss": 0.0034, |
| "step": 19930 |
| }, |
| { |
| "epoch": 68.28767123287672, |
| "grad_norm": 0.014013983309268951, |
| "learning_rate": 2.4605460129556445e-09, |
| "loss": 0.0023, |
| "step": 19940 |
| }, |
| { |
| "epoch": 68.32191780821918, |
| "grad_norm": 0.03348590061068535, |
| "learning_rate": 1.7087167912710478e-09, |
| "loss": 0.0042, |
| "step": 19950 |
| }, |
| { |
| "epoch": 68.35616438356165, |
| "grad_norm": 0.02735082246363163, |
| "learning_rate": 1.0935809887702154e-09, |
| "loss": 0.0029, |
| "step": 19960 |
| }, |
| { |
| "epoch": 68.39041095890411, |
| "grad_norm": 0.009715776890516281, |
| "learning_rate": 6.151402872134337e-10, |
| "loss": 0.0033, |
| "step": 19970 |
| }, |
| { |
| "epoch": 68.42465753424658, |
| "grad_norm": 0.017182039096951485, |
| "learning_rate": 2.7339599464326627e-10, |
| "loss": 0.0025, |
| "step": 19980 |
| }, |
| { |
| "epoch": 68.45890410958904, |
| "grad_norm": 0.02159772627055645, |
| "learning_rate": 6.834904537900144e-11, |
| "loss": 0.0023, |
| "step": 19990 |
| }, |
| { |
| "epoch": 68.4931506849315, |
| "grad_norm": 0.01271827332675457, |
| "learning_rate": 0.0, |
| "loss": 0.002, |
| "step": 20000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 20000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 69, |
| "save_steps": 5000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.923804113456333e+19, |
| "train_batch_size": 96, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|