| type train | step 10 | loss 384.0812 603.6050 1317.3508 1702.2689 | lr 1.3e-05 | norm 664.9313 | dt 0.025 | |
| type train | step 20 | loss 377.7181 597.2221 1309.8059 1670.9023 | lr 2.7e-05 | norm 660.4241 | dt 0.025 | |
| type train | step 30 | loss 370.8555 586.4268 1290.2092 1659.3643 | lr 4.0e-05 | norm 660.3242 | dt 0.025 | |
| type train | step 40 | loss 361.4067 579.7332 1262.2814 1627.8658 | lr 5.3e-05 | norm 625.9962 | dt 0.025 | |
| type train | step 50 | loss 352.8531 559.1603 1239.7585 1600.7555 | lr 6.7e-05 | norm 632.2708 | dt 0.026 | |
| type train | step 60 | loss 340.3852 538.5740 1193.5436 1553.3081 | lr 8.0e-05 | norm 616.6901 | dt 0.026 | |
| type train | step 70 | loss 324.9385 521.7886 1158.4672 1498.3997 | lr 9.3e-05 | norm 595.4941 | dt 0.026 | |
| type train | step 80 | loss 307.9594 501.9121 1116.5664 1438.8212 | lr 1.1e-04 | norm 558.7804 | dt 0.026 | |
| type train | step 90 | loss 291.5836 470.9009 1071.7709 1379.6477 | lr 1.2e-04 | norm 549.1802 | dt 0.026 | |
| type train | step 100 | loss 272.7011 447.9602 1013.1418 1306.8215 | lr 1.3e-04 | norm 531.1138 | dt 0.025 | |
| type train | step 110 | loss 251.6451 414.3021 953.9789 1251.1482 | lr 1.5e-04 | norm 500.0567 | dt 0.026 | |
| type train | step 120 | loss 230.2295 388.3847 900.7739 1186.1373 | lr 1.6e-04 | norm 462.5960 | dt 0.026 | |
| type train | step 130 | loss 210.0013 358.7007 840.1476 1106.6677 | lr 1.7e-04 | norm 439.5298 | dt 0.026 | |
| type train | step 140 | loss 191.2549 336.2061 791.9597 1033.5027 | lr 1.9e-04 | norm 406.4192 | dt 0.027 | |
| type train | step 150 | loss 171.6487 299.8942 719.0361 958.9811 | lr 2.0e-04 | norm 381.7921 | dt 0.026 | |
| type train | step 160 | loss 152.6129 273.7689 655.1442 868.6413 | lr 2.1e-04 | norm 355.3441 | dt 0.026 | |
| type train | step 170 | loss 133.9432 242.3817 592.7899 789.7123 | lr 2.3e-04 | norm 331.6566 | dt 0.026 | |
| type train | step 180 | loss 116.6308 220.6303 545.0724 733.1215 | lr 2.4e-04 | norm 296.6913 | dt 0.026 | |
| type train | step 190 | loss 99.8736 190.6440 482.1094 656.9692 | lr 2.5e-04 | norm 270.8863 | dt 0.026 | |
| type train | step 200 | loss 86.1931 170.8172 435.7309 597.2067 | lr 2.7e-04 | norm 245.3864 | dt 0.026 | |
| type train | step 210 | loss 72.1103 147.7258 381.3986 527.3098 | lr 2.8e-04 | norm 219.9336 | dt 0.026 | |
| type train | step 220 | loss 60.0742 125.3639 332.9746 461.1277 | lr 2.9e-04 | norm 199.0873 | dt 0.026 | |
| type train | step 230 | loss 48.8339 105.9008 286.6518 400.8742 | lr 3.1e-04 | norm 177.6180 | dt 0.026 | |
| type train | step 240 | loss 40.8135 93.5293 256.5977 365.9275 | lr 3.2e-04 | norm 157.0957 | dt 0.027 | |
| type train | step 250 | loss 33.3292 77.1858 212.3267 306.5554 | lr 3.3e-04 | norm 135.2493 | dt 0.026 | |
| type train | step 260 | loss 26.9150 63.9809 180.8584 266.3138 | lr 3.5e-04 | norm 117.7254 | dt 0.026 | |
| type train | step 270 | loss 22.6015 54.6175 154.4110 226.2550 | lr 3.6e-04 | norm 100.9616 | dt 0.026 | |
| type train | step 280 | loss 18.9439 45.8028 132.0480 198.3277 | lr 3.7e-04 | norm 85.2726 | dt 0.026 | |
| type train | step 290 | loss 15.5676 37.4239 105.6974 160.3169 | lr 3.9e-04 | norm 69.3499 | dt 0.026 | |
| type train | step 300 | loss 13.5331 33.0442 91.6428 138.6691 | lr 4.0e-04 | norm 57.4027 | dt 0.026 | |
| type train | step 310 | loss 11.9939 28.7619 76.2531 118.7802 | lr 4.1e-04 | norm 44.9807 | dt 0.026 | |
| type train | step 320 | loss 10.3177 25.1516 65.2483 103.4113 | lr 4.3e-04 | norm 34.1352 | dt 0.027 | |
| type train | step 330 | loss 8.9162 22.3548 56.7562 88.5769 | lr 4.4e-04 | norm 24.6584 | dt 0.027 | |
| type train | step 340 | loss 7.5457 19.4839 49.5643 77.9648 | lr 4.5e-04 | norm 18.0660 | dt 0.026 | |
| type train | step 350 | loss 6.6426 17.6002 45.1024 70.2655 | lr 4.7e-04 | norm 14.3410 | dt 0.026 | |
| type train | step 360 | loss 5.7267 15.5780 40.1989 63.3912 | lr 4.8e-04 | norm 11.4895 | dt 0.026 | |
| type train | step 370 | loss 4.9218 14.0832 36.9105 59.3091 | lr 4.9e-04 | norm 9.9148 | dt 0.026 | |
| type train | step 380 | loss 4.4869 12.9945 35.3284 56.5724 | lr 5.1e-04 | norm 9.1523 | dt 0.027 | |
| type train | step 390 | loss 3.6072 11.4018 31.4185 51.3770 | lr 5.2e-04 | norm 7.0551 | dt 0.027 | |
| type train | step 400 | loss 3.1588 10.3371 29.0395 47.5967 | lr 5.3e-04 | norm 6.2993 | dt 0.027 | |
| type train | step 410 | loss 2.7178 9.5550 26.9853 45.2461 | lr 5.5e-04 | norm 5.4582 | dt 0.027 | |
| type train | step 420 | loss 2.3939 8.7711 25.0835 43.1858 | lr 5.6e-04 | norm 5.7297 | dt 0.027 | |
| type train | step 430 | loss 1.9473 7.9633 23.4601 41.0735 | lr 5.7e-04 | norm 4.6501 | dt 0.026 | |
| type train | step 440 | loss 1.6624 7.2347 21.7758 39.0239 | lr 5.9e-04 | norm 4.4029 | dt 0.027 | |
| type train | step 450 | loss 1.4510 6.7410 20.6180 37.4248 | lr 6.0e-04 | norm 3.8312 | dt 0.026 | |
| type train | step 460 | loss 1.2229 6.0923 18.8591 34.7421 | lr 6.1e-04 | norm 3.7208 | dt 0.027 | |
| type train | step 470 | loss 1.1152 5.8496 18.3193 34.3106 | lr 6.3e-04 | norm 3.3508 | dt 0.026 | |
| type train | step 480 | loss 0.9253 5.3687 17.2212 32.9390 | lr 6.4e-04 | norm 3.1339 | dt 0.027 | |
| type train | step 490 | loss 0.7863 4.9450 16.1635 31.7328 | lr 6.5e-04 | norm 3.1040 | dt 0.026 | |
| type train | step 500 | loss 0.7037 4.6449 15.4463 30.9118 | lr 6.7e-04 | norm 3.0409 | dt 0.026 | |
| type train | step 510 | loss 0.6068 4.3561 14.8469 30.2850 | lr 6.8e-04 | norm 2.8486 | dt 0.026 | |
| type train | step 520 | loss 0.5210 4.0134 14.0943 29.2939 | lr 6.9e-04 | norm 2.9168 | dt 0.026 | |
| type train | step 530 | loss 0.4968 3.8722 13.7106 28.7569 | lr 7.1e-04 | norm 2.6138 | dt 0.027 | |
| type train | step 540 | loss 0.4043 3.5326 12.4552 26.7873 | lr 7.2e-04 | norm 2.4170 | dt 0.026 | |
| type train | step 550 | loss 0.3709 3.3647 12.1792 26.6829 | lr 7.3e-04 | norm 2.7907 | dt 0.030 | |
| type train | step 560 | loss 0.3708 3.2344 11.9007 26.1986 | lr 7.5e-04 | norm 2.9301 | dt 0.036 | |
| type train | step 570 | loss 0.3257 2.9977 11.6824 26.2829 | lr 7.6e-04 | norm 2.4795 | dt 0.034 | |
| type train | step 580 | loss 0.3074 2.8220 10.6235 24.2963 | lr 7.7e-04 | norm 2.7636 | dt 0.033 | |
| type train | step 590 | loss 0.2869 2.7077 10.6868 24.8517 | lr 7.9e-04 | norm 2.5298 | dt 0.033 | |
| type train | step 600 | loss 0.2544 2.4917 9.7476 23.3804 | lr 8.0e-04 | norm 2.1773 | dt 0.033 | |
| type train | step 610 | loss 0.2454 2.3815 9.7177 23.5174 | lr 8.1e-04 | norm 2.4081 | dt 0.035 | |
| type train | step 620 | loss 0.2322 2.2631 9.3926 22.8799 | lr 8.3e-04 | norm 2.0869 | dt 0.032 | |
| type train | step 630 | loss 0.2236 2.1762 9.2836 23.1233 | lr 8.4e-04 | norm 2.3456 | dt 0.032 | |
| type train | step 640 | loss 0.2176 2.0167 8.6024 21.7992 | lr 8.5e-04 | norm 2.1584 | dt 0.030 | |
| type train | step 650 | loss 0.2235 2.0967 8.9648 22.6110 | lr 8.7e-04 | norm 2.6103 | dt 0.028 | |
| type train | step 660 | loss 0.1977 1.9060 8.1229 21.0103 | lr 8.8e-04 | norm 2.0738 | dt 0.027 | |
| type train | step 670 | loss 0.1902 1.7620 7.8279 20.9968 | lr 8.9e-04 | norm 2.2547 | dt 0.026 | |
| type train | step 680 | loss 0.1818 1.7155 7.6625 20.4752 | lr 9.1e-04 | norm 1.8188 | dt 0.026 | |
| type train | step 690 | loss 0.1838 1.7433 8.1762 21.8251 | lr 9.2e-04 | norm 2.3794 | dt 0.027 | |
| type train | step 700 | loss 0.1757 1.6087 7.3342 20.1155 | lr 9.3e-04 | norm 2.1415 | dt 0.027 | |
| type train | step 710 | loss 0.1650 1.5136 6.9878 19.6845 | lr 9.5e-04 | norm 1.9033 | dt 0.027 | |
| type train | step 720 | loss 0.1673 1.5249 7.0381 19.6879 | lr 9.6e-04 | norm 1.9481 | dt 0.027 | |
| type train | step 730 | loss 0.1656 1.4714 6.8051 19.2932 | lr 9.7e-04 | norm 2.0640 | dt 0.026 | |
| type train | step 740 | loss 0.1540 1.3699 6.5807 19.2313 | lr 9.9e-04 | norm 1.9275 | dt 0.027 | |
| type train | step 750 | loss 0.1700 1.4516 7.0608 19.8960 | lr 1.0e-03 | norm 3.1614 | dt 0.026 | |
| type train | step 760 | loss 0.1544 1.3460 6.5123 19.1121 | lr 1.0e-03 | norm 2.3650 | dt 0.027 | |
| type train | step 770 | loss 0.1444 1.2713 6.3619 18.9392 | lr 1.0e-03 | norm 2.1130 | dt 0.026 | |
| type train | step 780 | loss 0.1400 1.2640 6.1288 18.3710 | lr 1.0e-03 | norm 1.9302 | dt 0.027 | |
| type train | step 790 | loss 0.1395 1.2214 6.0664 18.4872 | lr 1.0e-03 | norm 1.9787 | dt 0.027 | |
| type train | step 800 | loss 0.1362 1.1657 5.9367 18.2790 | lr 1.0e-03 | norm 2.2475 | dt 0.026 | |
| type train | step 810 | loss 0.1418 1.2011 6.2401 18.9319 | lr 1.0e-03 | norm 2.2363 | dt 0.027 | |
| type train | step 820 | loss 0.1353 1.1304 5.8574 18.0648 | lr 1.0e-03 | norm 2.1527 | dt 0.027 | |
| type train | step 830 | loss 0.1285 1.0872 5.7234 17.8093 | lr 1.0e-03 | norm 2.0342 | dt 0.027 | |
| type train | step 840 | loss 0.1224 1.0635 5.5252 17.6231 | lr 1.0e-03 | norm 1.8122 | dt 0.027 | |
| type train | step 850 | loss 0.1296 1.0989 5.6482 17.8348 | lr 1.0e-03 | norm 2.0500 | dt 0.027 | |
| type train | step 860 | loss 0.1226 1.0064 5.3331 17.2588 | lr 1.0e-03 | norm 1.8553 | dt 0.027 | |
| type train | step 870 | loss 0.1196 0.9941 5.3431 17.4280 | lr 1.0e-03 | norm 1.9099 | dt 0.027 | |
| type train | step 880 | loss 0.1228 0.9801 5.2880 17.1490 | lr 1.0e-03 | norm 1.9079 | dt 0.027 | |
| type train | step 890 | loss 0.1211 0.9841 5.4341 17.4650 | lr 1.0e-03 | norm 2.2217 | dt 0.028 | |
| type train | step 900 | loss 0.1155 0.9412 5.0482 16.8841 | lr 1.0e-03 | norm 1.9708 | dt 0.028 | |
| type train | step 910 | loss 0.1181 0.9774 5.1747 17.0548 | lr 1.0e-03 | norm 2.0994 | dt 0.029 | |
| type train | step 920 | loss 0.1158 0.9084 5.1016 17.0275 | lr 1.0e-03 | norm 2.0561 | dt 0.026 | |
| type train | step 930 | loss 0.1133 0.8912 5.0324 16.8753 | lr 1.0e-03 | norm 1.9850 | dt 0.027 | |
| type train | step 940 | loss 0.1129 0.8793 5.1144 17.0822 | lr 1.0e-03 | norm 1.6786 | dt 0.027 | |
| type train | step 950 | loss 0.1086 0.8534 4.9312 16.8898 | lr 1.0e-03 | norm 1.8078 | dt 0.027 | |
| type train | step 960 | loss 0.1092 0.8531 4.9056 16.7642 | lr 1.0e-03 | norm 2.1063 | dt 0.026 | |
| type train | step 970 | loss 0.1066 0.8123 4.6765 16.1094 | lr 1.0e-03 | norm 1.9672 | dt 0.027 | |
| type train | step 980 | loss 0.1070 0.8160 4.7151 16.4035 | lr 1.0e-03 | norm 2.0286 | dt 0.026 | |
| type train | step 990 | loss 0.1099 0.8356 4.8885 16.7654 | lr 1.0e-03 | norm 2.0461 | dt 0.027 | |
| type train | step 1000 | loss 0.1033 0.7800 4.7839 16.6383 | lr 1.0e-03 | norm 1.6388 | dt 0.027 | |
| type train | step 1010 | loss 0.1073 0.7823 4.5947 16.0010 | lr 1.0e-03 | norm 1.7480 | dt 0.027 | |
| type train | step 1020 | loss 0.1038 0.7798 4.6478 16.2010 | lr 1.0e-03 | norm 1.8389 | dt 0.027 | |
| type train | step 1030 | loss 0.1035 0.7825 4.6055 16.1140 | lr 1.0e-03 | norm 2.2100 | dt 0.027 | |
| type train | step 1040 | loss 0.1029 0.7714 4.6442 16.3307 | lr 1.0e-03 | norm 1.7855 | dt 0.028 | |
| type train | step 1050 | loss 0.1007 0.7251 4.4724 15.8384 | lr 1.0e-03 | norm 1.7795 | dt 0.027 | |
| type train | step 1060 | loss 0.1034 0.7424 4.5736 16.2034 | lr 1.0e-03 | norm 1.7100 | dt 0.027 | |
| type train | step 1070 | loss 0.0984 0.6986 4.2960 15.3549 | lr 1.0e-03 | norm 1.7477 | dt 0.027 | |
| type train | step 1080 | loss 0.1002 0.7198 4.5029 15.8858 | lr 1.0e-03 | norm 1.7564 | dt 0.026 | |
| type train | step 1090 | loss 0.0984 0.7093 4.4400 15.9568 | lr 1.0e-03 | norm 1.7535 | dt 0.026 | |
| type train | step 1100 | loss 0.0990 0.7110 4.4753 16.0714 | lr 1.0e-03 | norm 1.8817 | dt 0.026 | |
| type train | step 1110 | loss 0.0985 0.6974 4.4007 15.8472 | lr 1.0e-03 | norm 1.8443 | dt 0.026 | |
| type train | step 1120 | loss 0.0978 0.6827 4.4471 16.1708 | lr 1.0e-03 | norm 1.7028 | dt 0.026 | |
| type train | step 1130 | loss 0.0963 0.6684 4.3764 16.0367 | lr 1.0e-03 | norm 1.9369 | dt 0.026 | |
| type train | step 1140 | loss 0.0970 0.6785 4.4066 16.0829 | lr 1.0e-03 | norm 1.7062 | dt 0.026 | |
| type train | step 1150 | loss 0.0920 0.6492 4.1671 15.2456 | lr 1.0e-03 | norm 1.6941 | dt 0.026 | |
| type train | step 1160 | loss 0.0936 0.6568 4.2033 15.6447 | lr 1.0e-03 | norm 1.9955 | dt 0.026 | |
| type train | step 1170 | loss 0.0958 0.6534 4.2737 15.5455 | lr 1.0e-03 | norm 2.1313 | dt 0.026 | |
| type train | step 1180 | loss 0.0940 0.6423 4.3529 16.2248 | lr 1.0e-03 | norm 1.8046 | dt 0.026 | |
| type train | step 1190 | loss 0.0941 0.6381 4.1122 15.1641 | lr 1.0e-03 | norm 1.8661 | dt 0.026 | |
| type train | step 1200 | loss 0.0926 0.6325 4.2209 15.6743 | lr 1.0e-03 | norm 1.8881 | dt 0.026 | |
| type train | step 1210 | loss 0.0890 0.6143 4.0130 15.0861 | lr 1.0e-03 | norm 1.5638 | dt 0.026 | |
| type train | step 1220 | loss 0.0897 0.6118 4.1757 15.7693 | lr 1.0e-03 | norm 1.9604 | dt 0.029 | |
| type train | step 1230 | loss 0.0900 0.6065 4.0611 15.3404 | lr 1.0e-03 | norm 1.5820 | dt 0.027 | |
| type train | step 1240 | loss 0.0899 0.6033 4.1256 15.7217 | lr 1.0e-03 | norm 1.8256 | dt 0.026 | |
| type train | step 1250 | loss 0.0895 0.5860 3.9421 14.9640 | lr 1.0e-03 | norm 1.6807 | dt 0.027 | |
| type train | step 1260 | loss 0.0929 0.6184 4.2101 15.4498 | lr 1.0e-03 | norm 2.0762 | dt 0.035 | |
| type train | step 1270 | loss 0.0876 0.5936 3.9306 14.7867 | lr 1.0e-03 | norm 1.6668 | dt 0.035 | |
| type train | step 1280 | loss 0.0875 0.5698 3.9110 15.1446 | lr 1.0e-03 | norm 1.8250 | dt 0.035 | |
| type train | step 1290 | loss 0.0867 0.5757 3.8789 14.7370 | lr 1.0e-03 | norm 1.4234 | dt 0.034 | |
| type train | step 1300 | loss 0.0905 0.5928 4.1887 15.9230 | lr 1.0e-03 | norm 1.7805 | dt 0.034 | |
| type train | step 1310 | loss 0.0868 0.5692 3.8863 14.8294 | lr 1.0e-03 | norm 1.7173 | dt 0.034 | |
| type train | step 1320 | loss 0.0845 0.5491 3.7654 14.5498 | lr 1.0e-03 | norm 1.5414 | dt 0.033 | |
| type train | step 1330 | loss 0.0870 0.5732 3.8937 14.8610 | lr 1.0e-03 | norm 1.6057 | dt 0.033 | |
| type train | step 1340 | loss 0.0884 0.5731 3.8134 14.6137 | lr 1.0e-03 | norm 1.7082 | dt 0.033 | |
| type train | step 1350 | loss 0.0846 0.5472 3.7522 14.5626 | lr 1.0e-03 | norm 1.5335 | dt 0.033 | |
| type train | step 1360 | loss 0.0923 0.5707 3.9944 15.0587 | lr 1.0e-03 | norm 2.4033 | dt 0.033 | |
| type train | step 1370 | loss 0.0871 0.5637 3.8848 14.9502 | lr 1.0e-03 | norm 1.8908 | dt 0.032 | |
| type train | step 1380 | loss 0.0839 0.5438 3.8358 14.8446 | lr 1.0e-03 | norm 1.6525 | dt 0.032 | |
| type train | step 1390 | loss 0.0826 0.5500 3.7744 14.5634 | lr 1.0e-03 | norm 1.6394 | dt 0.032 | |
| type train | step 1400 | loss 0.0841 0.5478 3.7527 14.6618 | lr 1.0e-03 | norm 1.6579 | dt 0.032 | |
| type train | step 1410 | loss 0.0827 0.5357 3.7437 14.4283 | lr 1.0e-03 | norm 1.8690 | dt 0.031 | |
| type train | step 1420 | loss 0.0875 0.5501 3.9296 15.1549 | lr 1.0e-03 | norm 1.7236 | dt 0.031 | |
| type train | step 1430 | loss 0.0846 0.5337 3.7298 14.5378 | lr 1.0e-03 | norm 1.7094 | dt 0.029 | |
| type train | step 1440 | loss 0.0812 0.5236 3.7292 14.5161 | lr 1.0e-03 | norm 1.6489 | dt 0.030 | |
| type train | step 1450 | loss 0.0794 0.5227 3.6660 14.4316 | lr 1.0e-03 | norm 1.5204 | dt 0.033 | |
| type train | step 1460 | loss 0.0842 0.5461 3.7541 14.7300 | lr 1.0e-03 | norm 1.7539 | dt 0.031 | |
| type train | step 1470 | loss 0.0808 0.5111 3.5810 14.1820 | lr 1.0e-03 | norm 1.5297 | dt 0.032 | |
| type train | step 1480 | loss 0.0797 0.5095 3.6247 14.4137 | lr 1.0e-03 | norm 1.5864 | dt 0.030 | |
| type train | step 1490 | loss 0.0826 0.5120 3.5984 14.1777 | lr 1.0e-03 | norm 1.5307 | dt 0.028 | |
| type train | step 1500 | loss 0.0820 0.5216 3.7615 14.5116 | lr 1.0e-03 | norm 1.9127 | dt 0.028 | |
| type train | step 1510 | loss 0.0789 0.5041 3.5307 14.1273 | lr 1.0e-03 | norm 1.6542 | dt 0.026 | |
| type train | step 1520 | loss 0.0810 0.5319 3.6447 14.2601 | lr 1.0e-03 | norm 1.7580 | dt 0.026 | |
| type train | step 1530 | loss 0.0805 0.5104 3.6061 14.2966 | lr 1.0e-03 | norm 1.7570 | dt 0.026 | |
| type train | step 1540 | loss 0.0794 0.4980 3.6074 14.2912 | lr 1.0e-03 | norm 1.7291 | dt 0.026 | |
| type train | step 1550 | loss 0.0804 0.5004 3.6730 14.5167 | lr 1.0e-03 | norm 1.4498 | dt 0.026 | |
| type train | step 1560 | loss 0.0772 0.4913 3.6179 14.4217 | lr 1.0e-03 | norm 1.5581 | dt 0.027 | |
| type train | step 1570 | loss 0.0779 0.4963 3.5823 14.3163 | lr 1.0e-03 | norm 1.8360 | dt 0.026 | |
| type train | step 1580 | loss 0.0771 0.4834 3.4501 13.7846 | lr 1.0e-03 | norm 1.7077 | dt 0.026 | |
| type train | step 1590 | loss 0.0776 0.4926 3.4666 13.9642 | lr 1.0e-03 | norm 1.7892 | dt 0.026 | |
| type train | step 1600 | loss 0.0803 0.5017 3.6501 14.3554 | lr 1.0e-03 | norm 1.7634 | dt 0.026 | |
| type train | step 1610 | loss 0.0764 0.4780 3.5701 14.3320 | lr 1.0e-03 | norm 1.4483 | dt 0.027 | |
| type train | step 1620 | loss 0.0787 0.4846 3.4957 13.8667 | lr 1.0e-03 | norm 1.5582 | dt 0.026 | |
| type train | step 1630 | loss 0.0768 0.4866 3.5206 14.0307 | lr 1.0e-03 | norm 1.6357 | dt 0.026 | |
| type train | step 1640 | loss 0.0768 0.4969 3.5157 14.0046 | lr 1.0e-03 | norm 1.9297 | dt 0.026 | |
| type train | step 1650 | loss 0.0768 0.4923 3.5336 14.1889 | lr 1.0e-03 | norm 1.5699 | dt 0.026 | |
| type train | step 1660 | loss 0.0758 0.4705 3.4499 13.8519 | lr 1.0e-03 | norm 1.6024 | dt 0.026 | |
| type train | step 1670 | loss 0.0778 0.4857 3.5294 14.1186 | lr 9.9e-04 | norm 1.4812 | dt 0.026 | |
| type train | step 1680 | loss 0.0746 0.4622 3.3354 13.4433 | lr 9.9e-04 | norm 1.5248 | dt 0.027 | |
| type train | step 1690 | loss 0.0760 0.4713 3.4952 13.9288 | lr 9.9e-04 | norm 1.5368 | dt 0.027 | |
| type train | step 1700 | loss 0.0748 0.4759 3.5103 14.0636 | lr 9.9e-04 | norm 1.5685 | dt 0.026 | |
| type train | step 1710 | loss 0.0755 0.4828 3.5404 14.1368 | lr 9.9e-04 | norm 1.6909 | dt 0.026 | |
| type train | step 1720 | loss 0.0756 0.4732 3.4817 13.9664 | lr 9.9e-04 | norm 1.6548 | dt 0.026 | |
| type train | step 1730 | loss 0.0751 0.4710 3.5276 14.3547 | lr 9.9e-04 | norm 1.5005 | dt 0.026 | |
| type train | step 1740 | loss 0.0744 0.4629 3.4986 14.1720 | lr 9.9e-04 | norm 1.7244 | dt 0.027 | |
| type train | step 1750 | loss 0.0748 0.4690 3.4989 14.2274 | lr 9.9e-04 | norm 1.5405 | dt 0.026 | |
| type train | step 1760 | loss 0.0709 0.4577 3.3503 13.5324 | lr 9.9e-04 | norm 1.5680 | dt 0.026 | |
| type train | step 1770 | loss 0.0725 0.4662 3.3713 13.8527 | lr 9.9e-04 | norm 1.8048 | dt 0.026 | |
| type train | step 1780 | loss 0.0748 0.4638 3.4437 13.8074 | lr 9.9e-04 | norm 1.8665 | dt 0.026 | |
| type train | step 1790 | loss 0.0735 0.4591 3.5316 14.5060 | lr 9.9e-04 | norm 1.6153 | dt 0.026 | |
| type train | step 1800 | loss 0.0734 0.4626 3.3692 13.4970 | lr 9.9e-04 | norm 1.6910 | dt 0.026 | |
| type train | step 1810 | loss 0.0727 0.4571 3.4373 14.0298 | lr 9.9e-04 | norm 1.7393 | dt 0.026 | |
| type train | step 1820 | loss 0.0695 0.4516 3.3028 13.4082 | lr 9.9e-04 | norm 1.4468 | dt 0.027 | |
| type train | step 1830 | loss 0.0706 0.4516 3.4725 14.1451 | lr 9.9e-04 | norm 1.7586 | dt 0.027 | |
| type train | step 1840 | loss 0.0711 0.4492 3.3582 13.7234 | lr 9.9e-04 | norm 1.4673 | dt 0.026 | |
| type train | step 1850 | loss 0.0710 0.4513 3.4161 14.1301 | lr 9.9e-04 | norm 1.6355 | dt 0.026 | |
| type train | step 1860 | loss 0.0710 0.4384 3.2829 13.4515 | lr 9.9e-04 | norm 1.5283 | dt 0.026 | |
| type train | step 1870 | loss 0.0738 0.4616 3.4819 13.9874 | lr 9.9e-04 | norm 1.9515 | dt 0.026 | |
| type train | step 1880 | loss 0.0696 0.4477 3.2757 13.2656 | lr 9.9e-04 | norm 1.5122 | dt 0.026 | |
| type train | step 1890 | loss 0.0700 0.4359 3.2712 13.6962 | lr 9.9e-04 | norm 1.6253 | dt 0.026 | |
| type train | step 1900 | loss 0.0695 0.4405 3.2498 13.2269 | lr 9.9e-04 | norm 1.3396 | dt 0.026 | |
| type train | step 1910 | loss 0.0726 0.4562 3.5262 14.4980 | lr 9.9e-04 | norm 1.6127 | dt 0.027 | |
| type train | step 1920 | loss 0.0694 0.4384 3.2872 13.4376 | lr 9.9e-04 | norm 1.5694 | dt 0.027 | |
| type train | step 1930 | loss 0.0681 0.4238 3.1771 13.2213 | lr 9.9e-04 | norm 1.3905 | dt 0.026 | |
| type train | step 1940 | loss 0.0702 0.4459 3.3032 13.4811 | lr 9.9e-04 | norm 1.4779 | dt 0.026 | |
| type train | step 1950 | loss 0.0713 0.4465 3.2289 13.1965 | lr 9.9e-04 | norm 1.5385 | dt 0.026 | |
| type train | step 1960 | loss 0.0685 0.4312 3.2044 13.2264 | lr 9.9e-04 | norm 1.4340 | dt 0.026 | |
| type train | step 1970 | loss 0.0748 0.4460 3.3756 13.7091 | lr 9.9e-04 | norm 2.1667 | dt 0.026 | |
| type train | step 1980 | loss 0.0709 0.4432 3.3488 13.6000 | lr 9.9e-04 | norm 1.7064 | dt 0.026 | |
| type train | step 1990 | loss 0.0684 0.4288 3.2762 13.5462 | lr 9.9e-04 | norm 1.5058 | dt 0.026 | |
| type train | step 2000 | loss 0.0673 0.4368 3.2496 13.2594 | lr 9.9e-04 | norm 1.5626 | dt 0.026 | |
| type train | step 2010 | loss 0.0684 0.4345 3.2283 13.3712 | lr 9.9e-04 | norm 1.5365 | dt 0.035 | |
| type train | step 2020 | loss 0.0678 0.4301 3.2423 13.1453 | lr 9.9e-04 | norm 1.7551 | dt 0.035 | |
| type train | step 2030 | loss 0.0719 0.4412 3.3910 13.9405 | lr 9.9e-04 | norm 1.5671 | dt 0.035 | |
| type train | step 2040 | loss 0.0699 0.4290 3.2205 13.3000 | lr 9.9e-04 | norm 1.5391 | dt 0.035 | |
| type train | step 2050 | loss 0.0671 0.4229 3.2381 13.3292 | lr 9.9e-04 | norm 1.5344 | dt 0.035 | |
| type train | step 2060 | loss 0.0658 0.4236 3.2005 13.2678 | lr 9.9e-04 | norm 1.4257 | dt 0.035 | |
| type train | step 2070 | loss 0.0694 0.4439 3.2789 13.5472 | lr 9.9e-04 | norm 1.6125 | dt 0.035 | |
| type train | step 2080 | loss 0.0665 0.4163 3.1201 13.0183 | lr 9.9e-04 | norm 1.4004 | dt 0.035 | |
| type train | step 2090 | loss 0.0662 0.4145 3.1662 13.2394 | lr 9.9e-04 | norm 1.5054 | dt 0.035 | |
| type train | step 2100 | loss 0.0689 0.4197 3.1498 13.1092 | lr 9.9e-04 | norm 1.4387 | dt 0.035 | |
| type train | step 2110 | loss 0.0684 0.4241 3.2951 13.4051 | lr 9.9e-04 | norm 1.7645 | dt 0.034 | |
| type train | step 2120 | loss 0.0654 0.4157 3.1238 13.0420 | lr 9.9e-04 | norm 1.5838 | dt 0.034 | |
| type train | step 2130 | loss 0.0675 0.4354 3.2209 13.1388 | lr 9.9e-04 | norm 1.5754 | dt 0.033 | |
| type train | step 2140 | loss 0.0671 0.4200 3.1866 13.1923 | lr 9.9e-04 | norm 1.6272 | dt 0.033 | |
| type train | step 2150 | loss 0.0660 0.4104 3.1787 13.1927 | lr 9.9e-04 | norm 1.5980 | dt 0.033 | |
| type train | step 2160 | loss 0.0674 0.4154 3.2400 13.4986 | lr 9.9e-04 | norm 1.3887 | dt 0.033 | |
| type train | step 2170 | loss 0.0643 0.4080 3.2232 13.3984 | lr 9.9e-04 | norm 1.4669 | dt 0.033 | |
| type train | step 2180 | loss 0.0649 0.4159 3.1885 13.2527 | lr 9.9e-04 | norm 1.6739 | dt 0.032 | |
| type train | step 2190 | loss 0.0643 0.4037 3.0696 12.8216 | lr 9.9e-04 | norm 1.6226 | dt 0.032 | |
| type train | step 2200 | loss 0.0650 0.4131 3.0833 13.0225 | lr 9.9e-04 | norm 1.6874 | dt 0.033 | |
| type train | step 2210 | loss 0.0671 0.4212 3.2359 13.3240 | lr 9.9e-04 | norm 1.6380 | dt 0.033 | |
| type train | step 2220 | loss 0.0643 0.4023 3.1811 13.3814 | lr 9.9e-04 | norm 1.3702 | dt 0.032 | |
| type train | step 2230 | loss 0.0660 0.4069 3.1333 12.9184 | lr 9.9e-04 | norm 1.4314 | dt 0.032 | |
| type train | step 2240 | loss 0.0648 0.4121 3.1512 13.0613 | lr 9.9e-04 | norm 1.5130 | dt 0.032 | |
| type train | step 2250 | loss 0.0645 0.4195 3.1636 13.0512 | lr 9.9e-04 | norm 1.7955 | dt 0.029 | |
| type train | step 2260 | loss 0.0647 0.4167 3.1784 13.2451 | lr 9.9e-04 | norm 1.4857 | dt 0.026 | |
| type train | step 2270 | loss 0.0642 0.4013 3.0849 12.8918 | lr 9.9e-04 | norm 1.5481 | dt 0.026 | |
| type train | step 2280 | loss 0.0660 0.4124 3.1809 13.2098 | lr 9.9e-04 | norm 1.4114 | dt 0.026 | |
| type train | step 2290 | loss 0.0632 0.3954 3.0097 12.5336 | lr 9.9e-04 | norm 1.4373 | dt 0.026 | |
| type train | step 2300 | loss 0.0646 0.4021 3.1344 13.0521 | lr 9.9e-04 | norm 1.4916 | dt 0.026 | |
| type train | step 2310 | loss 0.0635 0.4075 3.1654 13.1833 | lr 9.9e-04 | norm 1.4906 | dt 0.027 | |
| type train | step 2320 | loss 0.0641 0.4127 3.2001 13.1959 | lr 9.9e-04 | norm 1.5790 | dt 0.026 | |
| type train | step 2330 | loss 0.0644 0.4056 3.1262 13.1188 | lr 9.9e-04 | norm 1.5726 | dt 0.026 | |
| type train | step 2340 | loss 0.0641 0.4069 3.2019 13.5050 | lr 9.8e-04 | norm 1.4185 | dt 0.026 | |
| type train | step 2350 | loss 0.0637 0.3995 3.1736 13.2669 | lr 9.8e-04 | norm 1.6074 | dt 0.026 | |
| type train | step 2360 | loss 0.0639 0.4054 3.1534 13.3669 | lr 9.8e-04 | norm 1.4044 | dt 0.026 | |
| type train | step 2370 | loss 0.0606 0.3953 3.0490 12.6725 | lr 9.8e-04 | norm 1.4772 | dt 0.026 | |
| type train | step 2380 | loss 0.0619 0.4036 3.0749 13.0278 | lr 9.8e-04 | norm 1.6753 | dt 0.026 | |
| type train | step 2390 | loss 0.0643 0.4023 3.0971 12.9938 | lr 9.8e-04 | norm 1.7771 | dt 0.027 | |
| type train | step 2400 | loss 0.0634 0.3998 3.2198 13.6839 | lr 9.8e-04 | norm 1.5684 | dt 0.026 | |
| type train | step 2410 | loss 0.0629 0.4003 3.0626 12.6872 | lr 9.8e-04 | norm 1.6066 | dt 0.026 | |
| type train | step 2420 | loss 0.0631 0.4006 3.1295 13.2637 | lr 9.8e-04 | norm 1.6197 | dt 0.026 | |
| type train | step 2430 | loss 0.0603 0.3941 3.0311 12.6458 | lr 9.8e-04 | norm 1.3710 | dt 0.026 | |
| type train | step 2440 | loss 0.0611 0.3952 3.1849 13.3288 | lr 9.8e-04 | norm 1.6329 | dt 0.027 | |
| type train | step 2450 | loss 0.0616 0.3937 3.0704 12.9458 | lr 9.8e-04 | norm 1.3899 | dt 0.026 | |
| type train | step 2460 | loss 0.0618 0.3976 3.1244 13.3397 | lr 9.8e-04 | norm 1.5718 | dt 0.026 | |
| type train | step 2470 | loss 0.0612 0.3858 3.0213 12.6763 | lr 9.8e-04 | norm 1.4193 | dt 0.026 | |
| type train | step 2480 | loss 0.0640 0.4065 3.1806 13.2797 | lr 9.8e-04 | norm 1.8570 | dt 0.026 | |
| type train | step 2490 | loss 0.0603 0.3953 3.0083 12.5098 | lr 9.8e-04 | norm 1.4369 | dt 0.027 | |
| type train | step 2500 | loss 0.0606 0.3863 3.0158 12.9256 | lr 9.8e-04 | norm 1.5273 | dt 0.027 | |
| type train | step 2510 | loss 0.0605 0.3893 2.9963 12.4941 | lr 9.8e-04 | norm 1.2923 | dt 0.026 | |
| type train | step 2520 | loss 0.0632 0.4041 3.2488 13.6878 | lr 9.8e-04 | norm 1.5747 | dt 0.026 | |
| type train | step 2530 | loss 0.0602 0.3900 3.0298 12.7369 | lr 9.8e-04 | norm 1.4806 | dt 0.026 | |
| type train | step 2540 | loss 0.0594 0.3786 2.9327 12.5377 | lr 9.8e-04 | norm 1.3461 | dt 0.026 | |
| type train | step 2550 | loss 0.0610 0.3959 3.0586 12.7739 | lr 9.8e-04 | norm 1.4417 | dt 0.026 | |
| type train | step 2560 | loss 0.0623 0.3956 2.9932 12.4909 | lr 9.8e-04 | norm 1.4374 | dt 0.026 | |
| type train | step 2570 | loss 0.0603 0.3849 2.9665 12.5309 | lr 9.8e-04 | norm 1.3564 | dt 0.026 | |
| type train | step 2580 | loss 0.0654 0.3990 3.0918 13.0282 | lr 9.8e-04 | norm 2.0736 | dt 0.026 | |
| type train | step 2590 | loss 0.0614 0.3937 3.1063 12.9311 | lr 9.8e-04 | norm 1.6125 | dt 0.026 | |
| type train | step 2600 | loss 0.0601 0.3846 3.0265 12.8761 | lr 9.8e-04 | norm 1.4196 | dt 0.026 | |
| type train | step 2610 | loss 0.0589 0.3912 3.0146 12.5994 | lr 9.8e-04 | norm 1.4737 | dt 0.026 | |
| type train | step 2620 | loss 0.0599 0.3881 3.0098 12.7068 | lr 9.8e-04 | norm 1.4450 | dt 0.026 | |
| type train | step 2630 | loss 0.0594 0.3846 3.0035 12.5184 | lr 9.8e-04 | norm 1.6676 | dt 0.026 | |
| type train | step 2640 | loss 0.0631 0.3972 3.1359 13.2545 | lr 9.8e-04 | norm 1.5004 | dt 0.026 | |
| type train | step 2650 | loss 0.0611 0.3866 2.9916 12.6400 | lr 9.8e-04 | norm 1.4569 | dt 0.026 | |
| type train | step 2660 | loss 0.0587 0.3797 3.0154 12.6972 | lr 9.8e-04 | norm 1.4123 | dt 0.026 | |
| type train | step 2670 | loss 0.0579 0.3829 2.9934 12.6264 | lr 9.8e-04 | norm 1.3396 | dt 0.026 | |
| type train | step 2680 | loss 0.0609 0.3999 3.0662 12.9177 | lr 9.8e-04 | norm 1.5736 | dt 0.026 | |
| type train | step 2690 | loss 0.0584 0.3777 2.9146 12.3522 | lr 9.8e-04 | norm 1.3438 | dt 0.027 | |
| type train | step 2700 | loss 0.0583 0.3745 2.9476 12.6230 | lr 9.8e-04 | norm 1.4475 | dt 0.026 | |
| type train | step 2710 | loss 0.0605 0.3797 2.9319 12.4596 | lr 9.8e-04 | norm 1.3680 | dt 0.026 | |
| type train | step 2720 | loss 0.0600 0.3844 3.0700 12.8116 | lr 9.8e-04 | norm 1.7432 | dt 0.026 | |
| type train | step 2730 | loss 0.0578 0.3763 2.9280 12.4482 | lr 9.8e-04 | norm 1.5427 | dt 0.026 | |
| type train | step 2740 | loss 0.0596 0.3923 3.0125 12.5345 | lr 9.8e-04 | norm 1.4793 | dt 0.026 | |
| type train | step 2750 | loss 0.0592 0.3810 2.9766 12.5376 | lr 9.8e-04 | norm 1.5135 | dt 0.026 | |
| type train | step 2760 | loss 0.0585 0.3736 2.9595 12.6063 | lr 9.8e-04 | norm 1.5549 | dt 0.035 | |
| type train | step 2770 | loss 0.0595 0.3799 3.0299 12.8996 | lr 9.8e-04 | norm 1.3028 | dt 0.035 | |
| type train | step 2780 | loss 0.0573 0.3724 3.0046 12.8059 | lr 9.8e-04 | norm 1.3715 | dt 0.035 | |
| type train | step 2790 | loss 0.0573 0.3791 2.9848 12.6832 | lr 9.8e-04 | norm 1.6002 | dt 0.035 | |
| type train | step 2800 | loss 0.0572 0.3696 2.8782 12.2158 | lr 9.8e-04 | norm 1.5460 | dt 0.035 | |
| type train | step 2810 | loss 0.0575 0.3777 2.9009 12.4266 | lr 9.7e-04 | norm 1.6503 | dt 0.035 | |
| type train | step 2820 | loss 0.0595 0.3849 3.0258 12.7842 | lr 9.7e-04 | norm 1.5762 | dt 0.035 | |
| type train | step 2830 | loss 0.0570 0.3695 2.9796 12.8226 | lr 9.7e-04 | norm 1.3354 | dt 0.035 | |
| type train | step 2840 | loss 0.0584 0.3737 2.9320 12.3854 | lr 9.7e-04 | norm 1.3480 | dt 0.034 | |
| type train | step 2850 | loss 0.0574 0.3778 2.9575 12.4950 | lr 9.7e-04 | norm 1.4502 | dt 0.034 | |
| type train | step 2860 | loss 0.0573 0.3829 2.9774 12.4776 | lr 9.7e-04 | norm 1.6544 | dt 0.033 | |
| type train | step 2870 | loss 0.0573 0.3833 2.9853 12.6427 | lr 9.7e-04 | norm 1.4439 | dt 0.032 | |
| type train | step 2880 | loss 0.0570 0.3704 2.8908 12.4123 | lr 9.7e-04 | norm 1.5147 | dt 0.033 | |
| type train | step 2890 | loss 0.0587 0.3795 2.9976 12.6727 | lr 9.7e-04 | norm 1.3595 | dt 0.033 | |
| type train | step 2900 | loss 0.0560 0.3648 2.8406 12.0270 | lr 9.7e-04 | norm 1.4181 | dt 0.032 | |
| type train | step 2910 | loss 0.0574 0.3713 2.9560 12.5182 | lr 9.7e-04 | norm 1.4237 | dt 0.032 | |
| type train | step 2920 | loss 0.0566 0.3756 2.9799 12.6390 | lr 9.7e-04 | norm 1.4390 | dt 0.031 | |
| type train | step 2930 | loss 0.0564 0.3812 3.0100 12.6622 | lr 9.7e-04 | norm 1.5105 | dt 0.032 | |
| type train | step 2940 | loss 0.0574 0.3749 2.9452 12.6312 | lr 9.7e-04 | norm 1.5279 | dt 0.031 | |
| type train | step 2950 | loss 0.0569 0.3744 3.0191 12.9413 | lr 9.7e-04 | norm 1.3753 | dt 0.032 | |
| type train | step 2960 | loss 0.0564 0.3686 2.9912 12.7576 | lr 9.7e-04 | norm 1.5822 | dt 0.031 | |
| type train | step 2970 | loss 0.0570 0.3736 2.9863 12.8765 | lr 9.7e-04 | norm 1.3487 | dt 0.031 | |
| type train | step 2980 | loss 0.0541 0.3648 2.8799 12.1502 | lr 9.7e-04 | norm 1.3934 | dt 0.031 | |
| type train | step 2990 | loss 0.0549 0.3733 2.9162 12.4947 | lr 9.7e-04 | norm 1.6296 | dt 0.028 | |
| type train | step 3000 | loss 0.0570 0.3719 2.9185 12.5105 | lr 9.7e-04 | norm 1.7274 | dt 0.027 | |
| type train | step 3010 | loss 0.0563 0.3692 3.0372 13.1390 | lr 9.7e-04 | norm 1.5003 | dt 0.026 | |
| type train | step 3020 | loss 0.0557 0.3693 2.8954 12.1740 | lr 9.7e-04 | norm 1.4866 | dt 0.026 | |
| type train | step 3030 | loss 0.0563 0.3715 2.9625 12.7532 | lr 9.7e-04 | norm 1.4703 | dt 0.026 | |
| type train | step 3040 | loss 0.0536 0.3653 2.8800 12.1641 | lr 9.7e-04 | norm 1.3349 | dt 0.027 | |
| type train | step 3050 | loss 0.0544 0.3668 3.0124 12.7951 | lr 9.7e-04 | norm 1.5953 | dt 0.029 | |
| type train | step 3060 | loss 0.0549 0.3655 2.9085 12.5088 | lr 9.7e-04 | norm 1.3218 | dt 0.026 | |
| type train | step 3070 | loss 0.0552 0.3698 2.9763 12.8628 | lr 9.7e-04 | norm 1.5182 | dt 0.026 | |
| type train | step 3080 | loss 0.0546 0.3590 2.8586 12.1906 | lr 9.7e-04 | norm 1.3294 | dt 0.026 | |
| type train | step 3090 | loss 0.0570 0.3776 3.0171 12.7756 | lr 9.7e-04 | norm 1.7941 | dt 0.026 | |
| type train | step 3100 | loss 0.0537 0.3672 2.8606 12.0629 | lr 9.7e-04 | norm 1.3783 | dt 0.026 | |
| type train | step 3110 | loss 0.0539 0.3603 2.8682 12.4401 | lr 9.7e-04 | norm 1.4734 | dt 0.026 | |
| type train | step 3120 | loss 0.0538 0.3638 2.8466 12.0401 | lr 9.7e-04 | norm 1.2240 | dt 0.026 | |
| type train | step 3130 | loss 0.0565 0.3768 3.0864 13.2308 | lr 9.7e-04 | norm 1.5429 | dt 0.027 | |
| type train | step 3140 | loss 0.0540 0.3619 2.8787 12.2734 | lr 9.7e-04 | norm 1.5044 | dt 0.026 | |
| type train | step 3150 | loss 0.0533 0.3527 2.7966 12.1054 | lr 9.7e-04 | norm 1.3185 | dt 0.026 | |
| type train | step 3160 | loss 0.0545 0.3693 2.9086 12.3208 | lr 9.7e-04 | norm 1.3682 | dt 0.026 | |
| type train | step 3170 | loss 0.0556 0.3686 2.8377 12.0070 | lr 9.7e-04 | norm 1.3672 | dt 0.026 | |
| type train | step 3180 | loss 0.0538 0.3601 2.8316 12.0836 | lr 9.7e-04 | norm 1.3284 | dt 0.026 | |
| type train | step 3190 | loss 0.0586 0.3724 2.9265 12.6522 | lr 9.6e-04 | norm 2.0321 | dt 0.026 | |
| type train | step 3200 | loss 0.0550 0.3666 2.9529 12.4850 | lr 9.6e-04 | norm 1.6238 | dt 0.026 | |
| type train | step 3210 | loss 0.0538 0.3596 2.8876 12.4287 | lr 9.6e-04 | norm 1.3811 | dt 0.026 | |
| type train | step 3220 | loss 0.0527 0.3652 2.8706 12.1574 | lr 9.6e-04 | norm 1.4491 | dt 0.026 | |
| type train | step 3230 | loss 0.0541 0.3622 2.8619 12.2207 | lr 9.6e-04 | norm 1.4326 | dt 0.026 | |
| type train | step 3240 | loss 0.0532 0.3586 2.8571 12.0854 | lr 9.6e-04 | norm 1.6410 | dt 0.026 | |
| type train | step 3250 | loss 0.0567 0.3720 2.9847 12.8348 | lr 9.6e-04 | norm 1.4567 | dt 0.026 | |
| type train | step 3260 | loss 0.0547 0.3615 2.8487 12.1844 | lr 9.6e-04 | norm 1.4308 | dt 0.026 | |
| type train | step 3270 | loss 0.0528 0.3557 2.8789 12.2907 | lr 9.6e-04 | norm 1.3765 | dt 0.026 | |
| type train | step 3280 | loss 0.0519 0.3569 2.8621 12.2182 | lr 9.6e-04 | norm 1.3759 | dt 0.026 | |
| type train | step 3290 | loss 0.0547 0.3716 2.9264 12.4714 | lr 9.6e-04 | norm 1.4985 | dt 0.026 | |
| type train | step 3300 | loss 0.0525 0.3527 2.7903 11.9076 | lr 9.6e-04 | norm 1.2942 | dt 0.026 | |
| type train | step 3310 | loss 0.0527 0.3509 2.8199 12.2049 | lr 9.6e-04 | norm 1.3831 | dt 0.026 | |
| type train | step 3320 | loss 0.0545 0.3553 2.8058 12.0528 | lr 9.6e-04 | norm 1.3591 | dt 0.026 | |
| type train | step 3330 | loss 0.0542 0.3594 2.9460 12.3934 | lr 9.6e-04 | norm 1.7233 | dt 0.026 | |
| type train | step 3340 | loss 0.0522 0.3518 2.8050 12.0490 | lr 9.6e-04 | norm 1.5194 | dt 0.026 | |
| type train | step 3350 | loss 0.0538 0.3673 2.8799 12.1134 | lr 9.6e-04 | norm 1.4546 | dt 0.026 | |
| type train | step 3360 | loss 0.0534 0.3576 2.8435 12.1523 | lr 9.6e-04 | norm 1.4932 | dt 0.026 | |
| type train | step 3370 | loss 0.0528 0.3507 2.8395 12.1776 | lr 9.6e-04 | norm 1.4997 | dt 0.026 | |
| type train | step 3380 | loss 0.0536 0.3558 2.9088 12.4776 | lr 9.6e-04 | norm 1.2610 | dt 0.026 | |
| type train | step 3390 | loss 0.0517 0.3486 2.8752 12.3821 | lr 9.6e-04 | norm 1.3621 | dt 0.026 | |
| type train | step 3400 | loss 0.0518 0.3559 2.8632 12.2971 | lr 9.6e-04 | norm 1.6112 | dt 0.026 | |
| type train | step 3410 | loss 0.0516 0.3462 2.7581 11.8244 | lr 9.6e-04 | norm 1.5056 | dt 0.026 | |
| type train | step 3420 | loss 0.0519 0.3548 2.7874 12.0641 | lr 9.6e-04 | norm 1.5876 | dt 0.026 | |
| type train | step 3430 | loss 0.0542 0.3605 2.9079 12.3929 | lr 9.6e-04 | norm 1.5410 | dt 0.026 | |
| type train | step 3440 | loss 0.0518 0.3462 2.8642 12.4473 | lr 9.6e-04 | norm 1.2973 | dt 0.026 | |
| type train | step 3450 | loss 0.0531 0.3502 2.8141 11.9815 | lr 9.6e-04 | norm 1.3299 | dt 0.026 | |
| type train | step 3460 | loss 0.0518 0.3544 2.8410 12.0994 | lr 9.6e-04 | norm 1.4403 | dt 0.026 | |
| type train | step 3470 | loss 0.0516 0.3592 2.8599 12.0520 | lr 9.6e-04 | norm 1.6207 | dt 0.026 | |
| type train | step 3480 | loss 0.0518 0.3599 2.8769 12.2525 | lr 9.6e-04 | norm 1.3826 | dt 0.026 | |
| type train | step 3490 | loss 0.0516 0.3483 2.7758 12.0199 | lr 9.6e-04 | norm 1.4527 | dt 0.026 | |
| type train | step 3500 | loss 0.0530 0.3557 2.8834 12.2728 | lr 9.6e-04 | norm 1.3433 | dt 0.026 | |
| type train | step 3510 | loss 0.0508 0.3416 2.7317 11.6389 | lr 9.6e-04 | norm 1.3865 | dt 0.026 | |
| type train | step 3520 | loss 0.0523 0.3481 2.8440 12.1369 | lr 9.5e-04 | norm 1.3877 | dt 0.026 | |
| type train | step 3530 | loss 0.0513 0.3531 2.8741 12.2578 | lr 9.5e-04 | norm 1.4026 | dt 0.026 | |
| type train | step 3540 | loss 0.0508 0.3581 2.8945 12.2824 | lr 9.5e-04 | norm 1.4656 | dt 0.027 | |
| type train | step 3550 | loss 0.0522 0.3522 2.8292 12.2312 | lr 9.5e-04 | norm 1.5012 | dt 0.026 | |
| type train | step 3560 | loss 0.0517 0.3527 2.9087 12.5273 | lr 9.5e-04 | norm 1.3705 | dt 0.027 | |
| type train | step 3570 | loss 0.0512 0.3473 2.8836 12.3574 | lr 9.5e-04 | norm 1.5414 | dt 0.026 | |
| type train | step 3580 | loss 0.0518 0.3518 2.8807 12.4823 | lr 9.5e-04 | norm 1.3589 | dt 0.026 | |
| type train | step 3590 | loss 0.0493 0.3431 2.7844 11.7762 | lr 9.5e-04 | norm 1.3907 | dt 0.026 | |
| type train | step 3600 | loss 0.0502 0.3513 2.8247 12.1535 | lr 9.5e-04 | norm 1.6072 | dt 0.026 | |
| type train | step 3610 | loss 0.0520 0.3498 2.8093 12.1149 | lr 9.5e-04 | norm 1.6690 | dt 0.027 | |
| type train | step 3620 | loss 0.0515 0.3479 2.9285 12.7527 | lr 9.5e-04 | norm 1.4445 | dt 0.026 | |
| type train | step 3630 | loss 0.0506 0.3472 2.7962 11.8101 | lr 9.5e-04 | norm 1.4783 | dt 0.026 | |
| type train | step 3640 | loss 0.0513 0.3506 2.8587 12.3671 | lr 9.5e-04 | norm 1.4835 | dt 0.026 | |
| type train | step 3650 | loss 0.0488 0.3439 2.7878 11.8101 | lr 9.5e-04 | norm 1.3016 | dt 0.027 | |
| type train | step 3660 | loss 0.0495 0.3461 2.9068 12.4179 | lr 9.5e-04 | norm 1.6389 | dt 0.029 | |
| type train | step 3670 | loss 0.0496 0.3447 2.8144 12.1220 | lr 9.5e-04 | norm 1.2853 | dt 0.027 | |
| type train | step 3680 | loss 0.0504 0.3483 2.8763 12.4785 | lr 9.5e-04 | norm 1.4799 | dt 0.026 | |
| type train | step 3690 | loss 0.0499 0.3387 2.7625 11.7823 | lr 9.5e-04 | norm 1.3343 | dt 0.026 | |
| type train | step 3700 | loss 0.0520 0.3565 2.9124 12.3612 | lr 9.5e-04 | norm 1.7606 | dt 0.026 | |
| type train | step 3710 | loss 0.0489 0.3464 2.7724 11.6857 | lr 9.5e-04 | norm 1.3478 | dt 0.026 | |
| type train | step 3720 | loss 0.0490 0.3406 2.7743 12.0722 | lr 9.5e-04 | norm 1.4453 | dt 0.026 | |
| type train | step 3730 | loss 0.0489 0.3446 2.7567 11.6975 | lr 9.5e-04 | norm 1.1885 | dt 0.027 | |
| type train | step 3740 | loss 0.0514 0.3561 2.9801 12.8716 | lr 9.5e-04 | norm 1.5256 | dt 0.026 | |
| type train | step 3750 | loss 0.0491 0.3420 2.7925 11.9101 | lr 9.5e-04 | norm 1.4656 | dt 0.026 | |
| type train | step 3760 | loss 0.0485 0.3335 2.7103 11.7693 | lr 9.5e-04 | norm 1.3072 | dt 0.026 | |
| type train | step 3770 | loss 0.0496 0.3493 2.8189 11.9395 | lr 9.5e-04 | norm 1.3378 | dt 0.026 | |
| type train | step 3780 | loss 0.0507 0.3490 2.7560 11.6340 | lr 9.5e-04 | norm 1.3456 | dt 0.026 | |
| type train | step 3790 | loss 0.0491 0.3413 2.7421 11.7247 | lr 9.5e-04 | norm 1.3153 | dt 0.026 | |
| type train | step 3800 | loss 0.0534 0.3531 2.8140 12.2175 | lr 9.5e-04 | norm 1.9472 | dt 0.026 | |
| type train | step 3810 | loss 0.0502 0.3468 2.8561 12.0985 | lr 9.5e-04 | norm 1.6335 | dt 0.027 | |
| type train | step 3820 | loss 0.0488 0.3401 2.8059 12.0544 | lr 9.4e-04 | norm 1.4002 | dt 0.026 | |
| type train | step 3830 | loss 0.0479 0.3454 2.7890 11.7771 | lr 9.4e-04 | norm 1.4230 | dt 0.026 | |
| type train | step 3840 | loss 0.0493 0.3439 2.7801 11.8475 | lr 9.4e-04 | norm 1.3754 | dt 0.026 | |
| type train | step 3850 | loss 0.0485 0.3401 2.7631 11.7379 | lr 9.4e-04 | norm 1.6225 | dt 0.027 | |
| type train | step 3860 | loss 0.0518 0.3533 2.8810 12.4308 | lr 9.4e-04 | norm 1.4451 | dt 0.026 | |
| type train | step 3870 | loss 0.0502 0.3434 2.7619 11.8238 | lr 9.4e-04 | norm 1.3875 | dt 0.026 | |
| type train | step 3880 | loss 0.0482 0.3372 2.7950 11.9312 | lr 9.4e-04 | norm 1.3598 | dt 0.026 | |
| type train | step 3890 | loss 0.0474 0.3388 2.7826 11.8510 | lr 9.4e-04 | norm 1.3127 | dt 0.026 | |
| type train | step 3900 | loss 0.0498 0.3528 2.8484 12.0868 | lr 9.4e-04 | norm 1.4988 | dt 0.026 | |
| type train | step 3910 | loss 0.0478 0.3354 2.7100 11.5507 | lr 9.4e-04 | norm 1.2853 | dt 0.026 | |
| type train | step 3920 | loss 0.0481 0.3332 2.7376 11.8433 | lr 9.4e-04 | norm 1.3646 | dt 0.026 | |
| type train | step 3930 | loss 0.0498 0.3378 2.7290 11.6732 | lr 9.4e-04 | norm 1.3463 | dt 0.026 | |
| type train | step 3940 | loss 0.0495 0.3406 2.8670 12.0447 | lr 9.4e-04 | norm 1.7119 | dt 0.026 | |
| type train | step 3950 | loss 0.0474 0.3343 2.7258 11.6863 | lr 9.4e-04 | norm 1.5147 | dt 0.026 | |
| type train | step 3960 | loss 0.0489 0.3489 2.8043 11.7551 | lr 9.4e-04 | norm 1.4301 | dt 0.026 | |
| type train | step 3970 | loss 0.0488 0.3406 2.7663 11.8128 | lr 9.4e-04 | norm 1.4610 | dt 0.026 | |
| type train | step 3980 | loss 0.0485 0.3336 2.7564 11.8202 | lr 9.4e-04 | norm 1.4938 | dt 0.026 | |
| type train | step 3990 | loss 0.0492 0.3382 2.8286 12.1149 | lr 9.4e-04 | norm 1.2664 | dt 0.026 | |
| type train | step 4000 | loss 0.0474 0.3314 2.7931 12.0543 | lr 9.4e-04 | norm 1.3516 | dt 0.026 | |
| type train | step 4010 | loss 0.0474 0.3387 2.7879 11.9248 | lr 9.4e-04 | norm 1.6107 | dt 0.026 | |
| type train | step 4020 | loss 0.0474 0.3291 2.6820 11.4700 | lr 9.4e-04 | norm 1.4641 | dt 0.026 | |
| type train | step 4030 | loss 0.0474 0.3377 2.7110 11.6968 | lr 9.4e-04 | norm 1.5485 | dt 0.026 | |
| type train | step 4040 | loss 0.0496 0.3435 2.8254 12.0315 | lr 9.4e-04 | norm 1.4801 | dt 0.026 | |
| type train | step 4050 | loss 0.0474 0.3305 2.7872 12.0963 | lr 9.4e-04 | norm 1.2661 | dt 0.026 | |
| type train | step 4060 | loss 0.0484 0.3339 2.7441 11.6221 | lr 9.4e-04 | norm 1.2822 | dt 0.026 | |
| type train | step 4070 | loss 0.0475 0.3374 2.7630 11.7579 | lr 9.4e-04 | norm 1.4119 | dt 0.026 | |
| type train | step 4080 | loss 0.0471 0.3423 2.7886 11.6672 | lr 9.4e-04 | norm 1.5736 | dt 0.026 | |
| type train | step 4090 | loss 0.0472 0.3433 2.7990 11.9155 | lr 9.3e-04 | norm 1.3420 | dt 0.026 | |
| type train | step 4100 | loss 0.0474 0.3324 2.7045 11.6664 | lr 9.3e-04 | norm 1.4292 | dt 0.026 | |
| type train | step 4110 | loss 0.0487 0.3391 2.8059 11.9233 | lr 9.3e-04 | norm 1.3014 | dt 0.026 | |
| type train | step 4120 | loss 0.0466 0.3263 2.6584 11.3165 | lr 9.3e-04 | norm 1.3533 | dt 0.026 | |
| type train | step 4130 | loss 0.0481 0.3319 2.7707 11.7826 | lr 9.3e-04 | norm 1.4037 | dt 0.026 | |
| type train | step 4140 | loss 0.0472 0.3360 2.7998 11.9178 | lr 9.3e-04 | norm 1.3860 | dt 0.026 | |
| type train | step 4150 | loss 0.0470 0.3420 2.8191 11.9558 | lr 9.3e-04 | norm 1.4335 | dt 0.026 | |
| type train | step 4160 | loss 0.0479 0.3358 2.7575 11.8895 | lr 9.3e-04 | norm 1.4539 | dt 0.026 | |
| type train | step 4170 | loss 0.0474 0.3373 2.8331 12.2122 | lr 9.3e-04 | norm 1.3427 | dt 0.026 | |
| type train | step 4180 | loss 0.0469 0.3317 2.8127 12.0515 | lr 9.3e-04 | norm 1.5261 | dt 0.026 | |
| type train | step 4190 | loss 0.0478 0.3365 2.8119 12.1361 | lr 9.3e-04 | norm 1.3323 | dt 0.026 | |
| type train | step 4200 | loss 0.0452 0.3280 2.7142 11.4598 | lr 9.3e-04 | norm 1.3662 | dt 0.026 | |
| type train | step 4210 | loss 0.0458 0.3352 2.7534 11.8218 | lr 9.3e-04 | norm 1.5690 | dt 0.026 | |
| type train | step 4220 | loss 0.0478 0.3348 2.7384 11.8014 | lr 9.3e-04 | norm 1.6360 | dt 0.026 | |
| type train | step 4230 | loss 0.0472 0.3337 2.8476 12.4515 | lr 9.3e-04 | norm 1.4290 | dt 0.026 | |
| type train | step 4240 | loss 0.0464 0.3325 2.7319 11.4883 | lr 9.3e-04 | norm 1.4601 | dt 0.026 | |
| type train | step 4250 | loss 0.0472 0.3358 2.7885 12.0192 | lr 9.3e-04 | norm 1.4558 | dt 0.026 | |
| type train | step 4260 | loss 0.0447 0.3290 2.7176 11.5048 | lr 9.3e-04 | norm 1.2513 | dt 0.025 | |
| type train | step 4270 | loss 0.0456 0.3320 2.8367 12.0966 | lr 9.3e-04 | norm 1.6007 | dt 0.027 | |
| type train | step 4280 | loss 0.0457 0.3313 2.7536 11.7851 | lr 9.3e-04 | norm 1.2816 | dt 0.026 | |
| type train | step 4290 | loss 0.0465 0.3339 2.8088 12.1647 | lr 9.3e-04 | norm 1.4539 | dt 0.026 | |
| type train | step 4300 | loss 0.0459 0.3245 2.6944 11.5016 | lr 9.3e-04 | norm 1.3256 | dt 0.026 | |
| type train | step 4310 | loss 0.0480 0.3425 2.8418 12.0663 | lr 9.3e-04 | norm 1.7191 | dt 0.026 | |
| type train | step 4320 | loss 0.0449 0.3319 2.7113 11.4019 | lr 9.3e-04 | norm 1.3048 | dt 0.026 | |
| type train | step 4330 | loss 0.0451 0.3254 2.7151 11.7715 | lr 9.3e-04 | norm 1.4615 | dt 0.026 | |
| type train | step 4340 | loss 0.0450 0.3307 2.6904 11.3986 | lr 9.2e-04 | norm 1.1581 | dt 0.026 | |
| type train | step 4350 | loss 0.0473 0.3425 2.9100 12.5642 | lr 9.2e-04 | norm 1.4907 | dt 0.026 | |
| type train | step 4360 | loss 0.0453 0.3279 2.7273 11.6458 | lr 9.2e-04 | norm 1.4868 | dt 0.026 | |
| type train | step 4370 | loss 0.0449 0.3197 2.6476 11.4698 | lr 9.2e-04 | norm 1.2992 | dt 0.027 | |
| type train | step 4380 | loss 0.0455 0.3351 2.7534 11.6453 | lr 9.2e-04 | norm 1.3308 | dt 0.026 | |
| type train | step 4390 | loss 0.0468 0.3349 2.6929 11.3633 | lr 9.2e-04 | norm 1.3111 | dt 0.026 | |
| type train | step 4400 | loss 0.0451 0.3288 2.6836 11.4455 | lr 9.2e-04 | norm 1.2649 | dt 0.026 | |
| type train | step 4410 | loss 0.0493 0.3390 2.7412 11.8685 | lr 9.2e-04 | norm 1.9474 | dt 0.026 | |
| type train | step 4420 | loss 0.0461 0.3333 2.7920 11.8306 | lr 9.2e-04 | norm 1.5993 | dt 0.026 | |
| type train | step 4430 | loss 0.0452 0.3264 2.7420 11.7608 | lr 9.2e-04 | norm 1.3795 | dt 0.026 | |
| type train | step 4440 | loss 0.0442 0.3317 2.7269 11.5267 | lr 9.2e-04 | norm 1.4001 | dt 0.026 | |
| type train | step 4450 | loss 0.0456 0.3305 2.7262 11.5793 | lr 9.2e-04 | norm 1.3541 | dt 0.026 | |
| type train | step 4460 | loss 0.0446 0.3269 2.7004 11.4406 | lr 9.2e-04 | norm 1.5749 | dt 0.026 | |
| type train | step 4470 | loss 0.0480 0.3399 2.8151 12.1593 | lr 9.2e-04 | norm 1.4523 | dt 0.026 | |
| type train | step 4480 | loss 0.0465 0.3302 2.6912 11.5746 | lr 9.2e-04 | norm 1.3941 | dt 0.026 | |
| type train | step 4490 | loss 0.0447 0.3239 2.7347 11.6785 | lr 9.2e-04 | norm 1.3900 | dt 0.026 | |
| type train | step 4500 | loss 0.0440 0.3248 2.7231 11.5861 | lr 9.2e-04 | norm 1.2695 | dt 0.026 | |
| type train | step 4510 | loss 0.0459 0.3380 2.7955 11.8315 | lr 9.2e-04 | norm 1.4690 | dt 0.026 | |
| type train | step 4520 | loss 0.0444 0.3227 2.6538 11.2726 | lr 9.2e-04 | norm 1.2401 | dt 0.026 | |
| type train | step 4530 | loss 0.0448 0.3209 2.6821 11.5980 | lr 9.2e-04 | norm 1.3498 | dt 0.026 | |
| type train | step 4540 | loss 0.0463 0.3256 2.6643 11.3999 | lr 9.2e-04 | norm 1.3091 | dt 0.026 | |
| type train | step 4550 | loss 0.0462 0.3275 2.8028 11.7909 | lr 9.2e-04 | norm 1.7373 | dt 0.026 | |
| type train | step 4560 | loss 0.0439 0.3225 2.6651 11.4719 | lr 9.2e-04 | norm 1.4676 | dt 0.026 | |
| type train | step 4570 | loss 0.0456 0.3361 2.7473 11.5105 | lr 9.2e-04 | norm 1.4228 | dt 0.026 | |
| type train | step 4580 | loss 0.0454 0.3285 2.7135 11.5703 | lr 9.1e-04 | norm 1.4623 | dt 0.026 | |
| type train | step 4590 | loss 0.0448 0.3214 2.6986 11.5916 | lr 9.1e-04 | norm 1.4515 | dt 0.026 | |
| type train | step 4600 | loss 0.0455 0.3257 2.7667 11.8803 | lr 9.1e-04 | norm 1.2482 | dt 0.026 | |
| type train | step 4610 | loss 0.0440 0.3194 2.7373 11.8250 | lr 9.1e-04 | norm 1.3579 | dt 0.026 | |
| type train | step 4620 | loss 0.0442 0.3270 2.7309 11.6765 | lr 9.1e-04 | norm 1.5941 | dt 0.026 | |
| type train | step 4630 | loss 0.0439 0.3181 2.6321 11.2259 | lr 9.1e-04 | norm 1.4586 | dt 0.026 | |
| type train | step 4640 | loss 0.0442 0.3261 2.6571 11.4548 | lr 9.1e-04 | norm 1.5592 | dt 0.026 | |
| type train | step 4650 | loss 0.0462 0.3310 2.7606 11.7724 | lr 9.1e-04 | norm 1.4770 | dt 0.026 | |
| type train | step 4660 | loss 0.0441 0.3192 2.7310 11.8508 | lr 9.1e-04 | norm 1.2632 | dt 0.026 | |
| type train | step 4670 | loss 0.0449 0.3227 2.6892 11.4097 | lr 9.1e-04 | norm 1.2944 | dt 0.026 | |
| type train | step 4680 | loss 0.0441 0.3255 2.7067 11.5358 | lr 9.1e-04 | norm 1.4254 | dt 0.026 | |
| type train | step 4690 | loss 0.0439 0.3307 2.7369 11.4248 | lr 9.1e-04 | norm 1.5495 | dt 0.026 | |
| type train | step 4700 | loss 0.0441 0.3316 2.7482 11.6806 | lr 9.1e-04 | norm 1.3199 | dt 0.026 | |
| type train | step 4710 | loss 0.0443 0.3204 2.6531 11.4583 | lr 9.1e-04 | norm 1.4052 | dt 0.026 | |
| type train | step 4720 | loss 0.0452 0.3276 2.7436 11.7023 | lr 9.1e-04 | norm 1.2844 | dt 0.026 | |
| type train | step 4730 | loss 0.0435 0.3155 2.6014 11.1115 | lr 9.1e-04 | norm 1.3429 | dt 0.026 | |
| type train | step 4740 | loss 0.0448 0.3216 2.7187 11.5458 | lr 9.1e-04 | norm 1.3723 | dt 0.026 | |
| type train | step 4750 | loss 0.0435 0.3251 2.7457 11.6736 | lr 9.1e-04 | norm 1.3346 | dt 0.026 | |
| type train | step 4760 | loss 0.0436 0.3315 2.7720 11.7099 | lr 9.1e-04 | norm 1.4388 | dt 0.026 | |
| type train | step 4770 | loss 0.0448 0.3247 2.7035 11.6983 | lr 9.1e-04 | norm 1.4432 | dt 0.026 | |
| type train | step 4780 | loss 0.0443 0.3259 2.7806 11.9875 | lr 9.1e-04 | norm 1.3111 | dt 0.026 | |
| type train | step 4790 | loss 0.0438 0.3210 2.7620 11.8384 | lr 9.1e-04 | norm 1.5093 | dt 0.026 | |
| type train | step 4800 | loss 0.0448 0.3255 2.7634 11.9188 | lr 9.1e-04 | norm 1.3244 | dt 0.026 | |
| type train | step 4810 | loss 0.0421 0.3165 2.6613 11.2496 | lr 9.0e-04 | norm 1.3279 | dt 0.026 | |
| type train | step 4820 | loss 0.0428 0.3252 2.7094 11.5920 | lr 9.0e-04 | norm 1.5727 | dt 0.027 | |
| type train | step 4830 | loss 0.0448 0.3238 2.6875 11.6023 | lr 9.0e-04 | norm 1.6290 | dt 0.026 | |
| type train | step 4840 | loss 0.0440 0.3227 2.7916 12.2199 | lr 9.0e-04 | norm 1.3941 | dt 0.026 | |
| type train | step 4850 | loss 0.0433 0.3217 2.6814 11.2714 | lr 9.0e-04 | norm 1.4697 | dt 0.026 | |
| type train | step 4860 | loss 0.0443 0.3248 2.7380 11.8080 | lr 9.0e-04 | norm 1.4542 | dt 0.026 | |
| type train | step 4870 | loss 0.0420 0.3177 2.6685 11.3135 | lr 9.0e-04 | norm 1.2513 | dt 0.026 | |
| type train | step 4880 | loss 0.0427 0.3214 2.7876 11.8927 | lr 9.0e-04 | norm 1.5744 | dt 0.028 | |
| type train | step 4890 | loss 0.0427 0.3210 2.7076 11.5889 | lr 9.0e-04 | norm 1.2619 | dt 0.026 | |
| type train | step 4900 | loss 0.0434 0.3229 2.7555 11.9733 | lr 9.0e-04 | norm 1.4307 | dt 0.026 | |
| type train | step 4910 | loss 0.0430 0.3135 2.6444 11.2978 | lr 9.0e-04 | norm 1.3121 | dt 0.026 | |
| type train | step 4920 | loss 0.0449 0.3318 2.7937 11.8584 | lr 9.0e-04 | norm 1.6825 | dt 0.026 | |
| type train | step 4930 | loss 0.0421 0.3212 2.6637 11.2089 | lr 9.0e-04 | norm 1.2903 | dt 0.026 | |
| type train | step 4940 | loss 0.0421 0.3155 2.6729 11.5812 | lr 9.0e-04 | norm 1.4359 | dt 0.026 | |
| type train | step 4950 | loss 0.0420 0.3200 2.6431 11.2049 | lr 9.0e-04 | norm 1.1171 | dt 0.026 | |
| type train | step 4960 | loss 0.0444 0.3327 2.8558 12.3490 | lr 9.0e-04 | norm 1.4665 | dt 0.026 | |
| type train | step 4970 | loss 0.0425 0.3180 2.6787 11.4679 | lr 9.0e-04 | norm 1.4447 | dt 0.026 | |
| type train | step 4980 | loss 0.0423 0.3107 2.5994 11.2778 | lr 9.0e-04 | norm 1.2620 | dt 0.026 | |
| type train | step 4990 | loss 0.0428 0.3238 2.7052 11.4484 | lr 9.0e-04 | norm 1.2951 | dt 0.026 | |
| type train | step 5000 | loss 0.0438 0.3247 2.6483 11.1502 | lr 9.0e-04 | norm 1.2976 | dt 0.026 | |
| type train | step 5010 | loss 0.0424 0.3185 2.6370 11.2714 | lr 9.0e-04 | norm 1.2290 | dt 0.025 | |
| type train | step 5020 | loss 0.0462 0.3291 2.6895 11.6882 | lr 9.0e-04 | norm 1.9387 | dt 0.026 | |
| type train | step 5030 | loss 0.0434 0.3231 2.7428 11.6373 | lr 8.9e-04 | norm 1.5812 | dt 0.026 | |
| type train | step 5040 | loss 0.0425 0.3172 2.6939 11.6006 | lr 8.9e-04 | norm 1.3326 | dt 0.026 | |
| type train | step 5050 | loss 0.0415 0.3213 2.6815 11.3524 | lr 8.9e-04 | norm 1.3950 | dt 0.026 | |
| type train | step 5060 | loss 0.0427 0.3204 2.6840 11.3727 | lr 8.9e-04 | norm 1.3263 | dt 0.026 | |
| type train | step 5070 | loss 0.0418 0.3178 2.6527 11.2670 | lr 8.9e-04 | norm 1.5291 | dt 0.026 | |
| type train | step 5080 | loss 0.0450 0.3298 2.7665 11.9784 | lr 8.9e-04 | norm 1.3960 | dt 0.027 | |
| type train | step 5090 | loss 0.0437 0.3207 2.6427 11.3854 | lr 8.9e-04 | norm 1.3330 | dt 0.026 | |
| type train | step 5100 | loss 0.0420 0.3143 2.6936 11.5060 | lr 8.9e-04 | norm 1.3917 | dt 0.026 | |
| type train | step 5110 | loss 0.0411 0.3152 2.6771 11.4120 | lr 8.9e-04 | norm 1.2688 | dt 0.027 | |
| type train | step 5120 | loss 0.0431 0.3280 2.7505 11.6269 | lr 8.9e-04 | norm 1.4385 | dt 0.026 | |
| type train | step 5130 | loss 0.0417 0.3136 2.6104 11.1336 | lr 8.9e-04 | norm 1.2632 | dt 0.027 | |
| type train | step 5140 | loss 0.0420 0.3125 2.6386 11.4224 | lr 8.9e-04 | norm 1.3306 | dt 0.026 | |
| type train | step 5150 | loss 0.0434 0.3161 2.6220 11.2387 | lr 8.9e-04 | norm 1.3143 | dt 0.026 | |
| type train | step 5160 | loss 0.0433 0.3177 2.7560 11.6177 | lr 8.9e-04 | norm 1.7034 | dt 0.026 | |
| type train | step 5170 | loss 0.0413 0.3138 2.6199 11.3083 | lr 8.9e-04 | norm 1.4455 | dt 0.027 | |
| type train | step 5180 | loss 0.0429 0.3270 2.7077 11.3174 | lr 8.9e-04 | norm 1.4094 | dt 0.026 | |
| type train | step 5190 | loss 0.0426 0.3194 2.6722 11.4070 | lr 8.9e-04 | norm 1.4403 | dt 0.026 | |
| type train | step 5200 | loss 0.0424 0.3128 2.6576 11.4595 | lr 8.9e-04 | norm 1.4589 | dt 0.026 | |
| type train | step 5210 | loss 0.0429 0.3165 2.7223 11.6958 | lr 8.9e-04 | norm 1.2294 | dt 0.026 | |
| type train | step 5220 | loss 0.0415 0.3103 2.6928 11.6384 | lr 8.9e-04 | norm 1.3243 | dt 0.026 | |
| type train | step 5230 | loss 0.0415 0.3177 2.6929 11.4938 | lr 8.8e-04 | norm 1.6023 | dt 0.026 | |
| type train | step 5240 | loss 0.0412 0.3082 2.5952 11.0559 | lr 8.8e-04 | norm 1.4305 | dt 0.026 | |
| type train | step 5250 | loss 0.0416 0.3157 2.6148 11.2842 | lr 8.8e-04 | norm 1.5525 | dt 0.026 | |
| type train | step 5260 | loss 0.0437 0.3215 2.7131 11.6061 | lr 8.8e-04 | norm 1.4738 | dt 0.026 | |
| type train | step 5270 | loss 0.0417 0.3095 2.6929 11.6783 | lr 8.8e-04 | norm 1.2367 | dt 0.026 | |
| type train | step 5280 | loss 0.0424 0.3135 2.6505 11.2301 | lr 8.8e-04 | norm 1.2671 | dt 0.026 | |
| type train | step 5290 | loss 0.0416 0.3151 2.6671 11.3944 | lr 8.8e-04 | norm 1.4272 | dt 0.026 | |
| type train | step 5300 | loss 0.0413 0.3214 2.6909 11.2450 | lr 8.8e-04 | norm 1.5372 | dt 0.026 | |
| type train | step 5310 | loss 0.0415 0.3222 2.7107 11.5338 | lr 8.8e-04 | norm 1.3242 | dt 0.026 | |
| type train | step 5320 | loss 0.0420 0.3106 2.6155 11.2910 | lr 8.8e-04 | norm 1.4148 | dt 0.026 | |
| type train | step 5330 | loss 0.0430 0.3178 2.6967 11.5368 | lr 8.8e-04 | norm 1.3029 | dt 0.026 | |
| type train | step 5340 | loss 0.0411 0.3058 2.5570 10.9467 | lr 8.8e-04 | norm 1.3103 | dt 0.026 | |
| type train | step 5350 | loss 0.0424 0.3126 2.6785 11.3818 | lr 8.8e-04 | norm 1.3870 | dt 0.026 | |
| type train | step 5360 | loss 0.0412 0.3159 2.7043 11.5121 | lr 8.8e-04 | norm 1.3381 | dt 0.026 | |
| type train | step 5370 | loss 0.0410 0.3209 2.7338 11.5302 | lr 8.8e-04 | norm 1.4255 | dt 0.026 | |
| type train | step 5380 | loss 0.0423 0.3155 2.6707 11.5496 | lr 8.8e-04 | norm 1.4347 | dt 0.026 | |
| type train | step 5390 | loss 0.0420 0.3167 2.7428 11.8327 | lr 8.8e-04 | norm 1.3292 | dt 0.026 | |
| type train | step 5400 | loss 0.0414 0.3120 2.7206 11.6683 | lr 8.8e-04 | norm 1.4854 | dt 0.026 | |
| type train | step 5410 | loss 0.0422 0.3152 2.7200 11.7766 | lr 8.8e-04 | norm 1.2767 | dt 0.026 | |
| type train | step 5420 | loss 0.0399 0.3075 2.6230 11.1014 | lr 8.8e-04 | norm 1.3135 | dt 0.026 | |
| type train | step 5430 | loss 0.0405 0.3154 2.6650 11.4357 | lr 8.8e-04 | norm 1.5479 | dt 0.026 | |
| type train | step 5440 | loss 0.0427 0.3146 2.6508 11.4440 | lr 8.7e-04 | norm 1.6299 | dt 0.026 | |
| type train | step 5450 | loss 0.0418 0.3132 2.7594 12.0465 | lr 8.7e-04 | norm 1.3955 | dt 0.026 | |
| type train | step 5460 | loss 0.0409 0.3130 2.6400 11.1333 | lr 8.7e-04 | norm 1.4715 | dt 0.026 | |
| type train | step 5470 | loss 0.0420 0.3159 2.6973 11.6611 | lr 8.7e-04 | norm 1.4315 | dt 0.026 | |
| type train | step 5480 | loss 0.0399 0.3093 2.6336 11.1760 | lr 8.7e-04 | norm 1.2672 | dt 0.026 | |
| type train | step 5490 | loss 0.0402 0.3116 2.7483 11.7651 | lr 8.7e-04 | norm 1.5523 | dt 0.028 | |
| type train | step 5500 | loss 0.0405 0.3113 2.6746 11.4566 | lr 8.7e-04 | norm 1.2247 | dt 0.026 | |
| type train | step 5510 | loss 0.0411 0.3134 2.7239 11.8275 | lr 8.7e-04 | norm 1.4031 | dt 0.026 | |
| type train | step 5520 | loss 0.0409 0.3043 2.6143 11.1539 | lr 8.7e-04 | norm 1.3178 | dt 0.026 | |
| type train | step 5530 | loss 0.0425 0.3227 2.7596 11.6965 | lr 8.7e-04 | norm 1.6734 | dt 0.026 | |
| type train | step 5540 | loss 0.0399 0.3121 2.6303 11.0517 | lr 8.7e-04 | norm 1.2854 | dt 0.026 | |
| type train | step 5550 | loss 0.0402 0.3064 2.6413 11.4532 | lr 8.7e-04 | norm 1.4235 | dt 0.026 | |
| type train | step 5560 | loss 0.0399 0.3111 2.6100 11.0875 | lr 8.7e-04 | norm 1.1265 | dt 0.026 | |
| type train | step 5570 | loss 0.0420 0.3228 2.8217 12.1886 | lr 8.7e-04 | norm 1.4421 | dt 0.027 | |
| type train | step 5580 | loss 0.0401 0.3080 2.6412 11.3195 | lr 8.7e-04 | norm 1.4119 | dt 0.026 | |
| type train | step 5590 | loss 0.0400 0.3010 2.5653 11.1454 | lr 8.7e-04 | norm 1.2486 | dt 0.026 | |
| type train | step 5600 | loss 0.0405 0.3146 2.6668 11.3091 | lr 8.7e-04 | norm 1.2835 | dt 0.026 | |
| type train | step 5610 | loss 0.0415 0.3164 2.6150 11.0086 | lr 8.7e-04 | norm 1.2783 | dt 0.026 | |
| type train | step 5620 | loss 0.0401 0.3095 2.6029 11.1583 | lr 8.7e-04 | norm 1.2122 | dt 0.026 | |
| type train | step 5630 | loss 0.0437 0.3186 2.6531 11.5348 | lr 8.6e-04 | norm 1.8910 | dt 0.026 | |
| type train | step 5640 | loss 0.0411 0.3139 2.7075 11.4926 | lr 8.6e-04 | norm 1.5522 | dt 0.026 | |
| type train | step 5650 | loss 0.0404 0.3088 2.6630 11.4749 | lr 8.6e-04 | norm 1.3142 | dt 0.026 | |
| type train | step 5660 | loss 0.0393 0.3123 2.6433 11.2237 | lr 8.6e-04 | norm 1.3547 | dt 0.026 | |
| type train | step 5670 | loss 0.0405 0.3120 2.6523 11.2497 | lr 8.6e-04 | norm 1.3452 | dt 0.026 | |
| type train | step 5680 | loss 0.0397 0.3096 2.6221 11.1610 | lr 8.6e-04 | norm 1.5245 | dt 0.027 | |
| type train | step 5690 | loss 0.0427 0.3206 2.7319 11.8132 | lr 8.6e-04 | norm 1.3834 | dt 0.026 | |
| type train | step 5700 | loss 0.0416 0.3122 2.6114 11.2732 | lr 8.6e-04 | norm 1.3595 | dt 0.026 | |
| type train | step 5710 | loss 0.0398 0.3067 2.6621 11.3718 | lr 8.6e-04 | norm 1.3652 | dt 0.026 | |
| type train | step 5720 | loss 0.0389 0.3075 2.6422 11.2854 | lr 8.6e-04 | norm 1.2437 | dt 0.026 | |
| type train | step 5730 | loss 0.0410 0.3195 2.7149 11.4899 | lr 8.6e-04 | norm 1.4200 | dt 0.026 | |
| type train | step 5740 | loss 0.0398 0.3056 2.5769 11.0336 | lr 8.6e-04 | norm 1.2290 | dt 0.027 | |
| type train | step 5750 | loss 0.0400 0.3041 2.6072 11.2961 | lr 8.6e-04 | norm 1.3182 | dt 0.026 | |
| type train | step 5760 | loss 0.0414 0.3078 2.5939 11.1259 | lr 8.6e-04 | norm 1.2740 | dt 0.026 | |
| type train | step 5770 | loss 0.0413 0.3101 2.7283 11.4838 | lr 8.6e-04 | norm 1.6994 | dt 0.026 | |
| type train | step 5780 | loss 0.0394 0.3058 2.5888 11.1764 | lr 8.6e-04 | norm 1.4379 | dt 0.026 | |
| type train | step 5790 | loss 0.0407 0.3186 2.6761 11.1923 | lr 8.6e-04 | norm 1.3865 | dt 0.026 | |
| type train | step 5800 | loss 0.0405 0.3104 2.6414 11.2867 | lr 8.6e-04 | norm 1.4144 | dt 0.026 | |
| type train | step 5810 | loss 0.0404 0.3048 2.6254 11.3338 | lr 8.6e-04 | norm 1.4349 | dt 0.026 | |
| type train | step 5820 | loss 0.0408 0.3084 2.6936 11.5761 | lr 8.5e-04 | norm 1.2125 | dt 0.026 | |
| type train | step 5830 | loss 0.0395 0.3026 2.6588 11.4995 | lr 8.5e-04 | norm 1.2984 | dt 0.027 | |
| type train | step 5840 | loss 0.0396 0.3094 2.6649 11.3962 | lr 8.5e-04 | norm 1.5437 | dt 0.026 | |
| type train | step 5850 | loss 0.0392 0.3005 2.5641 10.9609 | lr 8.5e-04 | norm 1.4088 | dt 0.026 | |
| type train | step 5860 | loss 0.0396 0.3070 2.5837 11.1835 | lr 8.5e-04 | norm 1.5180 | dt 0.026 | |
| type train | step 5870 | loss 0.0416 0.3132 2.6767 11.4813 | lr 8.5e-04 | norm 1.4366 | dt 0.026 | |
| type train | step 5880 | loss 0.0398 0.3012 2.6636 11.5470 | lr 8.5e-04 | norm 1.2030 | dt 0.026 | |
| type train | step 5890 | loss 0.0405 0.3065 2.6198 11.1083 | lr 8.5e-04 | norm 1.2429 | dt 0.026 | |
| type train | step 5900 | loss 0.0396 0.3082 2.6395 11.2963 | lr 8.5e-04 | norm 1.4240 | dt 0.026 | |
| type train | step 5910 | loss 0.0392 0.3137 2.6573 11.1565 | lr 8.5e-04 | norm 1.5103 | dt 0.026 | |
| type train | step 5920 | loss 0.0393 0.3145 2.6823 11.4431 | lr 8.5e-04 | norm 1.3000 | dt 0.026 | |
| type train | step 5930 | loss 0.0398 0.3032 2.5854 11.1759 | lr 8.5e-04 | norm 1.3850 | dt 0.026 | |
| type train | step 5940 | loss 0.0407 0.3099 2.6661 11.4209 | lr 8.5e-04 | norm 1.2785 | dt 0.026 | |
| type train | step 5950 | loss 0.0391 0.2986 2.5255 10.8341 | lr 8.5e-04 | norm 1.2957 | dt 0.026 | |
| type train | step 5960 | loss 0.0404 0.3048 2.6506 11.2450 | lr 8.5e-04 | norm 1.3430 | dt 0.026 | |
| type train | step 5970 | loss 0.0391 0.3083 2.6748 11.3992 | lr 8.5e-04 | norm 1.3369 | dt 0.026 | |
| type train | step 5980 | loss 0.0392 0.3140 2.7018 11.4300 | lr 8.5e-04 | norm 1.4205 | dt 0.026 | |
| type train | step 5990 | loss 0.0404 0.3085 2.6431 11.4125 | lr 8.5e-04 | norm 1.4005 | dt 0.026 | |
| type train | step 6000 | loss 0.0398 0.3087 2.7110 11.7043 | lr 8.4e-04 | norm 1.2929 | dt 0.026 | |
| type train | step 6010 | loss 0.0394 0.3050 2.6936 11.5541 | lr 8.4e-04 | norm 1.4688 | dt 0.026 | |
| type train | step 6020 | loss 0.0405 0.3078 2.6925 11.6585 | lr 8.4e-04 | norm 1.2707 | dt 0.026 | |
| type train | step 6030 | loss 0.0379 0.3002 2.5942 10.9772 | lr 8.4e-04 | norm 1.3185 | dt 0.026 | |
| type train | step 6040 | loss 0.0385 0.3079 2.6337 11.3422 | lr 8.4e-04 | norm 1.5169 | dt 0.026 | |
| type train | step 6050 | loss 0.0405 0.3063 2.6239 11.2990 | lr 8.4e-04 | norm 1.5880 | dt 0.026 | |
| type train | step 6060 | loss 0.0398 0.3061 2.7311 11.9267 | lr 8.4e-04 | norm 1.3738 | dt 0.026 | |
| type train | step 6070 | loss 0.0389 0.3056 2.6130 11.0291 | lr 8.4e-04 | norm 1.4229 | dt 0.026 | |
| type train | step 6080 | loss 0.0400 0.3084 2.6704 11.5366 | lr 8.4e-04 | norm 1.4049 | dt 0.026 | |
| type train | step 6090 | loss 0.0381 0.3017 2.6057 11.0638 | lr 8.4e-04 | norm 1.2330 | dt 0.026 | |
| type train | step 6100 | loss 0.0385 0.3050 2.7222 11.6393 | lr 8.4e-04 | norm 1.5194 | dt 0.027 | |
| type train | step 6110 | loss 0.0387 0.3038 2.6472 11.3522 | lr 8.4e-04 | norm 1.2580 | dt 0.026 | |
| type train | step 6120 | loss 0.0394 0.3053 2.6945 11.7061 | lr 8.4e-04 | norm 1.3626 | dt 0.026 | |
| type train | step 6130 | loss 0.0390 0.2975 2.5855 11.0554 | lr 8.4e-04 | norm 1.2755 | dt 0.026 | |
| type train | step 6140 | loss 0.0406 0.3157 2.7257 11.5695 | lr 8.4e-04 | norm 1.6540 | dt 0.026 | |
| type train | step 6150 | loss 0.0381 0.3046 2.5982 10.9500 | lr 8.4e-04 | norm 1.2698 | dt 0.026 | |
| type train | step 6160 | loss 0.0382 0.2990 2.6161 11.3464 | lr 8.4e-04 | norm 1.4190 | dt 0.026 | |
| type train | step 6170 | loss 0.0382 0.3037 2.5834 11.0080 | lr 8.4e-04 | norm 1.1325 | dt 0.026 | |
| type train | step 6180 | loss 0.0402 0.3159 2.7919 12.0291 | lr 8.3e-04 | norm 1.4069 | dt 0.027 | |
| type train | step 6190 | loss 0.0384 0.3017 2.6093 11.2179 | lr 8.3e-04 | norm 1.3970 | dt 0.026 | |
| type train | step 6200 | loss 0.0384 0.2942 2.5400 11.0218 | lr 8.3e-04 | norm 1.1892 | dt 0.027 | |
| type train | step 6210 | loss 0.0388 0.3072 2.6372 11.2336 | lr 8.3e-04 | norm 1.2932 | dt 0.026 | |
| type train | step 6220 | loss 0.0397 0.3082 2.5792 10.9387 | lr 8.3e-04 | norm 1.2480 | dt 0.026 | |
| type train | step 6230 | loss 0.0383 0.3023 2.5738 11.0540 | lr 8.3e-04 | norm 1.1927 | dt 0.026 | |
| type train | step 6240 | loss 0.0419 0.3109 2.6287 11.4150 | lr 8.3e-04 | norm 1.8608 | dt 0.026 | |
| type train | step 6250 | loss 0.0392 0.3067 2.6791 11.3885 | lr 8.3e-04 | norm 1.5134 | dt 0.026 | |
| type train | step 6260 | loss 0.0385 0.3012 2.6363 11.3553 | lr 8.3e-04 | norm 1.2843 | dt 0.035 | |
| type train | step 6270 | loss 0.0375 0.3050 2.6166 11.1204 | lr 8.3e-04 | norm 1.3681 | dt 0.036 | |
| type train | step 6280 | loss 0.0387 0.3043 2.6181 11.1581 | lr 8.3e-04 | norm 1.2913 | dt 0.035 | |
| type train | step 6290 | loss 0.0379 0.3031 2.5970 11.0803 | lr 8.3e-04 | norm 1.5004 | dt 0.033 | |
| type train | step 6300 | loss 0.0410 0.3134 2.7028 11.6923 | lr 8.3e-04 | norm 1.3424 | dt 0.034 | |
| type train | step 6310 | loss 0.0398 0.3054 2.5831 11.1915 | lr 8.3e-04 | norm 1.3117 | dt 0.033 | |
| type train | step 6320 | loss 0.0381 0.2992 2.6321 11.2770 | lr 8.3e-04 | norm 1.2984 | dt 0.033 | |
| type train | step 6330 | loss 0.0374 0.3004 2.6144 11.1907 | lr 8.3e-04 | norm 1.2516 | dt 0.032 | |
| type train | step 6340 | loss 0.0391 0.3130 2.6872 11.4084 | lr 8.3e-04 | norm 1.3981 | dt 0.032 | |
| type train | step 6350 | loss 0.0380 0.2983 2.5506 10.9466 | lr 8.2e-04 | norm 1.1999 | dt 0.032 | |
| type train | step 6360 | loss 0.0384 0.2963 2.5803 11.2052 | lr 8.2e-04 | norm 1.2850 | dt 0.031 | |
| type train | step 6370 | loss 0.0397 0.3006 2.5643 11.0234 | lr 8.2e-04 | norm 1.2293 | dt 0.031 | |
| type train | step 6380 | loss 0.0397 0.3033 2.6966 11.3794 | lr 8.2e-04 | norm 1.6620 | dt 0.031 | |
| type train | step 6390 | loss 0.0378 0.2987 2.5604 11.0880 | lr 8.2e-04 | norm 1.4190 | dt 0.031 | |
| type train | step 6400 | loss 0.0390 0.3109 2.6474 11.1131 | lr 8.2e-04 | norm 1.3729 | dt 0.030 | |
| type train | step 6410 | loss 0.0390 0.3039 2.6109 11.1897 | lr 8.2e-04 | norm 1.3968 | dt 0.031 | |
| type train | step 6420 | loss 0.0390 0.2980 2.5995 11.2379 | lr 8.2e-04 | norm 1.4079 | dt 0.030 | |
| type train | step 6430 | loss 0.0391 0.3013 2.6650 11.4923 | lr 8.2e-04 | norm 1.1888 | dt 0.030 | |
| type train | step 6440 | loss 0.0378 0.2964 2.6331 11.4005 | lr 8.2e-04 | norm 1.2878 | dt 0.030 | |
| type train | step 6450 | loss 0.0379 0.3024 2.6363 11.3286 | lr 8.2e-04 | norm 1.5153 | dt 0.031 | |
| type train | step 6460 | loss 0.0376 0.2938 2.5299 10.8844 | lr 8.2e-04 | norm 1.3853 | dt 0.029 | |
| type train | step 6470 | loss 0.0379 0.3002 2.5542 11.1098 | lr 8.2e-04 | norm 1.4751 | dt 0.030 | |
| type train | step 6480 | loss 0.0401 0.3051 2.6449 11.3941 | lr 8.2e-04 | norm 1.3972 | dt 0.027 | |
| type train | step 6490 | loss 0.0382 0.2949 2.6339 11.4653 | lr 8.2e-04 | norm 1.1763 | dt 0.026 | |
| type train | step 6500 | loss 0.0388 0.3006 2.5924 11.0167 | lr 8.2e-04 | norm 1.2525 | dt 0.027 | |
| type train | step 6510 | loss 0.0381 0.3013 2.6103 11.2121 | lr 8.2e-04 | norm 1.3767 | dt 0.026 | |
| type train | step 6520 | loss 0.0377 0.3063 2.6245 11.0862 | lr 8.1e-04 | norm 1.4883 | dt 0.026 | |
| type train | step 6530 | loss 0.0380 0.3080 2.6523 11.3556 | lr 8.1e-04 | norm 1.2696 | dt 0.027 | |
| type train | step 6540 | loss 0.0383 0.2973 2.5607 11.0692 | lr 8.1e-04 | norm 1.3754 | dt 0.027 | |
| type train | step 6550 | loss 0.0391 0.3041 2.6356 11.3423 | lr 8.1e-04 | norm 1.2548 | dt 0.036 | |
| type train | step 6560 | loss 0.0375 0.2925 2.5000 10.7450 | lr 8.1e-04 | norm 1.2685 | dt 0.029 | |
| type train | step 6570 | loss 0.0389 0.2989 2.6195 11.1490 | lr 8.1e-04 | norm 1.3393 | dt 0.028 | |
| type train | step 6580 | loss 0.0378 0.3015 2.6480 11.3235 | lr 8.1e-04 | norm 1.3444 | dt 0.027 | |
| type train | step 6590 | loss 0.0376 0.3075 2.6725 11.3289 | lr 8.1e-04 | norm 1.3487 | dt 0.027 | |
| type train | step 6600 | loss 0.0387 0.3026 2.6172 11.3207 | lr 8.1e-04 | norm 1.3603 | dt 0.027 | |
| type train | step 6610 | loss 0.0382 0.3026 2.6836 11.5894 | lr 8.1e-04 | norm 1.2282 | dt 0.027 | |
| type train | step 6620 | loss 0.0379 0.2983 2.6666 11.4581 | lr 8.1e-04 | norm 1.4640 | dt 0.027 | |
| type train | step 6630 | loss 0.0388 0.3019 2.6628 11.5647 | lr 8.1e-04 | norm 1.2611 | dt 0.027 | |
| type train | step 6640 | loss 0.0364 0.2942 2.5647 10.8950 | lr 8.1e-04 | norm 1.2967 | dt 0.027 | |
| type train | step 6650 | loss 0.0373 0.3007 2.6062 11.2521 | lr 8.1e-04 | norm 1.5112 | dt 0.027 | |
| type train | step 6660 | loss 0.0392 0.2997 2.5963 11.1912 | lr 8.1e-04 | norm 1.5836 | dt 0.028 | |
| type train | step 6670 | loss 0.0382 0.2999 2.7051 11.8244 | lr 8.1e-04 | norm 1.3490 | dt 0.027 | |
| type train | step 6680 | loss 0.0373 0.3004 2.5858 10.9371 | lr 8.1e-04 | norm 1.4221 | dt 0.027 | |
| type train | step 6690 | loss 0.0387 0.3027 2.6452 11.4581 | lr 8.0e-04 | norm 1.3908 | dt 0.026 | |
| type train | step 6700 | loss 0.0366 0.2958 2.5809 10.9877 | lr 8.0e-04 | norm 1.2346 | dt 0.027 | |
| type train | step 6710 | loss 0.0370 0.2997 2.6938 11.5472 | lr 8.0e-04 | norm 1.4999 | dt 0.031 | |
| type train | step 6720 | loss 0.0372 0.2979 2.6206 11.2589 | lr 8.0e-04 | norm 1.2556 | dt 0.027 | |
| type train | step 6730 | loss 0.0378 0.2990 2.6672 11.6062 | lr 8.0e-04 | norm 1.3468 | dt 0.027 | |
| type train | step 6740 | loss 0.0376 0.2920 2.5579 10.9710 | lr 8.0e-04 | norm 1.2714 | dt 0.027 | |
| type train | step 6750 | loss 0.0391 0.3095 2.6920 11.4977 | lr 8.0e-04 | norm 1.6185 | dt 0.026 | |
| type train | step 6760 | loss 0.0366 0.2986 2.5736 10.8992 | lr 8.0e-04 | norm 1.2730 | dt 0.033 | |
| type train | step 6770 | loss 0.0366 0.2927 2.5920 11.2710 | lr 8.0e-04 | norm 1.3917 | dt 0.032 | |
| type train | step 6780 | loss 0.0367 0.2979 2.5574 10.9234 | lr 8.0e-04 | norm 1.1132 | dt 0.031 | |
| type train | step 6790 | loss 0.0386 0.3085 2.7595 11.9253 | lr 8.0e-04 | norm 1.3740 | dt 0.031 | |
| type train | step 6800 | loss 0.0370 0.2955 2.5778 11.1372 | lr 8.0e-04 | norm 1.3722 | dt 0.031 | |
| type train | step 6810 | loss 0.0367 0.2890 2.5125 10.9370 | lr 8.0e-04 | norm 1.1771 | dt 0.031 | |
| type train | step 6820 | loss 0.0374 0.3014 2.6168 11.1840 | lr 8.0e-04 | norm 1.2716 | dt 0.031 | |
| type train | step 6830 | loss 0.0382 0.3026 2.5555 10.8674 | lr 8.0e-04 | norm 1.2319 | dt 0.029 | |
| type train | step 6840 | loss 0.0370 0.2964 2.5479 10.9772 | lr 8.0e-04 | norm 1.1691 | dt 0.027 | |
| type train | step 6850 | loss 0.0402 0.3043 2.6050 11.3355 | lr 7.9e-04 | norm 1.8344 | dt 0.026 | |
| type train | step 6860 | loss 0.0378 0.3013 2.6447 11.3257 | lr 7.9e-04 | norm 1.4789 | dt 0.026 | |
| type train | step 6870 | loss 0.0371 0.2962 2.6078 11.2749 | lr 7.9e-04 | norm 1.2588 | dt 0.026 | |
| type train | step 6880 | loss 0.0361 0.2998 2.5921 11.0641 | lr 7.9e-04 | norm 1.3423 | dt 0.026 | |
| type train | step 6890 | loss 0.0371 0.2986 2.5893 11.0743 | lr 7.9e-04 | norm 1.2976 | dt 0.026 | |
| type train | step 6900 | loss 0.0365 0.2972 2.5754 10.9878 | lr 7.9e-04 | norm 1.4694 | dt 0.026 | |
| type train | step 6910 | loss 0.0395 0.3075 2.6755 11.6101 | lr 7.9e-04 | norm 1.3242 | dt 0.026 | |
| type train | step 6920 | loss 0.0383 0.2995 2.5527 11.1274 | lr 7.9e-04 | norm 1.3022 | dt 0.026 | |
| type train | step 6930 | loss 0.0366 0.2943 2.6056 11.1906 | lr 7.9e-04 | norm 1.2879 | dt 0.026 | |
| type train | step 6940 | loss 0.0359 0.2953 2.5889 11.1234 | lr 7.9e-04 | norm 1.2420 | dt 0.026 | |
| type train | step 6950 | loss 0.0376 0.3078 2.6624 11.3381 | lr 7.9e-04 | norm 1.3713 | dt 0.026 | |
| type train | step 6960 | loss 0.0366 0.2923 2.5237 10.8712 | lr 7.9e-04 | norm 1.1957 | dt 0.026 | |
| type train | step 6970 | loss 0.0370 0.2908 2.5568 11.1198 | lr 7.9e-04 | norm 1.2906 | dt 0.027 | |
| type train | step 6980 | loss 0.0383 0.2955 2.5345 10.9559 | lr 7.9e-04 | norm 1.2239 | dt 0.026 | |
| type train | step 6990 | loss 0.0381 0.2981 2.6675 11.3191 | lr 7.9e-04 | norm 1.6089 | dt 0.026 | |
| type train | step 7000 | loss 0.0364 0.2938 2.5395 11.0103 | lr 7.9e-04 | norm 1.3819 | dt 0.026 | |
| type train | step 7010 | loss 0.0375 0.3056 2.6188 11.0636 | lr 7.8e-04 | norm 1.3825 | dt 0.026 | |
| type train | step 7020 | loss 0.0375 0.2971 2.5858 11.0895 | lr 7.8e-04 | norm 1.3920 | dt 0.027 | |
| type train | step 7030 | loss 0.0375 0.2924 2.5727 11.1604 | lr 7.8e-04 | norm 1.3599 | dt 0.026 | |
| type train | step 7040 | loss 0.0378 0.2963 2.6380 11.4298 | lr 7.8e-04 | norm 1.1615 | dt 0.026 | |
| type train | step 7050 | loss 0.0365 0.2910 2.6081 11.3227 | lr 7.8e-04 | norm 1.2681 | dt 0.026 | |
| type train | step 7060 | loss 0.0365 0.2982 2.6077 11.2727 | lr 7.8e-04 | norm 1.4911 | dt 0.026 | |
| type train | step 7070 | loss 0.0363 0.2884 2.5042 10.8167 | lr 7.8e-04 | norm 1.3672 | dt 0.026 | |
| type train | step 7080 | loss 0.0366 0.2947 2.5286 11.0509 | lr 7.8e-04 | norm 1.4494 | dt 0.026 | |
| type train | step 7090 | loss 0.0385 0.2993 2.6123 11.3188 | lr 7.8e-04 | norm 1.3607 | dt 0.026 | |
| type train | step 7100 | loss 0.0366 0.2904 2.6080 11.3960 | lr 7.8e-04 | norm 1.1644 | dt 0.026 | |
| type train | step 7110 | loss 0.0373 0.2952 2.5710 10.9631 | lr 7.8e-04 | norm 1.2301 | dt 0.026 | |
| type train | step 7120 | loss 0.0368 0.2967 2.5875 11.1592 | lr 7.8e-04 | norm 1.3557 | dt 0.026 | |
| type train | step 7130 | loss 0.0364 0.3014 2.6008 11.0203 | lr 7.8e-04 | norm 1.4868 | dt 0.026 | |
| type train | step 7140 | loss 0.0366 0.3027 2.6292 11.2875 | lr 7.8e-04 | norm 1.2508 | dt 0.026 | |
| type train | step 7150 | loss 0.0370 0.2924 2.5341 10.9964 | lr 7.8e-04 | norm 1.3570 | dt 0.026 | |
| type train | step 7160 | loss 0.0376 0.2985 2.6083 11.2782 | lr 7.8e-04 | norm 1.2254 | dt 0.026 | |
| type train | step 7170 | loss 0.0360 0.2877 2.4813 10.7027 | lr 7.7e-04 | norm 1.3045 | dt 0.026 | |
| type train | step 7180 | loss 0.0374 0.2946 2.5974 11.0737 | lr 7.7e-04 | norm 1.3145 | dt 0.026 | |
| type train | step 7190 | loss 0.0364 0.2963 2.6234 11.2726 | lr 7.7e-04 | norm 1.3167 | dt 0.026 | |
| type train | step 7200 | loss 0.0362 0.3022 2.6517 11.2637 | lr 7.7e-04 | norm 1.3117 | dt 0.026 | |
| type train | step 7210 | loss 0.0373 0.2978 2.5904 11.2551 | lr 7.7e-04 | norm 1.3637 | dt 0.027 | |
| type train | step 7220 | loss 0.0367 0.2977 2.6600 11.5228 | lr 7.7e-04 | norm 1.2245 | dt 0.026 | |
| type train | step 7230 | loss 0.0363 0.2936 2.6413 11.3920 | lr 7.7e-04 | norm 1.4365 | dt 0.026 | |
| type train | step 7240 | loss 0.0375 0.2977 2.6358 11.4822 | lr 7.7e-04 | norm 1.2546 | dt 0.027 | |
| type train | step 7250 | loss 0.0350 0.2894 2.5384 10.8381 | lr 7.7e-04 | norm 1.2623 | dt 0.026 | |
| type train | step 7260 | loss 0.0357 0.2957 2.5820 11.1935 | lr 7.7e-04 | norm 1.4662 | dt 0.026 | |
| type train | step 7270 | loss 0.0375 0.2951 2.5711 11.1065 | lr 7.7e-04 | norm 1.5285 | dt 0.027 | |
| type train | step 7280 | loss 0.0368 0.2946 2.6831 11.7640 | lr 7.7e-04 | norm 1.3112 | dt 0.026 | |
| type train | step 7290 | loss 0.0359 0.2951 2.5569 10.8745 | lr 7.7e-04 | norm 1.3866 | dt 0.026 | |
| type train | step 7300 | loss 0.0372 0.2982 2.6247 11.4068 | lr 7.7e-04 | norm 1.3840 | dt 0.026 | |
| type train | step 7310 | loss 0.0352 0.2903 2.5540 10.9326 | lr 7.7e-04 | norm 1.1901 | dt 0.026 | |
| type train | step 7320 | loss 0.0356 0.2948 2.6712 11.4828 | lr 7.7e-04 | norm 1.4547 | dt 0.028 | |
| type train | step 7330 | loss 0.0357 0.2925 2.5917 11.1971 | lr 7.6e-04 | norm 1.2025 | dt 0.026 | |
| type train | step 7340 | loss 0.0364 0.2938 2.6413 11.5435 | lr 7.6e-04 | norm 1.3243 | dt 0.026 | |
| type train | step 7350 | loss 0.0361 0.2866 2.5361 10.9112 | lr 7.6e-04 | norm 1.2306 | dt 0.026 | |
| type train | step 7360 | loss 0.0377 0.3044 2.6601 11.4228 | lr 7.6e-04 | norm 1.5715 | dt 0.026 | |
| type train | step 7370 | loss 0.0351 0.2934 2.5540 10.8437 | lr 7.6e-04 | norm 1.2594 | dt 0.026 | |
| type train | step 7380 | loss 0.0351 0.2880 2.5684 11.2427 | lr 7.6e-04 | norm 1.3465 | dt 0.026 | |
| type train | step 7390 | loss 0.0353 0.2932 2.5387 10.8699 | lr 7.6e-04 | norm 1.0864 | dt 0.026 | |
| type train | step 7400 | loss 0.0372 0.3029 2.7354 11.8529 | lr 7.6e-04 | norm 1.3589 | dt 0.026 | |
| type train | step 7410 | loss 0.0355 0.2901 2.5498 11.0891 | lr 7.6e-04 | norm 1.3369 | dt 0.026 | |
| type train | step 7420 | loss 0.0353 0.2838 2.4904 10.8785 | lr 7.6e-04 | norm 1.1458 | dt 0.026 | |
| type train | step 7430 | loss 0.0359 0.2967 2.5948 11.1299 | lr 7.6e-04 | norm 1.2779 | dt 0.026 | |
| type train | step 7440 | loss 0.0367 0.2978 2.5274 10.8104 | lr 7.6e-04 | norm 1.2133 | dt 0.026 | |
| type train | step 7450 | loss 0.0357 0.2907 2.5275 10.9349 | lr 7.6e-04 | norm 1.1767 | dt 0.026 | |
| type train | step 7460 | loss 0.0388 0.2990 2.5776 11.2685 | lr 7.6e-04 | norm 1.8134 | dt 0.026 | |
| type train | step 7470 | loss 0.0364 0.2968 2.6205 11.2706 | lr 7.6e-04 | norm 1.4638 | dt 0.026 | |
| type train | step 7480 | loss 0.0358 0.2918 2.5831 11.2275 | lr 7.5e-04 | norm 1.2590 | dt 0.026 | |
| type train | step 7490 | loss 0.0347 0.2947 2.5676 11.0100 | lr 7.5e-04 | norm 1.3191 | dt 0.026 | |
| type train | step 7500 | loss 0.0358 0.2928 2.5652 11.0283 | lr 7.5e-04 | norm 1.2573 | dt 0.027 | |
| type train | step 7510 | loss 0.0351 0.2927 2.5553 10.9396 | lr 7.5e-04 | norm 1.4676 | dt 0.027 | |
| type train | step 7520 | loss 0.0381 0.3023 2.6528 11.5662 | lr 7.5e-04 | norm 1.3039 | dt 0.026 | |
| type train | step 7530 | loss 0.0371 0.2945 2.5279 11.0831 | lr 7.5e-04 | norm 1.3092 | dt 0.026 | |
| type train | step 7540 | loss 0.0353 0.2891 2.5832 11.1291 | lr 7.5e-04 | norm 1.2787 | dt 0.026 | |
| type train | step 7550 | loss 0.0346 0.2907 2.5647 11.0818 | lr 7.5e-04 | norm 1.2290 | dt 0.026 | |
| type train | step 7560 | loss 0.0363 0.3022 2.6365 11.2981 | lr 7.5e-04 | norm 1.3456 | dt 0.026 | |
| type train | step 7570 | loss 0.0355 0.2878 2.5004 10.8080 | lr 7.5e-04 | norm 1.1597 | dt 0.026 | |
| type train | step 7580 | loss 0.0358 0.2863 2.5329 11.0785 | lr 7.5e-04 | norm 1.2272 | dt 0.026 | |
| type train | step 7590 | loss 0.0368 0.2909 2.5112 10.8973 | lr 7.5e-04 | norm 1.2271 | dt 0.026 | |
| type train | step 7600 | loss 0.0369 0.2934 2.6421 11.2775 | lr 7.5e-04 | norm 1.5689 | dt 0.026 | |
| type train | step 7610 | loss 0.0353 0.2891 2.5150 10.9557 | lr 7.5e-04 | norm 1.3720 | dt 0.026 | |
| type train | step 7620 | loss 0.0361 0.3006 2.5918 11.0285 | lr 7.5e-04 | norm 1.3505 | dt 0.026 | |
| type train | step 7630 | loss 0.0362 0.2924 2.5601 11.0404 | lr 7.4e-04 | norm 1.3597 | dt 0.026 | |
| type train | step 7640 | loss 0.0362 0.2882 2.5472 11.0990 | lr 7.4e-04 | norm 1.3517 | dt 0.026 | |
| type train | step 7650 | loss 0.0364 0.2918 2.6168 11.3819 | lr 7.4e-04 | norm 1.1422 | dt 0.027 | |
| type train | step 7660 | loss 0.0351 0.2868 2.5863 11.2836 | lr 7.4e-04 | norm 1.2605 | dt 0.026 | |
| type train | step 7670 | loss 0.0354 0.2929 2.5809 11.2203 | lr 7.4e-04 | norm 1.4753 | dt 0.026 | |
| type train | step 7680 | loss 0.0350 0.2836 2.4776 10.7726 | lr 7.4e-04 | norm 1.3549 | dt 0.026 | |
| type train | step 7690 | loss 0.0353 0.2905 2.5049 11.0029 | lr 7.4e-04 | norm 1.4065 | dt 0.026 | |
| type train | step 7700 | loss 0.0373 0.2942 2.5851 11.2730 | lr 7.4e-04 | norm 1.3688 | dt 0.026 | |
| type train | step 7710 | loss 0.0356 0.2859 2.5837 11.3362 | lr 7.4e-04 | norm 1.1352 | dt 0.026 | |
| type train | step 7720 | loss 0.0362 0.2914 2.5493 10.9100 | lr 7.4e-04 | norm 1.2361 | dt 0.026 | |
| type train | step 7730 | loss 0.0355 0.2922 2.5611 11.1038 | lr 7.4e-04 | norm 1.3146 | dt 0.026 | |
| type train | step 7740 | loss 0.0351 0.2974 2.5724 10.9830 | lr 7.4e-04 | norm 1.4540 | dt 0.026 | |
| type train | step 7750 | loss 0.0353 0.2985 2.6054 11.2304 | lr 7.4e-04 | norm 1.2205 | dt 0.026 | |
| type train | step 7760 | loss 0.0357 0.2881 2.5079 10.9231 | lr 7.4e-04 | norm 1.3156 | dt 0.025 | |
| type train | step 7770 | loss 0.0366 0.2941 2.5839 11.2342 | lr 7.4e-04 | norm 1.1941 | dt 0.026 | |
| type train | step 7780 | loss 0.0351 0.2834 2.4572 10.6553 | lr 7.3e-04 | norm 1.2690 | dt 0.026 | |
| type train | step 7790 | loss 0.0362 0.2898 2.5761 11.0263 | lr 7.3e-04 | norm 1.2899 | dt 0.026 | |
| type train | step 7800 | loss 0.0353 0.2916 2.5998 11.2232 | lr 7.3e-04 | norm 1.2921 | dt 0.026 | |
| type train | step 7810 | loss 0.0350 0.2980 2.6323 11.2302 | lr 7.3e-04 | norm 1.3366 | dt 0.026 | |
| type train | step 7820 | loss 0.0361 0.2933 2.5673 11.2107 | lr 7.3e-04 | norm 1.3169 | dt 0.026 | |
| type train | step 7830 | loss 0.0356 0.2931 2.6397 11.4696 | lr 7.3e-04 | norm 1.1978 | dt 0.026 | |
| type train | step 7840 | loss 0.0352 0.2895 2.6148 11.3386 | lr 7.3e-04 | norm 1.4154 | dt 0.026 | |
| type train | step 7850 | loss 0.0363 0.2930 2.6110 11.4235 | lr 7.3e-04 | norm 1.1927 | dt 0.026 | |
| type train | step 7860 | loss 0.0339 0.2856 2.5204 10.7952 | lr 7.3e-04 | norm 1.2398 | dt 0.026 | |
| type train | step 7870 | loss 0.0345 0.2919 2.5626 11.1526 | lr 7.3e-04 | norm 1.4672 | dt 0.026 | |
| type train | step 7880 | loss 0.0363 0.2913 2.5468 11.0613 | lr 7.3e-04 | norm 1.5389 | dt 0.026 | |
| type train | step 7890 | loss 0.0355 0.2905 2.6616 11.7134 | lr 7.3e-04 | norm 1.3109 | dt 0.026 | |
| type train | step 7900 | loss 0.0347 0.2912 2.5336 10.8286 | lr 7.3e-04 | norm 1.3723 | dt 0.026 | |
| type train | step 7910 | loss 0.0360 0.2945 2.6015 11.3680 | lr 7.3e-04 | norm 1.3558 | dt 0.026 | |
| type train | step 7920 | loss 0.0340 0.2861 2.5303 10.8895 | lr 7.3e-04 | norm 1.1778 | dt 0.027 | |
| type train | step 7930 | loss 0.0345 0.2910 2.6515 11.4388 | lr 7.2e-04 | norm 1.4742 | dt 0.028 | |
| type train | step 7940 | loss 0.0346 0.2893 2.5690 11.1593 | lr 7.2e-04 | norm 1.1835 | dt 0.026 | |
| type train | step 7950 | loss 0.0351 0.2891 2.6184 11.4851 | lr 7.2e-04 | norm 1.3079 | dt 0.026 | |
| type train | step 7960 | loss 0.0349 0.2828 2.5143 10.8599 | lr 7.2e-04 | norm 1.2207 | dt 0.026 | |
| type train | step 7970 | loss 0.0365 0.3008 2.6356 11.3726 | lr 7.2e-04 | norm 1.5537 | dt 0.026 | |
| type train | step 7980 | loss 0.0339 0.2898 2.5321 10.8109 | lr 7.2e-04 | norm 1.2520 | dt 0.026 | |
| type train | step 7990 | loss 0.0341 0.2847 2.5494 11.2117 | lr 7.2e-04 | norm 1.3190 | dt 0.026 | |
| type train | step 8000 | loss 0.0342 0.2889 2.5208 10.8410 | lr 7.2e-04 | norm 1.0522 | dt 0.026 | |
| type train | step 8010 | loss 0.0362 0.2983 2.7133 11.7944 | lr 7.2e-04 | norm 1.3404 | dt 0.025 | |
| type train | step 8020 | loss 0.0344 0.2868 2.5247 11.0488 | lr 7.2e-04 | norm 1.2999 | dt 0.025 | |
| type train | step 8030 | loss 0.0342 0.2808 2.4663 10.8433 | lr 7.2e-04 | norm 1.1388 | dt 0.026 | |
| type train | step 8040 | loss 0.0347 0.2928 2.5757 11.0878 | lr 7.2e-04 | norm 1.2588 | dt 0.026 | |
| type train | step 8050 | loss 0.0356 0.2936 2.5055 10.7835 | lr 7.2e-04 | norm 1.1788 | dt 0.026 | |
| type train | step 8060 | loss 0.0344 0.2874 2.5065 10.9138 | lr 7.2e-04 | norm 1.1533 | dt 0.026 | |
| type train | step 8070 | loss 0.0374 0.2954 2.5551 11.2071 | lr 7.2e-04 | norm 1.7796 | dt 0.026 | |
| type train | step 8080 | loss 0.0352 0.2929 2.5996 11.2205 | lr 7.1e-04 | norm 1.4444 | dt 0.027 | |
| type train | step 8090 | loss 0.0346 0.2887 2.5641 11.1818 | lr 7.1e-04 | norm 1.2506 | dt 0.026 | |
| type train | step 8100 | loss 0.0336 0.2920 2.5488 10.9592 | lr 7.1e-04 | norm 1.3265 | dt 0.026 | |
| type train | step 8110 | loss 0.0348 0.2895 2.5374 10.9941 | lr 7.1e-04 | norm 1.2387 | dt 0.026 | |
| type train | step 8120 | loss 0.0340 0.2891 2.5358 10.9078 | lr 7.1e-04 | norm 1.4390 | dt 0.026 | |
| type train | step 8130 | loss 0.0369 0.2987 2.6343 11.5142 | lr 7.1e-04 | norm 1.2777 | dt 0.026 | |
| type train | step 8140 | loss 0.0360 0.2911 2.5083 11.0375 | lr 7.1e-04 | norm 1.2916 | dt 0.026 | |
| type train | step 8150 | loss 0.0341 0.2855 2.5623 11.0801 | lr 7.1e-04 | norm 1.2775 | dt 0.026 | |
| type train | step 8160 | loss 0.0335 0.2866 2.5423 11.0496 | lr 7.1e-04 | norm 1.1835 | dt 0.026 | |
| type train | step 8170 | loss 0.0352 0.2984 2.6138 11.2658 | lr 7.1e-04 | norm 1.3535 | dt 0.026 | |
| type train | step 8180 | loss 0.0343 0.2846 2.4835 10.7646 | lr 7.1e-04 | norm 1.1382 | dt 0.026 | |
| type train | step 8190 | loss 0.0346 0.2827 2.5109 11.0509 | lr 7.1e-04 | norm 1.2127 | dt 0.026 | |
| type train | step 8200 | loss 0.0356 0.2870 2.4905 10.8355 | lr 7.1e-04 | norm 1.1895 | dt 0.027 | |
| type train | step 8210 | loss 0.0356 0.2898 2.6223 11.2367 | lr 7.1e-04 | norm 1.5637 | dt 0.026 | |
| type train | step 8220 | loss 0.0339 0.2855 2.4963 10.9161 | lr 7.1e-04 | norm 1.3507 | dt 0.026 | |
| type train | step 8230 | loss 0.0348 0.2973 2.5665 11.0041 | lr 7.0e-04 | norm 1.3140 | dt 0.026 | |
| type train | step 8240 | loss 0.0350 0.2886 2.5431 11.0045 | lr 7.0e-04 | norm 1.3717 | dt 0.026 | |
| type train | step 8250 | loss 0.0349 0.2843 2.5269 11.0590 | lr 7.0e-04 | norm 1.3120 | dt 0.026 | |
| type train | step 8260 | loss 0.0352 0.2885 2.5963 11.3494 | lr 7.0e-04 | norm 1.1205 | dt 0.032 | |
| type train | step 8270 | loss 0.0338 0.2838 2.5670 11.2408 | lr 7.0e-04 | norm 1.2460 | dt 0.031 | |
| type train | step 8280 | loss 0.0341 0.2895 2.5588 11.1745 | lr 7.0e-04 | norm 1.4483 | dt 0.031 | |
| type train | step 8290 | loss 0.0338 0.2812 2.4600 10.7349 | lr 7.0e-04 | norm 1.3570 | dt 0.030 | |
| type train | step 8300 | loss 0.0341 0.2869 2.4860 10.9879 | lr 7.0e-04 | norm 1.3907 | dt 0.030 | |
| type train | step 8310 | loss 0.0359 0.2910 2.5656 11.2340 | lr 7.0e-04 | norm 1.3451 | dt 0.027 | |
| type train | step 8320 | loss 0.0343 0.2828 2.5670 11.2926 | lr 7.0e-04 | norm 1.1086 | dt 0.026 | |
| type train | step 8330 | loss 0.0349 0.2878 2.5309 10.8791 | lr 7.0e-04 | norm 1.2035 | dt 0.026 | |
| type train | step 8340 | loss 0.0342 0.2892 2.5444 11.0654 | lr 7.0e-04 | norm 1.3166 | dt 0.026 | |
| type train | step 8350 | loss 0.0340 0.2940 2.5561 10.9397 | lr 7.0e-04 | norm 1.4403 | dt 0.026 | |
| type train | step 8360 | loss 0.0341 0.2952 2.5915 11.2019 | lr 7.0e-04 | norm 1.2061 | dt 0.026 | |
| type train | step 8370 | loss 0.0345 0.2845 2.4889 10.8817 | lr 6.9e-04 | norm 1.3202 | dt 0.026 | |
| type train | step 8380 | loss 0.0352 0.2913 2.5641 11.1914 | lr 6.9e-04 | norm 1.1771 | dt 0.026 | |
| type train | step 8390 | loss 0.0338 0.2808 2.4403 10.6176 | lr 6.9e-04 | norm 1.2331 | dt 0.026 | |
| type train | step 8400 | loss 0.0350 0.2868 2.5598 10.9871 | lr 6.9e-04 | norm 1.2709 | dt 0.026 | |
| type train | step 8410 | loss 0.0340 0.2888 2.5836 11.1978 | lr 6.9e-04 | norm 1.2573 | dt 0.026 | |
| type train | step 8420 | loss 0.0337 0.2946 2.6188 11.2071 | lr 6.9e-04 | norm 1.3075 | dt 0.026 | |
| type train | step 8430 | loss 0.0349 0.2899 2.5511 11.1747 | lr 6.9e-04 | norm 1.3169 | dt 0.026 | |
| type train | step 8440 | loss 0.0345 0.2903 2.6244 11.4367 | lr 6.9e-04 | norm 1.2098 | dt 0.026 | |
| type train | step 8450 | loss 0.0340 0.2867 2.5984 11.3164 | lr 6.9e-04 | norm 1.4093 | dt 0.026 | |
| type train | step 8460 | loss 0.0352 0.2905 2.5917 11.3795 | lr 6.9e-04 | norm 1.1947 | dt 0.026 | |
| type train | step 8470 | loss 0.0328 0.2824 2.5049 10.7670 | lr 6.9e-04 | norm 1.2233 | dt 0.026 | |
| type train | step 8480 | loss 0.0334 0.2885 2.5471 11.1245 | lr 6.9e-04 | norm 1.4349 | dt 0.026 | |
| type train | step 8490 | loss 0.0351 0.2885 2.5293 11.0311 | lr 6.9e-04 | norm 1.5326 | dt 0.026 | |
| type train | step 8500 | loss 0.0343 0.2875 2.6441 11.6767 | lr 6.9e-04 | norm 1.3068 | dt 0.026 | |
| type train | step 8510 | loss 0.0337 0.2882 2.5151 10.7801 | lr 6.8e-04 | norm 1.3535 | dt 0.026 | |
| type train | step 8520 | loss 0.0349 0.2915 2.5870 11.3279 | lr 6.8e-04 | norm 1.3245 | dt 0.026 | |
| type train | step 8530 | loss 0.0328 0.2835 2.5135 10.8647 | lr 6.8e-04 | norm 1.1575 | dt 0.026 | |
| type train | step 8540 | loss 0.0335 0.2883 2.6344 11.4057 | lr 6.8e-04 | norm 1.4589 | dt 0.029 | |
| type train | step 8550 | loss 0.0335 0.2868 2.5500 11.1289 | lr 6.8e-04 | norm 1.1957 | dt 0.026 | |
| type train | step 8560 | loss 0.0340 0.2862 2.6005 11.4408 | lr 6.8e-04 | norm 1.2949 | dt 0.026 | |
| type train | step 8570 | loss 0.0339 0.2801 2.4964 10.8234 | lr 6.8e-04 | norm 1.2018 | dt 0.026 | |
| type train | step 8580 | loss 0.0354 0.2972 2.6187 11.3381 | lr 6.8e-04 | norm 1.5215 | dt 0.026 | |
| type train | step 8590 | loss 0.0327 0.2864 2.5144 10.7729 | lr 6.8e-04 | norm 1.2486 | dt 0.026 | |
| type train | step 8600 | loss 0.0329 0.2822 2.5376 11.1785 | lr 6.8e-04 | norm 1.3178 | dt 0.026 | |
| type train | step 8610 | loss 0.0331 0.2860 2.5038 10.8077 | lr 6.8e-04 | norm 1.0481 | dt 0.026 | |
| type train | step 8620 | loss 0.0350 0.2948 2.6991 11.7521 | lr 6.8e-04 | norm 1.3041 | dt 0.026 | |
| type train | step 8630 | loss 0.0333 0.2842 2.5090 11.0036 | lr 6.8e-04 | norm 1.3067 | dt 0.026 | |
| type train | step 8640 | loss 0.0333 0.2783 2.4505 10.8041 | lr 6.8e-04 | norm 1.1444 | dt 0.026 | |
| type train | step 8650 | loss 0.0337 0.2894 2.5633 11.0518 | lr 6.8e-04 | norm 1.2455 | dt 0.026 | |
| type train | step 8660 | loss 0.0346 0.2906 2.4918 10.7650 | lr 6.7e-04 | norm 1.1748 | dt 0.026 | |
| type train | step 8670 | loss 0.0334 0.2843 2.4882 10.8850 | lr 6.7e-04 | norm 1.1094 | dt 0.026 | |
| type train | step 8680 | loss 0.0364 0.2923 2.5345 11.1530 | lr 6.7e-04 | norm 1.7630 | dt 0.026 | |
| type train | step 8690 | loss 0.0340 0.2898 2.5856 11.1719 | lr 6.7e-04 | norm 1.4252 | dt 0.026 | |
| type train | step 8700 | loss 0.0336 0.2865 2.5466 11.1397 | lr 6.7e-04 | norm 1.2493 | dt 0.026 | |
| type train | step 8710 | loss 0.0326 0.2887 2.5329 10.9237 | lr 6.7e-04 | norm 1.2996 | dt 0.026 | |
| type train | step 8720 | loss 0.0337 0.2865 2.5159 10.9709 | lr 6.7e-04 | norm 1.2272 | dt 0.026 | |
| type train | step 8730 | loss 0.0330 0.2861 2.5242 10.8791 | lr 6.7e-04 | norm 1.4366 | dt 0.026 | |
| type train | step 8740 | loss 0.0359 0.2955 2.6197 11.4622 | lr 6.7e-04 | norm 1.2642 | dt 0.026 | |
| type train | step 8750 | loss 0.0347 0.2887 2.4948 11.0035 | lr 6.7e-04 | norm 1.2624 | dt 0.026 | |
| type train | step 8760 | loss 0.0332 0.2827 2.5484 11.0338 | lr 6.7e-04 | norm 1.2788 | dt 0.026 | |
| type train | step 8770 | loss 0.0324 0.2837 2.5286 11.0191 | lr 6.7e-04 | norm 1.1758 | dt 0.026 | |
| type train | step 8780 | loss 0.0343 0.2958 2.5991 11.2473 | lr 6.7e-04 | norm 1.3521 | dt 0.026 | |
| type train | step 8790 | loss 0.0333 0.2820 2.4672 10.7151 | lr 6.7e-04 | norm 1.1260 | dt 0.026 | |
| type train | step 8800 | loss 0.0337 0.2800 2.4977 11.0174 | lr 6.6e-04 | norm 1.2089 | dt 0.026 | |
| type train | step 8810 | loss 0.0348 0.2841 2.4767 10.8068 | lr 6.6e-04 | norm 1.1763 | dt 0.026 | |
| type train | step 8820 | loss 0.0346 0.2873 2.6040 11.1818 | lr 6.6e-04 | norm 1.5491 | dt 0.026 | |
| type train | step 8830 | loss 0.0331 0.2822 2.4809 10.8787 | lr 6.6e-04 | norm 1.3413 | dt 0.026 | |
| type train | step 8840 | loss 0.0340 0.2939 2.5458 10.9674 | lr 6.6e-04 | norm 1.2847 | dt 0.026 | |
| type train | step 8850 | loss 0.0340 0.2859 2.5290 10.9500 | lr 6.6e-04 | norm 1.3601 | dt 0.026 | |
| type train | step 8860 | loss 0.0339 0.2823 2.5138 11.0336 | lr 6.6e-04 | norm 1.3107 | dt 0.026 | |
| type train | step 8870 | loss 0.0342 0.2854 2.5820 11.2938 | lr 6.6e-04 | norm 1.1010 | dt 0.026 | |
| type train | step 8880 | loss 0.0331 0.2804 2.5561 11.2066 | lr 6.6e-04 | norm 1.2494 | dt 0.026 | |
| type train | step 8890 | loss 0.0332 0.2859 2.5444 11.1233 | lr 6.6e-04 | norm 1.4829 | dt 0.027 | |
| type train | step 8900 | loss 0.0329 0.2778 2.4433 10.7242 | lr 6.6e-04 | norm 1.3251 | dt 0.026 | |
| type train | step 8910 | loss 0.0332 0.2840 2.4745 10.9336 | lr 6.6e-04 | norm 1.3647 | dt 0.026 | |
| type train | step 8920 | loss 0.0351 0.2883 2.5480 11.1915 | lr 6.6e-04 | norm 1.3246 | dt 0.026 | |
| type train | step 8930 | loss 0.0332 0.2797 2.5519 11.2600 | lr 6.6e-04 | norm 1.0874 | dt 0.026 | |
| type train | step 8940 | loss 0.0340 0.2852 2.5146 10.8306 | lr 6.5e-04 | norm 1.1839 | dt 0.026 | |
| type train | step 8950 | loss 0.0334 0.2860 2.5305 11.0249 | lr 6.5e-04 | norm 1.2880 | dt 0.026 | |
| type train | step 8960 | loss 0.0330 0.2910 2.5421 10.9019 | lr 6.5e-04 | norm 1.4374 | dt 0.026 | |
| type train | step 8970 | loss 0.0332 0.2919 2.5776 11.1543 | lr 6.5e-04 | norm 1.2036 | dt 0.026 | |
| type train | step 8980 | loss 0.0334 0.2814 2.4700 10.8386 | lr 6.5e-04 | norm 1.2809 | dt 0.026 | |
| type train | step 8990 | loss 0.0342 0.2881 2.5481 11.1461 | lr 6.5e-04 | norm 1.1430 | dt 0.026 | |
| type train | step 9000 | loss 0.0331 0.2774 2.4281 10.5717 | lr 6.5e-04 | norm 1.2177 | dt 0.026 | |
| type train | step 9010 | loss 0.0342 0.2843 2.5443 10.9572 | lr 6.5e-04 | norm 1.2427 | dt 0.026 | |
| type train | step 9020 | loss 0.0333 0.2858 2.5698 11.1728 | lr 6.5e-04 | norm 1.2634 | dt 0.026 | |
| type train | step 9030 | loss 0.0328 0.2912 2.6026 11.1678 | lr 6.5e-04 | norm 1.2856 | dt 0.026 | |
| type train | step 9040 | loss 0.0339 0.2860 2.5361 11.1470 | lr 6.5e-04 | norm 1.2915 | dt 0.026 | |
| type train | step 9050 | loss 0.0336 0.2870 2.6093 11.3967 | lr 6.5e-04 | norm 1.1974 | dt 0.026 | |
| type train | step 9060 | loss 0.0331 0.2834 2.5819 11.2791 | lr 6.5e-04 | norm 1.3959 | dt 0.026 | |
| type train | step 9070 | loss 0.0342 0.2871 2.5780 11.3330 | lr 6.5e-04 | norm 1.1718 | dt 0.026 | |
| type train | step 9080 | loss 0.0319 0.2795 2.4914 10.7423 | lr 6.4e-04 | norm 1.2319 | dt 0.026 | |
| type train | step 9090 | loss 0.0325 0.2856 2.5315 11.0740 | lr 6.4e-04 | norm 1.4408 | dt 0.026 | |
| type train | step 9100 | loss 0.0343 0.2848 2.5106 10.9995 | lr 6.4e-04 | norm 1.5182 | dt 0.026 | |
| type train | step 9110 | loss 0.0334 0.2841 2.6299 11.6469 | lr 6.4e-04 | norm 1.2756 | dt 0.026 | |
| type train | step 9120 | loss 0.0327 0.2857 2.5008 10.7397 | lr 6.4e-04 | norm 1.3628 | dt 0.026 | |
| type train | step 9130 | loss 0.0339 0.2877 2.5750 11.2818 | lr 6.4e-04 | norm 1.3323 | dt 0.026 | |
| type train | step 9140 | loss 0.0321 0.2796 2.5022 10.8343 | lr 6.4e-04 | norm 1.1428 | dt 0.027 | |
| type train | step 9150 | loss 0.0326 0.2844 2.6214 11.3773 | lr 6.4e-04 | norm 1.4398 | dt 0.028 | |
| type train | step 9160 | loss 0.0326 0.2827 2.5322 11.1026 | lr 6.4e-04 | norm 1.1673 | dt 0.026 | |
| type train | step 9170 | loss 0.0332 0.2832 2.5885 11.4030 | lr 6.4e-04 | norm 1.3185 | dt 0.026 | |
| type train | step 9180 | loss 0.0328 0.2768 2.4800 10.7843 | lr 6.4e-04 | norm 1.2067 | dt 0.026 | |
| type train | step 9190 | loss 0.0347 0.2938 2.6043 11.3056 | lr 6.4e-04 | norm 1.5225 | dt 0.026 | |
| type train | step 9200 | loss 0.0319 0.2822 2.4993 10.7180 | lr 6.4e-04 | norm 1.2071 | dt 0.026 | |
| type train | step 9210 | loss 0.0321 0.2784 2.5268 11.1399 | lr 6.4e-04 | norm 1.3071 | dt 0.025 | |
| type train | step 9220 | loss 0.0323 0.2824 2.4877 10.7675 | lr 6.3e-04 | norm 1.0325 | dt 0.027 | |
| type train | step 9230 | loss 0.0341 0.2915 2.6861 11.7207 | lr 6.3e-04 | norm 1.3167 | dt 0.026 | |
| type train | step 9240 | loss 0.0324 0.2798 2.4970 10.9698 | lr 6.3e-04 | norm 1.2791 | dt 0.026 | |
| type train | step 9250 | loss 0.0324 0.2747 2.4387 10.7627 | lr 6.3e-04 | norm 1.1396 | dt 0.026 | |
| type train | step 9260 | loss 0.0328 0.2853 2.5488 11.0087 | lr 6.3e-04 | norm 1.2249 | dt 0.026 | |
| type train | step 9270 | loss 0.0337 0.2866 2.4825 10.7423 | lr 6.3e-04 | norm 1.1663 | dt 0.026 | |
| type train | step 9280 | loss 0.0326 0.2802 2.4727 10.8455 | lr 6.3e-04 | norm 1.0837 | dt 0.026 | |
| type train | step 9290 | loss 0.0354 0.2882 2.5151 11.1197 | lr 6.3e-04 | norm 1.7366 | dt 0.026 | |
| type train | step 9300 | loss 0.0332 0.2861 2.5721 11.1451 | lr 6.3e-04 | norm 1.4182 | dt 0.026 | |
| type train | step 9310 | loss 0.0328 0.2825 2.5305 11.1051 | lr 6.3e-04 | norm 1.2514 | dt 0.026 | |
| type train | step 9320 | loss 0.0318 0.2848 2.5189 10.8752 | lr 6.3e-04 | norm 1.3021 | dt 0.026 | |
| type train | step 9330 | loss 0.0328 0.2829 2.5025 10.9413 | lr 6.3e-04 | norm 1.2273 | dt 0.026 | |
| type train | step 9340 | loss 0.0322 0.2830 2.5144 10.8253 | lr 6.3e-04 | norm 1.4122 | dt 0.026 | |
| type train | step 9350 | loss 0.0350 0.2916 2.6049 11.4304 | lr 6.2e-04 | norm 1.2364 | dt 0.026 | |
| type train | step 9360 | loss 0.0340 0.2851 2.4825 10.9655 | lr 6.2e-04 | norm 1.2620 | dt 0.026 | |
| type train | step 9370 | loss 0.0324 0.2791 2.5320 11.0185 | lr 6.2e-04 | norm 1.2686 | dt 0.026 | |
| type train | step 9380 | loss 0.0316 0.2801 2.5150 10.9808 | lr 6.2e-04 | norm 1.1624 | dt 0.026 | |
| type train | step 9390 | loss 0.0332 0.2921 2.5880 11.2224 | lr 6.2e-04 | norm 1.3229 | dt 0.026 | |
| type train | step 9400 | loss 0.0324 0.2786 2.4532 10.6789 | lr 6.2e-04 | norm 1.1307 | dt 0.026 | |
| type train | step 9410 | loss 0.0329 0.2763 2.4803 10.9999 | lr 6.2e-04 | norm 1.2099 | dt 0.026 | |
| type train | step 9420 | loss 0.0337 0.2812 2.4628 10.7869 | lr 6.2e-04 | norm 1.1565 | dt 0.026 | |
| type train | step 9430 | loss 0.0338 0.2839 2.5890 11.1377 | lr 6.2e-04 | norm 1.5540 | dt 0.026 | |
| type train | step 9440 | loss 0.0322 0.2787 2.4696 10.8468 | lr 6.2e-04 | norm 1.3451 | dt 0.026 | |
| type train | step 9450 | loss 0.0332 0.2902 2.5333 10.9228 | lr 6.2e-04 | norm 1.2860 | dt 0.026 | |
| type train | step 9460 | loss 0.0332 0.2821 2.5162 10.9083 | lr 6.2e-04 | norm 1.3227 | dt 0.026 | |
| type train | step 9470 | loss 0.0330 0.2786 2.4993 11.0024 | lr 6.2e-04 | norm 1.2998 | dt 0.026 | |
| type train | step 9480 | loss 0.0334 0.2823 2.5704 11.2680 | lr 6.2e-04 | norm 1.0939 | dt 0.026 | |
| type train | step 9490 | loss 0.0322 0.2769 2.5456 11.1724 | lr 6.1e-04 | norm 1.2399 | dt 0.026 | |
| type train | step 9500 | loss 0.0324 0.2820 2.5326 11.0758 | lr 6.1e-04 | norm 1.4386 | dt 0.026 | |
| type train | step 9510 | loss 0.0321 0.2744 2.4305 10.6923 | lr 6.1e-04 | norm 1.3151 | dt 0.026 | |
| type train | step 9520 | loss 0.0324 0.2802 2.4631 10.8645 | lr 6.1e-04 | norm 1.3441 | dt 0.026 | |
| type train | step 9530 | loss 0.0342 0.2845 2.5323 11.1590 | lr 6.1e-04 | norm 1.2905 | dt 0.026 | |
| type train | step 9540 | loss 0.0326 0.2767 2.5418 11.2255 | lr 6.1e-04 | norm 1.0854 | dt 0.026 | |
| type train | step 9550 | loss 0.0331 0.2826 2.5035 10.7917 | lr 6.1e-04 | norm 1.1685 | dt 0.026 | |
| type train | step 9560 | loss 0.0325 0.2825 2.5165 10.9864 | lr 6.1e-04 | norm 1.2807 | dt 0.026 | |
| type train | step 9570 | loss 0.0324 0.2877 2.5305 10.8847 | lr 6.1e-04 | norm 1.4445 | dt 0.026 | |
| type train | step 9580 | loss 0.0324 0.2886 2.5637 11.1064 | lr 6.1e-04 | norm 1.2013 | dt 0.026 | |
| type train | step 9590 | loss 0.0327 0.2784 2.4562 10.8121 | lr 6.1e-04 | norm 1.2744 | dt 0.026 | |
| type train | step 9600 | loss 0.0334 0.2846 2.5354 11.1124 | lr 6.1e-04 | norm 1.1412 | dt 0.026 | |
| type train | step 9610 | loss 0.0322 0.2743 2.4148 10.5262 | lr 6.1e-04 | norm 1.1993 | dt 0.026 | |
| type train | step 9620 | loss 0.0334 0.2811 2.5301 10.9322 | lr 6.1e-04 | norm 1.2504 | dt 0.026 | |
| type train | step 9630 | loss 0.0325 0.2830 2.5553 11.1584 | lr 6.0e-04 | norm 1.2477 | dt 0.026 | |
| type train | step 9640 | loss 0.0321 0.2880 2.5906 11.1300 | lr 6.0e-04 | norm 1.2652 | dt 0.026 | |
| type train | step 9650 | loss 0.0332 0.2828 2.5266 11.1138 | lr 6.0e-04 | norm 1.3113 | dt 0.026 | |
| type train | step 9660 | loss 0.0327 0.2838 2.5954 11.3512 | lr 6.0e-04 | norm 1.1833 | dt 0.026 | |
| type train | step 9670 | loss 0.0324 0.2801 2.5668 11.2416 | lr 6.0e-04 | norm 1.3497 | dt 0.026 | |
| type train | step 9680 | loss 0.0335 0.2846 2.5654 11.3053 | lr 6.0e-04 | norm 1.1616 | dt 0.026 | |
| type train | step 9690 | loss 0.0311 0.2762 2.4782 10.7213 | lr 6.0e-04 | norm 1.2231 | dt 0.026 | |
| type train | step 9700 | loss 0.0319 0.2818 2.5195 11.0272 | lr 6.0e-04 | norm 1.4340 | dt 0.026 | |
| type train | step 9710 | loss 0.0336 0.2815 2.4974 10.9807 | lr 6.0e-04 | norm 1.5522 | dt 0.027 | |
| type train | step 9720 | loss 0.0327 0.2818 2.6162 11.6102 | lr 6.0e-04 | norm 1.2939 | dt 0.026 | |
| type train | step 9730 | loss 0.0319 0.2829 2.4890 10.7101 | lr 6.0e-04 | norm 1.3625 | dt 0.026 | |
| type train | step 9740 | loss 0.0332 0.2853 2.5627 11.2501 | lr 6.0e-04 | norm 1.3131 | dt 0.026 | |
| type train | step 9750 | loss 0.0314 0.2776 2.4890 10.7971 | lr 6.0e-04 | norm 1.1361 | dt 0.026 | |
| type train | step 9760 | loss 0.0320 0.2820 2.6104 11.3601 | lr 6.0e-04 | norm 1.4234 | dt 0.028 | |
| type train | step 9770 | loss 0.0319 0.2802 2.5183 11.0804 | lr 5.9e-04 | norm 1.1637 | dt 0.026 | |
| type train | step 9780 | loss 0.0324 0.2806 2.5742 11.3698 | lr 5.9e-04 | norm 1.2832 | dt 0.026 | |
| type train | step 9790 | loss 0.0321 0.2740 2.4686 10.7589 | lr 5.9e-04 | norm 1.2015 | dt 0.026 | |
| type train | step 9800 | loss 0.0339 0.2915 2.5901 11.2645 | lr 5.9e-04 | norm 1.5167 | dt 0.026 | |
| type train | step 9810 | loss 0.0315 0.2803 2.4852 10.6798 | lr 5.9e-04 | norm 1.2200 | dt 0.026 | |
| type train | step 9820 | loss 0.0316 0.2758 2.5133 11.1052 | lr 5.9e-04 | norm 1.3126 | dt 0.026 | |
| type train | step 9830 | loss 0.0315 0.2802 2.4750 10.7256 | lr 5.9e-04 | norm 1.0324 | dt 0.026 | |
| type train | step 9840 | loss 0.0334 0.2885 2.6711 11.6939 | lr 5.9e-04 | norm 1.2871 | dt 0.026 | |
| type train | step 9850 | loss 0.0317 0.2769 2.4835 10.9299 | lr 5.9e-04 | norm 1.2806 | dt 0.026 | |
| type train | step 9860 | loss 0.0318 0.2721 2.4257 10.7408 | lr 5.9e-04 | norm 1.1223 | dt 0.026 | |
| type train | step 9870 | loss 0.0321 0.2833 2.5386 10.9700 | lr 5.9e-04 | norm 1.2047 | dt 0.026 | |
| type train | step 9880 | loss 0.0330 0.2843 2.4727 10.7237 | lr 5.9e-04 | norm 1.1521 | dt 0.026 | |
| type train | step 9890 | loss 0.0318 0.2781 2.4594 10.8054 | lr 5.9e-04 | norm 1.0752 | dt 0.026 | |
| type train | step 9900 | loss 0.0347 0.2855 2.4999 11.0739 | lr 5.8e-04 | norm 1.7274 | dt 0.026 | |
| type train | step 9910 | loss 0.0325 0.2841 2.5608 11.1060 | lr 5.8e-04 | norm 1.4242 | dt 0.026 | |
| type train | step 9920 | loss 0.0322 0.2807 2.5191 11.0861 | lr 5.8e-04 | norm 1.2657 | dt 0.026 | |
| type train | step 9930 | loss 0.0312 0.2820 2.5066 10.8396 | lr 5.8e-04 | norm 1.2765 | dt 0.026 | |
| type train | step 9940 | loss 0.0321 0.2808 2.4919 10.9204 | lr 5.8e-04 | norm 1.2290 | dt 0.026 | |
| type train | step 9950 | loss 0.0316 0.2806 2.5059 10.7850 | lr 5.8e-04 | norm 1.4014 | dt 0.026 | |
| type train | step 9960 | loss 0.0342 0.2892 2.5879 11.3929 | lr 5.8e-04 | norm 1.2211 | dt 0.026 | |
| type train | step 9970 | loss 0.0334 0.2829 2.4739 10.9204 | lr 5.8e-04 | norm 1.2676 | dt 0.026 | |
| type train | step 9980 | loss 0.0316 0.2774 2.5199 10.9982 | lr 5.8e-04 | norm 1.2790 | dt 0.026 | |
| type train | step 9990 | loss 0.0310 0.2775 2.5029 10.9577 | lr 5.8e-04 | norm 1.1626 | dt 0.026 | |
| type train | step 10000 | loss 0.0325 0.2896 2.5770 11.1922 | lr 5.8e-04 | norm 1.3241 | dt 0.026 | |
| type train | step 10010 | loss 0.0317 0.2765 2.4420 10.6478 | lr 5.8e-04 | norm 1.1239 | dt 0.026 | |
| type train | step 10020 | loss 0.0323 0.2737 2.4680 10.9786 | lr 5.8e-04 | norm 1.2058 | dt 0.026 | |
| type train | step 10030 | loss 0.0330 0.2793 2.4507 10.7555 | lr 5.8e-04 | norm 1.1450 | dt 0.026 | |
| type train | step 10040 | loss 0.0331 0.2822 2.5769 11.1085 | lr 5.7e-04 | norm 1.5406 | dt 0.026 | |
| type train | step 10050 | loss 0.0316 0.2762 2.4569 10.8196 | lr 5.7e-04 | norm 1.3430 | dt 0.026 | |
| type train | step 10060 | loss 0.0324 0.2877 2.5219 10.8997 | lr 5.7e-04 | norm 1.2699 | dt 0.026 | |
| type train | step 10070 | loss 0.0325 0.2795 2.5046 10.8748 | lr 5.7e-04 | norm 1.3064 | dt 0.026 | |
| type train | step 10080 | loss 0.0323 0.2763 2.4877 10.9741 | lr 5.7e-04 | norm 1.3117 | dt 0.026 | |
| type train | step 10090 | loss 0.0326 0.2798 2.5568 11.2360 | lr 5.7e-04 | norm 1.0608 | dt 0.026 | |
| type train | step 10100 | loss 0.0315 0.2750 2.5351 11.1476 | lr 5.7e-04 | norm 1.2161 | dt 0.026 | |
| type train | step 10110 | loss 0.0317 0.2795 2.5195 11.0436 | lr 5.7e-04 | norm 1.4588 | dt 0.026 | |
| type train | step 10120 | loss 0.0315 0.2721 2.4201 10.6779 | lr 5.7e-04 | norm 1.3247 | dt 0.026 | |
| type train | step 10130 | loss 0.0318 0.2775 2.4525 10.8230 | lr 5.7e-04 | norm 1.3486 | dt 0.026 | |
| type train | step 10140 | loss 0.0334 0.2826 2.5185 11.1260 | lr 5.7e-04 | norm 1.2660 | dt 0.026 | |
| type train | step 10150 | loss 0.0320 0.2741 2.5309 11.2097 | lr 5.7e-04 | norm 1.0774 | dt 0.026 | |
| type train | step 10160 | loss 0.0325 0.2804 2.4900 10.7569 | lr 5.7e-04 | norm 1.1560 | dt 0.026 | |
| type train | step 10170 | loss 0.0318 0.2801 2.5049 10.9660 | lr 5.7e-04 | norm 1.2559 | dt 0.026 | |
| type train | step 10180 | loss 0.0315 0.2853 2.5175 10.8625 | lr 5.6e-04 | norm 1.4539 | dt 0.026 | |
| type train | step 10190 | loss 0.0316 0.2864 2.5487 11.0802 | lr 5.6e-04 | norm 1.1926 | dt 0.026 | |
| type train | step 10200 | loss 0.0320 0.2762 2.4436 10.7821 | lr 5.6e-04 | norm 1.2733 | dt 0.026 | |
| type train | step 10210 | loss 0.0327 0.2824 2.5229 11.0932 | lr 5.6e-04 | norm 1.1204 | dt 0.026 | |
| type train | step 10220 | loss 0.0316 0.2723 2.4036 10.4956 | lr 5.6e-04 | norm 1.2009 | dt 0.026 | |
| type train | step 10230 | loss 0.0327 0.2796 2.5181 10.9212 | lr 5.6e-04 | norm 1.2544 | dt 0.026 | |
| type train | step 10240 | loss 0.0317 0.2810 2.5429 11.1398 | lr 5.6e-04 | norm 1.2211 | dt 0.026 | |
| type train | step 10250 | loss 0.0313 0.2856 2.5799 11.1029 | lr 5.6e-04 | norm 1.2535 | dt 0.026 | |
| type train | step 10260 | loss 0.0324 0.2809 2.5137 11.0797 | lr 5.6e-04 | norm 1.3211 | dt 0.026 | |
| type train | step 10270 | loss 0.0319 0.2821 2.5800 11.3156 | lr 5.6e-04 | norm 1.1697 | dt 0.026 | |
| type train | step 10280 | loss 0.0316 0.2780 2.5556 11.2102 | lr 5.6e-04 | norm 1.3460 | dt 0.026 | |
| type train | step 10290 | loss 0.0327 0.2824 2.5543 11.2832 | lr 5.6e-04 | norm 1.1574 | dt 0.026 | |
| type train | step 10300 | loss 0.0304 0.2744 2.4670 10.6988 | lr 5.6e-04 | norm 1.2063 | dt 0.027 | |
| type train | step 10310 | loss 0.0311 0.2797 2.5056 10.9948 | lr 5.5e-04 | norm 1.4073 | dt 0.026 | |
| type train | step 10320 | loss 0.0329 0.2794 2.4843 10.9522 | lr 5.5e-04 | norm 1.5419 | dt 0.026 | |
| type train | step 10330 | loss 0.0319 0.2798 2.6018 11.5777 | lr 5.5e-04 | norm 1.2973 | dt 0.026 | |
| type train | step 10340 | loss 0.0313 0.2804 2.4772 10.6848 | lr 5.5e-04 | norm 1.3581 | dt 0.026 | |
| type train | step 10350 | loss 0.0324 0.2833 2.5519 11.2285 | lr 5.5e-04 | norm 1.3206 | dt 0.026 | |
| type train | step 10360 | loss 0.0306 0.2752 2.4769 10.7755 | lr 5.5e-04 | norm 1.1071 | dt 0.026 | |
| type train | step 10370 | loss 0.0313 0.2802 2.5997 11.3454 | lr 5.5e-04 | norm 1.4242 | dt 0.027 | |
| type train | step 10380 | loss 0.0312 0.2781 2.5087 11.0694 | lr 5.5e-04 | norm 1.1555 | dt 0.026 | |
| type train | step 10390 | loss 0.0317 0.2788 2.5605 11.3504 | lr 5.5e-04 | norm 1.2752 | dt 0.026 | |
| type train | step 10400 | loss 0.0314 0.2722 2.4568 10.7375 | lr 5.5e-04 | norm 1.2238 | dt 0.026 | |
| type train | step 10410 | loss 0.0330 0.2888 2.5794 11.2472 | lr 5.5e-04 | norm 1.4893 | dt 0.026 | |
| type train | step 10420 | loss 0.0305 0.2782 2.4765 10.6580 | lr 5.5e-04 | norm 1.2084 | dt 0.026 | |
| type train | step 10430 | loss 0.0308 0.2737 2.5025 11.0808 | lr 5.5e-04 | norm 1.2846 | dt 0.026 | |
| type train | step 10440 | loss 0.0308 0.2783 2.4652 10.7025 | lr 5.5e-04 | norm 0.9975 | dt 0.026 | |
| type train | step 10450 | loss 0.0326 0.2866 2.6552 11.6691 | lr 5.4e-04 | norm 1.2811 | dt 0.026 | |
| type train | step 10460 | loss 0.0310 0.2746 2.4721 10.9041 | lr 5.4e-04 | norm 1.2623 | dt 0.026 | |
| type train | step 10470 | loss 0.0309 0.2703 2.4141 10.7290 | lr 5.4e-04 | norm 1.1347 | dt 0.026 | |
| type train | step 10480 | loss 0.0315 0.2811 2.5284 10.9305 | lr 5.4e-04 | norm 1.2135 | dt 0.026 | |
| type train | step 10490 | loss 0.0323 0.2828 2.4621 10.7012 | lr 5.4e-04 | norm 1.1565 | dt 0.026 | |
| type train | step 10500 | loss 0.0311 0.2766 2.4487 10.7712 | lr 5.4e-04 | norm 1.0631 | dt 0.026 | |
| type train | step 10510 | loss 0.0338 0.2841 2.4882 11.0372 | lr 5.4e-04 | norm 1.7315 | dt 0.026 | |
| type train | step 10520 | loss 0.0317 0.2825 2.5514 11.0725 | lr 5.4e-04 | norm 1.4240 | dt 0.026 | |
| type train | step 10530 | loss 0.0314 0.2790 2.5077 11.0760 | lr 5.4e-04 | norm 1.2648 | dt 0.026 | |
| type train | step 10540 | loss 0.0302 0.2806 2.4983 10.8167 | lr 5.4e-04 | norm 1.2793 | dt 0.026 | |
| type train | step 10550 | loss 0.0315 0.2790 2.4833 10.9058 | lr 5.4e-04 | norm 1.2287 | dt 0.026 | |
| type train | step 10560 | loss 0.0309 0.2793 2.4973 10.7534 | lr 5.4e-04 | norm 1.3864 | dt 0.026 | |
| type train | step 10570 | loss 0.0334 0.2874 2.5768 11.3636 | lr 5.4e-04 | norm 1.2164 | dt 0.026 | |
| type train | step 10580 | loss 0.0325 0.2817 2.4604 10.8963 | lr 5.3e-04 | norm 1.2466 | dt 0.026 | |
| type train | step 10590 | loss 0.0309 0.2758 2.5093 10.9777 | lr 5.3e-04 | norm 1.2647 | dt 0.026 | |
| type train | step 10600 | loss 0.0302 0.2756 2.4927 10.9432 | lr 5.3e-04 | norm 1.1574 | dt 0.026 | |
| type train | step 10610 | loss 0.0319 0.2874 2.5672 11.1683 | lr 5.3e-04 | norm 1.3202 | dt 0.026 | |
| type train | step 10620 | loss 0.0310 0.2747 2.4317 10.6083 | lr 5.3e-04 | norm 1.0907 | dt 0.026 | |
| type train | step 10630 | loss 0.0316 0.2717 2.4565 10.9567 | lr 5.3e-04 | norm 1.1844 | dt 0.026 | |
| type train | step 10640 | loss 0.0324 0.2777 2.4398 10.7262 | lr 5.3e-04 | norm 1.1200 | dt 0.026 | |
| type train | step 10650 | loss 0.0324 0.2809 2.5658 11.0841 | lr 5.3e-04 | norm 1.5100 | dt 0.026 | |
| type train | step 10660 | loss 0.0309 0.2748 2.4443 10.7901 | lr 5.3e-04 | norm 1.3292 | dt 0.026 | |
| type train | step 10670 | loss 0.0318 0.2862 2.5142 10.8857 | lr 5.3e-04 | norm 1.2778 | dt 0.026 | |
| type train | step 10680 | loss 0.0319 0.2776 2.4929 10.8461 | lr 5.3e-04 | norm 1.3079 | dt 0.026 | |
| type train | step 10690 | loss 0.0316 0.2745 2.4756 10.9552 | lr 5.3e-04 | norm 1.2749 | dt 0.026 | |
| type train | step 10700 | loss 0.0320 0.2782 2.5444 11.2158 | lr 5.3e-04 | norm 1.0497 | dt 0.026 | |
| type train | step 10710 | loss 0.0308 0.2737 2.5242 11.1209 | lr 5.3e-04 | norm 1.1930 | dt 0.026 | |
| type train | step 10720 | loss 0.0311 0.2782 2.5084 11.0126 | lr 5.2e-04 | norm 1.4400 | dt 0.026 | |
| type train | step 10730 | loss 0.0310 0.2705 2.4128 10.6607 | lr 5.2e-04 | norm 1.3271 | dt 0.026 | |
| type train | step 10740 | loss 0.0311 0.2764 2.4428 10.7907 | lr 5.2e-04 | norm 1.3169 | dt 0.026 | |
| type train | step 10750 | loss 0.0327 0.2808 2.5084 11.0904 | lr 5.2e-04 | norm 1.2447 | dt 0.026 | |
| type train | step 10760 | loss 0.0310 0.2727 2.5201 11.1763 | lr 5.2e-04 | norm 1.0586 | dt 0.026 | |
| type train | step 10770 | loss 0.0320 0.2786 2.4805 10.7209 | lr 5.2e-04 | norm 1.1387 | dt 0.026 | |
| type train | step 10780 | loss 0.0312 0.2788 2.4964 10.9337 | lr 5.2e-04 | norm 1.2423 | dt 0.026 | |
| type train | step 10790 | loss 0.0312 0.2833 2.5048 10.8502 | lr 5.2e-04 | norm 1.4372 | dt 0.026 | |
| type train | step 10800 | loss 0.0311 0.2849 2.5372 11.0534 | lr 5.2e-04 | norm 1.1911 | dt 0.027 | |
| type train | step 10810 | loss 0.0314 0.2744 2.4327 10.7543 | lr 5.2e-04 | norm 1.2544 | dt 0.026 | |
| type train | step 10820 | loss 0.0321 0.2813 2.5092 11.0794 | lr 5.2e-04 | norm 1.1049 | dt 0.026 | |
| type train | step 10830 | loss 0.0308 0.2707 2.3954 10.4622 | lr 5.2e-04 | norm 1.1778 | dt 0.026 | |
| type train | step 10840 | loss 0.0321 0.2779 2.5068 10.9151 | lr 5.2e-04 | norm 1.2088 | dt 0.026 | |
| type train | step 10850 | loss 0.0312 0.2794 2.5314 11.1088 | lr 5.2e-04 | norm 1.2081 | dt 0.026 | |
| type train | step 10860 | loss 0.0308 0.2839 2.5696 11.0792 | lr 5.1e-04 | norm 1.2428 | dt 0.027 | |
| type train | step 10870 | loss 0.0319 0.2797 2.5009 11.0524 | lr 5.1e-04 | norm 1.3176 | dt 0.026 | |
| type train | step 10880 | loss 0.0315 0.2805 2.5676 11.3032 | lr 5.1e-04 | norm 1.1831 | dt 0.026 | |
| type train | step 10890 | loss 0.0310 0.2765 2.5438 11.1730 | lr 5.1e-04 | norm 1.3202 | dt 0.026 | |
| type train | step 10900 | loss 0.0322 0.2812 2.5425 11.2656 | lr 5.1e-04 | norm 1.1564 | dt 0.026 | |
| type train | step 10910 | loss 0.0299 0.2728 2.4556 10.6830 | lr 5.1e-04 | norm 1.1689 | dt 0.026 | |
| type train | step 10920 | loss 0.0305 0.2783 2.4946 10.9586 | lr 5.1e-04 | norm 1.3814 | dt 0.026 | |
| type train | step 10930 | loss 0.0324 0.2785 2.4768 10.9342 | lr 5.1e-04 | norm 1.5456 | dt 0.026 | |
| type train | step 10940 | loss 0.0314 0.2783 2.5908 11.5545 | lr 5.1e-04 | norm 1.2973 | dt 0.026 | |
| type train | step 10950 | loss 0.0308 0.2793 2.4670 10.6633 | lr 5.1e-04 | norm 1.3817 | dt 0.026 | |
| type train | step 10960 | loss 0.0319 0.2822 2.5435 11.1972 | lr 5.1e-04 | norm 1.3293 | dt 0.026 | |
| type train | step 10970 | loss 0.0302 0.2740 2.4701 10.7437 | lr 5.1e-04 | norm 1.1089 | dt 0.026 | |
| type train | step 10980 | loss 0.0307 0.2788 2.5918 11.3374 | lr 5.1e-04 | norm 1.3954 | dt 0.029 | |
| type train | step 10990 | loss 0.0307 0.2769 2.4996 11.0479 | lr 5.0e-04 | norm 1.1413 | dt 0.026 | |
| type train | step 11000 | loss 0.0311 0.2776 2.5509 11.3288 | lr 5.0e-04 | norm 1.2780 | dt 0.026 | |
| type train | step 11010 | loss 0.0309 0.2704 2.4465 10.7039 | lr 5.0e-04 | norm 1.2204 | dt 0.026 | |
| type train | step 11020 | loss 0.0326 0.2873 2.5684 11.2283 | lr 5.0e-04 | norm 1.4823 | dt 0.026 | |
| type train | step 11030 | loss 0.0300 0.2771 2.4680 10.6395 | lr 5.0e-04 | norm 1.1927 | dt 0.026 | |
| type train | step 11040 | loss 0.0302 0.2726 2.4929 11.0577 | lr 5.0e-04 | norm 1.2812 | dt 0.026 | |
| type train | step 11050 | loss 0.0304 0.2773 2.4560 10.6748 | lr 5.0e-04 | norm 1.0037 | dt 0.026 | |
| type train | step 11060 | loss 0.0320 0.2845 2.6453 11.6349 | lr 5.0e-04 | norm 1.2587 | dt 0.026 | |
| type train | step 11070 | loss 0.0306 0.2735 2.4617 10.8677 | lr 5.0e-04 | norm 1.2561 | dt 0.026 | |
| type train | step 11080 | loss 0.0305 0.2688 2.4048 10.7102 | lr 5.0e-04 | norm 1.1205 | dt 0.026 | |
| type train | step 11090 | loss 0.0309 0.2797 2.5163 10.9006 | lr 5.0e-04 | norm 1.1840 | dt 0.026 | |
| type train | step 11100 | loss 0.0318 0.2813 2.4542 10.6837 | lr 5.0e-04 | norm 1.1639 | dt 0.026 | |
| type train | step 11110 | loss 0.0307 0.2753 2.4402 10.7448 | lr 5.0e-04 | norm 1.0593 | dt 0.026 | |
| type train | step 11120 | loss 0.0334 0.2817 2.4786 11.0125 | lr 5.0e-04 | norm 1.7148 | dt 0.026 | |
| type train | step 11130 | loss 0.0313 0.2808 2.5417 11.0290 | lr 4.9e-04 | norm 1.4015 | dt 0.026 | |
| type train | step 11140 | loss 0.0309 0.2774 2.4989 11.0504 | lr 4.9e-04 | norm 1.2614 | dt 0.026 | |
| type train | step 11150 | loss 0.0297 0.2791 2.4896 10.7937 | lr 4.9e-04 | norm 1.2633 | dt 0.026 | |
| type train | step 11160 | loss 0.0309 0.2776 2.4752 10.8872 | lr 4.9e-04 | norm 1.2253 | dt 0.026 | |
| type train | step 11170 | loss 0.0303 0.2780 2.4903 10.7275 | lr 4.9e-04 | norm 1.3564 | dt 0.027 | |
| type train | step 11180 | loss 0.0329 0.2862 2.5620 11.3349 | lr 4.9e-04 | norm 1.2219 | dt 0.026 | |
| type train | step 11190 | loss 0.0320 0.2803 2.4501 10.8764 | lr 4.9e-04 | norm 1.2373 | dt 0.026 | |
| type train | step 11200 | loss 0.0304 0.2739 2.4999 10.9607 | lr 4.9e-04 | norm 1.2451 | dt 0.026 | |
| type train | step 11210 | loss 0.0297 0.2741 2.4855 10.9193 | lr 4.9e-04 | norm 1.1611 | dt 0.026 | |
| type train | step 11220 | loss 0.0314 0.2864 2.5575 11.1417 | lr 4.9e-04 | norm 1.3040 | dt 0.026 | |
| type train | step 11230 | loss 0.0304 0.2735 2.4231 10.5818 | lr 4.9e-04 | norm 1.0862 | dt 0.026 | |
| type train | step 11240 | loss 0.0310 0.2704 2.4477 10.9355 | lr 4.9e-04 | norm 1.1773 | dt 0.026 | |
| type train | step 11250 | loss 0.0318 0.2768 2.4304 10.7002 | lr 4.9e-04 | norm 1.1082 | dt 0.026 | |
| type train | step 11260 | loss 0.0320 0.2803 2.5579 11.0644 | lr 4.9e-04 | norm 1.5100 | dt 0.026 | |
| type train | step 11270 | loss 0.0304 0.2735 2.4352 10.7627 | lr 4.8e-04 | norm 1.3423 | dt 0.026 | |
| type train | step 11280 | loss 0.0313 0.2850 2.5043 10.8691 | lr 4.8e-04 | norm 1.2786 | dt 0.026 | |
| type train | step 11290 | loss 0.0314 0.2765 2.4817 10.8012 | lr 4.8e-04 | norm 1.2863 | dt 0.026 | |
| type train | step 11300 | loss 0.0312 0.2732 2.4676 10.9388 | lr 4.8e-04 | norm 1.2810 | dt 0.026 | |
| type train | step 11310 | loss 0.0315 0.2772 2.5357 11.1944 | lr 4.8e-04 | norm 1.0354 | dt 0.026 | |
| type train | step 11320 | loss 0.0303 0.2726 2.5142 11.0959 | lr 4.8e-04 | norm 1.1988 | dt 0.026 | |
| type train | step 11330 | loss 0.0305 0.2766 2.4992 10.9780 | lr 4.8e-04 | norm 1.4332 | dt 0.026 | |
| type train | step 11340 | loss 0.0304 0.2689 2.4038 10.6264 | lr 4.8e-04 | norm 1.2963 | dt 0.026 | |
| type train | step 11350 | loss 0.0306 0.2743 2.4325 10.7591 | lr 4.8e-04 | norm 1.2877 | dt 0.026 | |
| type train | step 11360 | loss 0.0324 0.2791 2.4984 11.0513 | lr 4.8e-04 | norm 1.2540 | dt 0.026 | |
| type train | step 11370 | loss 0.0307 0.2719 2.5116 11.1481 | lr 4.8e-04 | norm 1.0551 | dt 0.026 | |
| type train | step 11380 | loss 0.0314 0.2774 2.4710 10.6930 | lr 4.8e-04 | norm 1.1295 | dt 0.026 | |
| type train | step 11390 | loss 0.0309 0.2776 2.4885 10.9077 | lr 4.8e-04 | norm 1.2277 | dt 0.026 | |
| type train | step 11400 | loss 0.0306 0.2817 2.4925 10.8364 | lr 4.8e-04 | norm 1.4228 | dt 0.026 | |
| type train | step 11410 | loss 0.0306 0.2833 2.5289 11.0122 | lr 4.7e-04 | norm 1.1766 | dt 0.027 | |
| type train | step 11420 | loss 0.0309 0.2727 2.4236 10.7251 | lr 4.7e-04 | norm 1.2393 | dt 0.026 | |
| type train | step 11430 | loss 0.0317 0.2795 2.4984 11.0516 | lr 4.7e-04 | norm 1.0972 | dt 0.026 | |
| type train | step 11440 | loss 0.0304 0.2694 2.3870 10.4327 | lr 4.7e-04 | norm 1.1557 | dt 0.026 | |
| type train | step 11450 | loss 0.0317 0.2771 2.4987 10.8972 | lr 4.7e-04 | norm 1.2013 | dt 0.026 | |
| type train | step 11460 | loss 0.0308 0.2782 2.5217 11.0783 | lr 4.7e-04 | norm 1.1960 | dt 0.026 | |
| type train | step 11470 | loss 0.0302 0.2824 2.5618 11.0562 | lr 4.7e-04 | norm 1.2349 | dt 0.027 | |
| type train | step 11480 | loss 0.0314 0.2778 2.4911 11.0292 | lr 4.7e-04 | norm 1.3183 | dt 0.026 | |
| type train | step 11490 | loss 0.0310 0.2791 2.5597 11.3011 | lr 4.7e-04 | norm 1.1679 | dt 0.026 | |
| type train | step 11500 | loss 0.0305 0.2751 2.5358 11.1354 | lr 4.7e-04 | norm 1.3144 | dt 0.026 | |
| type train | step 11510 | loss 0.0317 0.2797 2.5361 11.2455 | lr 4.7e-04 | norm 1.1337 | dt 0.026 | |
| type train | step 11520 | loss 0.0295 0.2716 2.4484 10.6589 | lr 4.7e-04 | norm 1.1467 | dt 0.026 | |
| type train | step 11530 | loss 0.0302 0.2769 2.4838 10.9154 | lr 4.7e-04 | norm 1.3540 | dt 0.026 | |
| type train | step 11540 | loss 0.0319 0.2769 2.4673 10.9187 | lr 4.6e-04 | norm 1.5234 | dt 0.026 | |
| type train | step 11550 | loss 0.0309 0.2770 2.5831 11.5308 | lr 4.6e-04 | norm 1.2883 | dt 0.026 | |
| type train | step 11560 | loss 0.0302 0.2779 2.4606 10.6406 | lr 4.6e-04 | norm 1.3740 | dt 0.026 | |
| type train | step 11570 | loss 0.0314 0.2808 2.5339 11.1791 | lr 4.6e-04 | norm 1.3278 | dt 0.026 | |
| type train | step 11580 | loss 0.0296 0.2725 2.4618 10.7080 | lr 4.6e-04 | norm 1.0859 | dt 0.026 | |
| type train | step 11590 | loss 0.0302 0.2769 2.5860 11.3097 | lr 4.6e-04 | norm 1.3863 | dt 0.027 | |
| type train | step 11600 | loss 0.0301 0.2749 2.4915 11.0140 | lr 4.6e-04 | norm 1.1388 | dt 0.026 | |
| type train | step 11610 | loss 0.0307 0.2758 2.5412 11.3014 | lr 4.6e-04 | norm 1.2752 | dt 0.026 | |
| type train | step 11620 | loss 0.0304 0.2694 2.4395 10.6722 | lr 4.6e-04 | norm 1.2372 | dt 0.026 | |
| type train | step 11630 | loss 0.0321 0.2857 2.5587 11.2192 | lr 4.6e-04 | norm 1.4596 | dt 0.026 | |
| type train | step 11640 | loss 0.0295 0.2756 2.4582 10.6218 | lr 4.6e-04 | norm 1.2014 | dt 0.026 | |
| type train | step 11650 | loss 0.0298 0.2710 2.4845 11.0285 | lr 4.6e-04 | norm 1.2516 | dt 0.026 | |
| type train | step 11660 | loss 0.0300 0.2758 2.4496 10.6500 | lr 4.6e-04 | norm 0.9797 | dt 0.026 | |
| type train | step 11670 | loss 0.0316 0.2836 2.6355 11.6123 | lr 4.6e-04 | norm 1.2495 | dt 0.026 | |
| type train | step 11680 | loss 0.0302 0.2719 2.4536 10.8467 | lr 4.5e-04 | norm 1.2856 | dt 0.026 | |
| type train | step 11690 | loss 0.0301 0.2677 2.3978 10.6876 | lr 4.5e-04 | norm 1.1460 | dt 0.026 | |
| type train | step 11700 | loss 0.0305 0.2781 2.5103 10.8673 | lr 4.5e-04 | norm 1.1991 | dt 0.026 | |
| type train | step 11710 | loss 0.0313 0.2802 2.4484 10.6646 | lr 4.5e-04 | norm 1.1631 | dt 0.026 | |
| type train | step 11720 | loss 0.0303 0.2737 2.4319 10.7125 | lr 4.5e-04 | norm 1.0348 | dt 0.026 | |
| type train | step 11730 | loss 0.0329 0.2809 2.4716 10.9879 | lr 4.5e-04 | norm 1.7286 | dt 0.026 | |
| type train | step 11740 | loss 0.0308 0.2791 2.5335 11.0009 | lr 4.5e-04 | norm 1.4076 | dt 0.026 | |
| type train | step 11750 | loss 0.0305 0.2758 2.4895 11.0439 | lr 4.5e-04 | norm 1.2688 | dt 0.026 | |
| type train | step 11760 | loss 0.0295 0.2776 2.4835 10.7580 | lr 4.5e-04 | norm 1.2762 | dt 0.026 | |
| type train | step 11770 | loss 0.0304 0.2765 2.4680 10.8686 | lr 4.5e-04 | norm 1.2230 | dt 0.026 | |
| type train | step 11780 | loss 0.0300 0.2763 2.4854 10.7013 | lr 4.5e-04 | norm 1.3541 | dt 0.026 | |
| type train | step 11790 | loss 0.0324 0.2848 2.5534 11.3095 | lr 4.5e-04 | norm 1.2235 | dt 0.026 | |
| type train | step 11800 | loss 0.0316 0.2789 2.4403 10.8539 | lr 4.5e-04 | norm 1.2433 | dt 0.026 | |
| type train | step 11810 | loss 0.0300 0.2728 2.4927 10.9344 | lr 4.5e-04 | norm 1.2296 | dt 0.026 | |
| type train | step 11820 | loss 0.0293 0.2729 2.4770 10.8898 | lr 4.4e-04 | norm 1.1326 | dt 0.026 | |
| type train | step 11830 | loss 0.0308 0.2854 2.5495 11.1154 | lr 4.4e-04 | norm 1.2826 | dt 0.026 | |
| type train | step 11840 | loss 0.0300 0.2720 2.4139 10.5630 | lr 4.4e-04 | norm 1.0603 | dt 0.034 | |
| type train | step 11850 | loss 0.0305 0.2692 2.4405 10.9120 | lr 4.4e-04 | norm 1.1584 | dt 0.029 | |
| type train | step 11860 | loss 0.0314 0.2754 2.4210 10.6793 | lr 4.4e-04 | norm 1.0829 | dt 0.030 | |
| type train | step 11870 | loss 0.0315 0.2789 2.5480 11.0481 | lr 4.4e-04 | norm 1.4683 | dt 0.030 | |
| type train | step 11880 | loss 0.0300 0.2721 2.4268 10.7331 | lr 4.4e-04 | norm 1.3121 | dt 0.027 | |
| type train | step 11890 | loss 0.0309 0.2835 2.4953 10.8559 | lr 4.4e-04 | norm 1.2668 | dt 0.026 | |
| type train | step 11900 | loss 0.0309 0.2752 2.4731 10.7726 | lr 4.4e-04 | norm 1.2731 | dt 0.026 | |
| type train | step 11910 | loss 0.0309 0.2715 2.4607 10.9154 | lr 4.4e-04 | norm 1.2727 | dt 0.026 | |
| type train | step 11920 | loss 0.0311 0.2761 2.5290 11.1714 | lr 4.4e-04 | norm 1.0231 | dt 0.026 | |
| type train | step 11930 | loss 0.0299 0.2710 2.5081 11.0647 | lr 4.4e-04 | norm 1.1596 | dt 0.026 | |
| type train | step 11940 | loss 0.0301 0.2753 2.4901 10.9514 | lr 4.4e-04 | norm 1.4076 | dt 0.026 | |
| type train | step 11950 | loss 0.0300 0.2677 2.3951 10.6030 | lr 4.4e-04 | norm 1.2760 | dt 0.026 | |
| type train | step 11960 | loss 0.0301 0.2731 2.4220 10.7299 | lr 4.3e-04 | norm 1.2747 | dt 0.026 | |
| type train | step 11970 | loss 0.0317 0.2776 2.4881 11.0185 | lr 4.3e-04 | norm 1.2327 | dt 0.027 | |
| type train | step 11980 | loss 0.0302 0.2707 2.5060 11.1154 | lr 4.3e-04 | norm 1.0629 | dt 0.026 | |
| type train | step 11990 | loss 0.0308 0.2760 2.4632 10.6650 | lr 4.3e-04 | norm 1.1261 | dt 0.026 | |
| type train | step 12000 | loss 0.0304 0.2759 2.4798 10.8817 | lr 4.3e-04 | norm 1.1884 | dt 0.026 | |
| type train | step 12010 | loss 0.0302 0.2803 2.4823 10.8187 | lr 4.3e-04 | norm 1.4163 | dt 0.026 | |
| type train | step 12020 | loss 0.0301 0.2816 2.5217 10.9872 | lr 4.3e-04 | norm 1.1656 | dt 0.026 | |
| type train | step 12030 | loss 0.0304 0.2715 2.4170 10.6917 | lr 4.3e-04 | norm 1.2251 | dt 0.026 | |
| type train | step 12040 | loss 0.0311 0.2787 2.4897 11.0362 | lr 4.3e-04 | norm 1.0812 | dt 0.026 | |
| type train | step 12050 | loss 0.0301 0.2683 2.3806 10.3973 | lr 4.3e-04 | norm 1.1613 | dt 0.026 | |
| type train | step 12060 | loss 0.0312 0.2756 2.4904 10.8759 | lr 4.3e-04 | norm 1.1861 | dt 0.026 | |
| type train | step 12070 | loss 0.0304 0.2766 2.5148 11.0527 | lr 4.3e-04 | norm 1.1699 | dt 0.026 | |
| type train | step 12080 | loss 0.0297 0.2812 2.5564 11.0407 | lr 4.3e-04 | norm 1.2267 | dt 0.026 | |
| type train | step 12090 | loss 0.0309 0.2766 2.4825 10.9934 | lr 4.3e-04 | norm 1.3395 | dt 0.025 | |
| type train | step 12100 | loss 0.0305 0.2777 2.5523 11.2924 | lr 4.2e-04 | norm 1.1696 | dt 0.026 | |
| type train | step 12110 | loss 0.0302 0.2733 2.5290 11.1096 | lr 4.2e-04 | norm 1.2833 | dt 0.026 | |
| type train | step 12120 | loss 0.0313 0.2783 2.5278 11.2173 | lr 4.2e-04 | norm 1.1350 | dt 0.026 | |
| type train | step 12130 | loss 0.0291 0.2705 2.4424 10.6422 | lr 4.2e-04 | norm 1.1219 | dt 0.026 | |
| type train | step 12140 | loss 0.0296 0.2753 2.4762 10.8884 | lr 4.2e-04 | norm 1.3280 | dt 0.026 | |
| type train | step 12150 | loss 0.0315 0.2755 2.4585 10.8866 | lr 4.2e-04 | norm 1.5415 | dt 0.026 | |
| type train | step 12160 | loss 0.0305 0.2752 2.5725 11.4990 | lr 4.2e-04 | norm 1.2619 | dt 0.026 | |
| type train | step 12170 | loss 0.0298 0.2759 2.4562 10.6121 | lr 4.2e-04 | norm 1.3784 | dt 0.026 | |
| type train | step 12180 | loss 0.0309 0.2796 2.5258 11.1521 | lr 4.2e-04 | norm 1.3284 | dt 0.026 | |
| type train | step 12190 | loss 0.0291 0.2709 2.4549 10.6790 | lr 4.2e-04 | norm 1.0796 | dt 0.026 | |
| type train | step 12200 | loss 0.0298 0.2755 2.5805 11.2785 | lr 4.2e-04 | norm 1.3745 | dt 0.027 | |
| type train | step 12210 | loss 0.0298 0.2736 2.4831 10.9889 | lr 4.2e-04 | norm 1.1304 | dt 0.026 | |
| type train | step 12220 | loss 0.0303 0.2743 2.5354 11.2755 | lr 4.2e-04 | norm 1.2572 | dt 0.026 | |
| type train | step 12230 | loss 0.0299 0.2680 2.4323 10.6462 | lr 4.2e-04 | norm 1.2290 | dt 0.026 | |
| type train | step 12240 | loss 0.0316 0.2842 2.5506 11.2002 | lr 4.2e-04 | norm 1.4602 | dt 0.026 | |
| type train | step 12250 | loss 0.0291 0.2740 2.4512 10.5942 | lr 4.1e-04 | norm 1.1769 | dt 0.026 | |
| type train | step 12260 | loss 0.0293 0.2698 2.4772 11.0014 | lr 4.1e-04 | norm 1.2276 | dt 0.026 | |
| type train | step 12270 | loss 0.0295 0.2745 2.4415 10.6332 | lr 4.1e-04 | norm 0.9766 | dt 0.026 | |
| type train | step 12280 | loss 0.0310 0.2818 2.6271 11.5872 | lr 4.1e-04 | norm 1.2273 | dt 0.026 | |
| type train | step 12290 | loss 0.0297 0.2708 2.4486 10.8220 | lr 4.1e-04 | norm 1.2658 | dt 0.026 | |
| type train | step 12300 | loss 0.0295 0.2664 2.3912 10.6717 | lr 4.1e-04 | norm 1.1212 | dt 0.026 | |
| type train | step 12310 | loss 0.0301 0.2770 2.5025 10.8385 | lr 4.1e-04 | norm 1.1925 | dt 0.026 | |
| type train | step 12320 | loss 0.0309 0.2784 2.4439 10.6547 | lr 4.1e-04 | norm 1.1542 | dt 0.026 | |
| type train | step 12330 | loss 0.0297 0.2729 2.4253 10.6843 | lr 4.1e-04 | norm 1.0151 | dt 0.026 | |
| type train | step 12340 | loss 0.0325 0.2789 2.4637 10.9546 | lr 4.1e-04 | norm 1.7123 | dt 0.026 | |
| type train | step 12350 | loss 0.0304 0.2773 2.5241 10.9737 | lr 4.1e-04 | norm 1.3901 | dt 0.026 | |
| type train | step 12360 | loss 0.0300 0.2744 2.4831 11.0193 | lr 4.1e-04 | norm 1.2591 | dt 0.026 | |
| type train | step 12370 | loss 0.0289 0.2761 2.4753 10.7221 | lr 4.1e-04 | norm 1.2708 | dt 0.026 | |
| type train | step 12380 | loss 0.0301 0.2752 2.4622 10.8579 | lr 4.1e-04 | norm 1.2167 | dt 0.026 | |
| type train | step 12390 | loss 0.0294 0.2749 2.4794 10.6862 | lr 4.0e-04 | norm 1.3232 | dt 0.026 | |
| type train | step 12400 | loss 0.0319 0.2832 2.5433 11.2885 | lr 4.0e-04 | norm 1.2146 | dt 0.026 | |
| type train | step 12410 | loss 0.0311 0.2775 2.4327 10.8287 | lr 4.0e-04 | norm 1.2085 | dt 0.026 | |
| type train | step 12420 | loss 0.0295 0.2714 2.4872 10.9099 | lr 4.0e-04 | norm 1.2066 | dt 0.026 | |
| type train | step 12430 | loss 0.0288 0.2717 2.4690 10.8636 | lr 4.0e-04 | norm 1.1248 | dt 0.026 | |
| type train | step 12440 | loss 0.0304 0.2841 2.5453 11.0935 | lr 4.0e-04 | norm 1.2788 | dt 0.026 | |
| type train | step 12450 | loss 0.0296 0.2703 2.4074 10.5501 | lr 4.0e-04 | norm 1.0455 | dt 0.026 | |
| type train | step 12460 | loss 0.0300 0.2676 2.4358 10.8866 | lr 4.0e-04 | norm 1.1401 | dt 0.026 | |
| type train | step 12470 | loss 0.0310 0.2741 2.4158 10.6636 | lr 4.0e-04 | norm 1.0610 | dt 0.026 | |
| type train | step 12480 | loss 0.0310 0.2775 2.5411 11.0233 | lr 4.0e-04 | norm 1.4564 | dt 0.026 | |
| type train | step 12490 | loss 0.0296 0.2708 2.4209 10.7027 | lr 4.0e-04 | norm 1.3077 | dt 0.026 | |
| type train | step 12500 | loss 0.0304 0.2819 2.4893 10.8403 | lr 4.0e-04 | norm 1.2738 | dt 0.026 | |
| type train | step 12510 | loss 0.0305 0.2735 2.4651 10.7546 | lr 4.0e-04 | norm 1.2713 | dt 0.027 | |
| type train | step 12520 | loss 0.0304 0.2700 2.4558 10.8882 | lr 4.0e-04 | norm 1.2628 | dt 0.026 | |
| type train | step 12530 | loss 0.0307 0.2747 2.5218 11.1572 | lr 3.9e-04 | norm 1.0216 | dt 0.026 | |
| type train | step 12540 | loss 0.0295 0.2693 2.5044 11.0339 | lr 3.9e-04 | norm 1.1378 | dt 0.026 | |
| type train | step 12550 | loss 0.0298 0.2737 2.4804 10.9257 | lr 3.9e-04 | norm 1.3680 | dt 0.026 | |
| type train | step 12560 | loss 0.0297 0.2662 2.3888 10.5814 | lr 3.9e-04 | norm 1.2711 | dt 0.026 | |
| type train | step 12570 | loss 0.0298 0.2720 2.4158 10.7116 | lr 3.9e-04 | norm 1.2677 | dt 0.026 | |
| type train | step 12580 | loss 0.0313 0.2760 2.4816 11.0005 | lr 3.9e-04 | norm 1.2306 | dt 0.026 | |
| type train | step 12590 | loss 0.0300 0.2691 2.4998 11.0935 | lr 3.9e-04 | norm 1.0600 | dt 0.026 | |
| type train | step 12600 | loss 0.0304 0.2744 2.4568 10.6548 | lr 3.9e-04 | norm 1.1370 | dt 0.026 | |
| type train | step 12610 | loss 0.0299 0.2753 2.4713 10.8718 | lr 3.9e-04 | norm 1.1487 | dt 0.026 | |
| type train | step 12620 | loss 0.0297 0.2786 2.4756 10.8026 | lr 3.9e-04 | norm 1.4004 | dt 0.026 | |
| type train | step 12630 | loss 0.0297 0.2798 2.5146 10.9694 | lr 3.9e-04 | norm 1.1313 | dt 0.026 | |
| type train | step 12640 | loss 0.0299 0.2704 2.4117 10.6673 | lr 3.9e-04 | norm 1.2216 | dt 0.026 | |
| type train | step 12650 | loss 0.0307 0.2773 2.4835 11.0084 | lr 3.9e-04 | norm 1.0707 | dt 0.026 | |
| type train | step 12660 | loss 0.0297 0.2672 2.3747 10.3778 | lr 3.9e-04 | norm 1.1676 | dt 0.026 | |
| type train | step 12670 | loss 0.0308 0.2743 2.4808 10.8656 | lr 3.9e-04 | norm 1.1589 | dt 0.026 | |
| type train | step 12680 | loss 0.0300 0.2754 2.5102 11.0184 | lr 3.8e-04 | norm 1.1554 | dt 0.026 | |
| type train | step 12690 | loss 0.0294 0.2797 2.5474 11.0331 | lr 3.8e-04 | norm 1.1821 | dt 0.026 | |
| type train | step 12700 | loss 0.0306 0.2751 2.4739 10.9710 | lr 3.8e-04 | norm 1.3138 | dt 0.026 | |
| type train | step 12710 | loss 0.0301 0.2766 2.5454 11.2708 | lr 3.8e-04 | norm 1.1609 | dt 0.026 | |
| type train | step 12720 | loss 0.0297 0.2726 2.5230 11.0810 | lr 3.8e-04 | norm 1.2912 | dt 0.026 | |
| type train | step 12730 | loss 0.0308 0.2770 2.5203 11.1997 | lr 3.8e-04 | norm 1.1251 | dt 0.026 | |
| type train | step 12740 | loss 0.0287 0.2694 2.4365 10.6203 | lr 3.8e-04 | norm 1.1046 | dt 0.026 | |
| type train | step 12750 | loss 0.0293 0.2741 2.4666 10.8554 | lr 3.8e-04 | norm 1.2952 | dt 0.026 | |
| type train | step 12760 | loss 0.0310 0.2738 2.4494 10.8704 | lr 3.8e-04 | norm 1.5313 | dt 0.026 | |
| type train | step 12770 | loss 0.0301 0.2742 2.5666 11.4682 | lr 3.8e-04 | norm 1.2683 | dt 0.026 | |
| type train | step 12780 | loss 0.0295 0.2747 2.4506 10.5954 | lr 3.8e-04 | norm 1.3810 | dt 0.026 | |
| type train | step 12790 | loss 0.0308 0.2783 2.5186 11.1339 | lr 3.8e-04 | norm 1.3328 | dt 0.026 | |
| type train | step 12800 | loss 0.0288 0.2702 2.4510 10.6604 | lr 3.8e-04 | norm 1.0643 | dt 0.026 | |
| type train | step 12810 | loss 0.0294 0.2739 2.5749 11.2462 | lr 3.8e-04 | norm 1.3627 | dt 0.029 | |
| type train | step 12820 | loss 0.0294 0.2727 2.4778 10.9763 | lr 3.8e-04 | norm 1.1212 | dt 0.026 | |
| type train | step 12830 | loss 0.0299 0.2727 2.5288 11.2590 | lr 3.7e-04 | norm 1.2326 | dt 0.026 | |
| type train | step 12840 | loss 0.0296 0.2667 2.4271 10.6283 | lr 3.7e-04 | norm 1.2257 | dt 0.026 | |
| type train | step 12850 | loss 0.0312 0.2829 2.5437 11.1845 | lr 3.7e-04 | norm 1.4468 | dt 0.026 | |
| type train | step 12860 | loss 0.0288 0.2729 2.4430 10.5714 | lr 3.7e-04 | norm 1.1902 | dt 0.026 | |
| type train | step 12870 | loss 0.0289 0.2687 2.4722 10.9804 | lr 3.7e-04 | norm 1.2283 | dt 0.026 | |
| type train | step 12880 | loss 0.0291 0.2734 2.4388 10.6236 | lr 3.7e-04 | norm 0.9657 | dt 0.026 | |
| type train | step 12890 | loss 0.0307 0.2799 2.6200 11.5662 | lr 3.7e-04 | norm 1.2016 | dt 0.026 | |
| type train | step 12900 | loss 0.0293 0.2694 2.4422 10.7965 | lr 3.7e-04 | norm 1.2465 | dt 0.026 | |
| type train | step 12910 | loss 0.0293 0.2654 2.3857 10.6554 | lr 3.7e-04 | norm 1.1152 | dt 0.027 | |
| type train | step 12920 | loss 0.0296 0.2758 2.4950 10.8109 | lr 3.7e-04 | norm 1.1977 | dt 0.026 | |
| type train | step 12930 | loss 0.0306 0.2775 2.4402 10.6367 | lr 3.7e-04 | norm 1.1694 | dt 0.026 | |
| type train | step 12940 | loss 0.0294 0.2719 2.4200 10.6627 | lr 3.7e-04 | norm 1.0168 | dt 0.026 | |
| type train | step 12950 | loss 0.0320 0.2775 2.4557 10.9386 | lr 3.7e-04 | norm 1.6871 | dt 0.025 | |
| type train | step 12960 | loss 0.0300 0.2760 2.5173 10.9496 | lr 3.7e-04 | norm 1.3656 | dt 0.026 | |
| type train | step 12970 | loss 0.0297 0.2733 2.4784 11.0063 | lr 3.7e-04 | norm 1.2643 | dt 0.026 | |
| type train | step 12980 | loss 0.0286 0.2750 2.4690 10.6915 | lr 3.6e-04 | norm 1.2308 | dt 0.026 | |
| type train | step 12990 | loss 0.0297 0.2737 2.4585 10.8348 | lr 3.6e-04 | norm 1.2226 | dt 0.026 | |
| type train | step 13000 | loss 0.0292 0.2738 2.4749 10.6695 | lr 3.6e-04 | norm 1.3156 | dt 0.026 | |
| type train | step 13010 | loss 0.0317 0.2818 2.5374 11.2594 | lr 3.6e-04 | norm 1.2103 | dt 0.026 | |
| type train | step 13020 | loss 0.0308 0.2764 2.4269 10.8095 | lr 3.6e-04 | norm 1.1924 | dt 0.026 | |
| type train | step 13030 | loss 0.0293 0.2704 2.4811 10.8865 | lr 3.6e-04 | norm 1.1992 | dt 0.026 | |
| type train | step 13040 | loss 0.0285 0.2702 2.4641 10.8465 | lr 3.6e-04 | norm 1.1348 | dt 0.026 | |
| type train | step 13050 | loss 0.0301 0.2829 2.5409 11.0726 | lr 3.6e-04 | norm 1.2824 | dt 0.026 | |
| type train | step 13060 | loss 0.0293 0.2694 2.4027 10.5342 | lr 3.6e-04 | norm 1.0504 | dt 0.026 | |
| type train | step 13070 | loss 0.0297 0.2665 2.4310 10.8617 | lr 3.6e-04 | norm 1.1312 | dt 0.026 | |
| type train | step 13080 | loss 0.0305 0.2726 2.4089 10.6377 | lr 3.6e-04 | norm 1.0321 | dt 0.026 | |
| type train | step 13090 | loss 0.0307 0.2755 2.5346 10.9963 | lr 3.6e-04 | norm 1.4158 | dt 0.026 | |
| type train | step 13100 | loss 0.0292 0.2693 2.4155 10.6842 | lr 3.6e-04 | norm 1.2765 | dt 0.026 | |
| type train | step 13110 | loss 0.0301 0.2809 2.4838 10.8209 | lr 3.6e-04 | norm 1.2621 | dt 0.026 | |
| type train | step 13120 | loss 0.0300 0.2726 2.4590 10.7333 | lr 3.6e-04 | norm 1.2736 | dt 0.026 | |
| type train | step 13130 | loss 0.0299 0.2681 2.4497 10.8658 | lr 3.5e-04 | norm 1.2604 | dt 0.026 | |
| type train | step 13140 | loss 0.0303 0.2733 2.5166 11.1332 | lr 3.5e-04 | norm 1.0038 | dt 0.026 | |
| type train | step 13150 | loss 0.0292 0.2677 2.5010 11.0122 | lr 3.5e-04 | norm 1.1056 | dt 0.026 | |
| type train | step 13160 | loss 0.0293 0.2724 2.4725 10.8985 | lr 3.5e-04 | norm 1.3381 | dt 0.026 | |
| type train | step 13170 | loss 0.0294 0.2650 2.3824 10.5586 | lr 3.5e-04 | norm 1.2588 | dt 0.026 | |
| type train | step 13180 | loss 0.0295 0.2706 2.4096 10.6996 | lr 3.5e-04 | norm 1.2658 | dt 0.026 | |
| type train | step 13190 | loss 0.0311 0.2748 2.4740 10.9751 | lr 3.5e-04 | norm 1.2277 | dt 0.026 | |
| type train | step 13200 | loss 0.0295 0.2678 2.4952 11.0749 | lr 3.5e-04 | norm 1.0704 | dt 0.026 | |
| type train | step 13210 | loss 0.0301 0.2728 2.4519 10.6381 | lr 3.5e-04 | norm 1.1172 | dt 0.026 | |
| type train | step 13220 | loss 0.0297 0.2733 2.4650 10.8462 | lr 3.5e-04 | norm 1.1293 | dt 0.026 | |
| type train | step 13230 | loss 0.0294 0.2771 2.4711 10.7830 | lr 3.5e-04 | norm 1.4027 | dt 0.026 | |
| type train | step 13240 | loss 0.0294 0.2785 2.5064 10.9487 | lr 3.5e-04 | norm 1.1363 | dt 0.026 | |
| type train | step 13250 | loss 0.0296 0.2690 2.4048 10.6391 | lr 3.5e-04 | norm 1.2230 | dt 0.026 | |
| type train | step 13260 | loss 0.0305 0.2762 2.4786 10.9859 | lr 3.5e-04 | norm 1.0723 | dt 0.026 | |
| type train | step 13270 | loss 0.0293 0.2656 2.3684 10.3602 | lr 3.5e-04 | norm 1.1438 | dt 0.026 | |
| type train | step 13280 | loss 0.0306 0.2729 2.4728 10.8482 | lr 3.4e-04 | norm 1.1252 | dt 0.026 | |
| type train | step 13290 | loss 0.0296 0.2740 2.5025 10.9865 | lr 3.4e-04 | norm 1.1512 | dt 0.027 | |
| type train | step 13300 | loss 0.0291 0.2784 2.5390 11.0171 | lr 3.4e-04 | norm 1.1923 | dt 0.026 | |
| type train | step 13310 | loss 0.0303 0.2735 2.4661 10.9363 | lr 3.4e-04 | norm 1.3112 | dt 0.026 | |
| type train | step 13320 | loss 0.0298 0.2748 2.5409 11.2646 | lr 3.4e-04 | norm 1.1548 | dt 0.026 | |
| type train | step 13330 | loss 0.0294 0.2711 2.5174 11.0595 | lr 3.4e-04 | norm 1.2798 | dt 0.026 | |
| type train | step 13340 | loss 0.0306 0.2758 2.5148 11.1874 | lr 3.4e-04 | norm 1.1286 | dt 0.025 | |
| type train | step 13350 | loss 0.0283 0.2681 2.4325 10.5930 | lr 3.4e-04 | norm 1.1014 | dt 0.026 | |
| type train | step 13360 | loss 0.0289 0.2726 2.4609 10.8453 | lr 3.4e-04 | norm 1.2887 | dt 0.026 | |
| type train | step 13370 | loss 0.0309 0.2722 2.4424 10.8503 | lr 3.4e-04 | norm 1.5106 | dt 0.026 | |
| type train | step 13380 | loss 0.0297 0.2725 2.5602 11.4411 | lr 3.4e-04 | norm 1.2281 | dt 0.026 | |
| type train | step 13390 | loss 0.0292 0.2735 2.4442 10.5834 | lr 3.4e-04 | norm 1.3608 | dt 0.026 | |
| type train | step 13400 | loss 0.0304 0.2766 2.5127 11.1149 | lr 3.4e-04 | norm 1.3201 | dt 0.026 | |
| type train | step 13410 | loss 0.0285 0.2686 2.4457 10.6387 | lr 3.4e-04 | norm 1.0575 | dt 0.026 | |
| type train | step 13420 | loss 0.0291 0.2730 2.5691 11.2170 | lr 3.4e-04 | norm 1.3562 | dt 0.027 | |
| type train | step 13430 | loss 0.0290 0.2713 2.4711 10.9563 | lr 3.3e-04 | norm 1.1080 | dt 0.026 | |
| type train | step 13440 | loss 0.0295 0.2718 2.5242 11.2351 | lr 3.3e-04 | norm 1.2283 | dt 0.026 | |
| type train | step 13450 | loss 0.0292 0.2656 2.4196 10.6150 | lr 3.3e-04 | norm 1.2332 | dt 0.026 | |
| type train | step 13460 | loss 0.0309 0.2815 2.5402 11.1661 | lr 3.3e-04 | norm 1.4501 | dt 0.026 | |
| type train | step 13470 | loss 0.0284 0.2717 2.4378 10.5582 | lr 3.3e-04 | norm 1.1802 | dt 0.026 | |
| type train | step 13480 | loss 0.0287 0.2677 2.4660 10.9553 | lr 3.3e-04 | norm 1.1983 | dt 0.027 | |
| type train | step 13490 | loss 0.0288 0.2725 2.4350 10.6138 | lr 3.3e-04 | norm 0.9469 | dt 0.026 | |
| type train | step 13500 | loss 0.0304 0.2783 2.6109 11.5406 | lr 3.3e-04 | norm 1.1683 | dt 0.026 | |
| type train | step 13510 | loss 0.0291 0.2680 2.4379 10.7753 | lr 3.3e-04 | norm 1.2404 | dt 0.026 | |
| type train | step 13520 | loss 0.0289 0.2640 2.3786 10.6338 | lr 3.3e-04 | norm 1.1148 | dt 0.026 | |
| type train | step 13530 | loss 0.0294 0.2746 2.4887 10.7800 | lr 3.3e-04 | norm 1.1768 | dt 0.026 | |
| type train | step 13540 | loss 0.0303 0.2765 2.4366 10.6159 | lr 3.3e-04 | norm 1.1521 | dt 0.026 | |
| type train | step 13550 | loss 0.0289 0.2706 2.4151 10.6415 | lr 3.3e-04 | norm 0.9788 | dt 0.026 | |
| type train | step 13560 | loss 0.0318 0.2763 2.4520 10.9093 | lr 3.3e-04 | norm 1.6807 | dt 0.026 | |
| type train | step 13570 | loss 0.0297 0.2746 2.5111 10.9235 | lr 3.3e-04 | norm 1.3599 | dt 0.026 | |
| type train | step 13580 | loss 0.0295 0.2724 2.4709 10.9843 | lr 3.3e-04 | norm 1.2438 | dt 0.026 | |
| type train | step 13590 | loss 0.0283 0.2741 2.4652 10.6647 | lr 3.2e-04 | norm 1.2307 | dt 0.026 | |
| type train | step 13600 | loss 0.0294 0.2723 2.4532 10.8185 | lr 3.2e-04 | norm 1.1884 | dt 0.026 | |
| type train | step 13610 | loss 0.0288 0.2731 2.4718 10.6490 | lr 3.2e-04 | norm 1.3215 | dt 0.026 | |
| type train | step 13620 | loss 0.0314 0.2803 2.5307 11.2338 | lr 3.2e-04 | norm 1.1865 | dt 0.026 | |
| type train | step 13630 | loss 0.0305 0.2752 2.4221 10.7942 | lr 3.2e-04 | norm 1.1578 | dt 0.026 | |
| type train | step 13640 | loss 0.0289 0.2694 2.4750 10.8650 | lr 3.2e-04 | norm 1.1582 | dt 0.026 | |
| type train | step 13650 | loss 0.0283 0.2696 2.4589 10.8206 | lr 3.2e-04 | norm 1.0989 | dt 0.026 | |
| type train | step 13660 | loss 0.0298 0.2813 2.5339 11.0530 | lr 3.2e-04 | norm 1.2748 | dt 0.026 | |
| type train | step 13670 | loss 0.0290 0.2684 2.3973 10.5127 | lr 3.2e-04 | norm 1.0347 | dt 0.026 | |
| type train | step 13680 | loss 0.0295 0.2652 2.4256 10.8354 | lr 3.2e-04 | norm 1.1079 | dt 0.027 | |
| type train | step 13690 | loss 0.0302 0.2722 2.4040 10.6200 | lr 3.2e-04 | norm 1.0186 | dt 0.026 | |
| type train | step 13700 | loss 0.0304 0.2743 2.5275 10.9742 | lr 3.2e-04 | norm 1.3741 | dt 0.026 | |
| type train | step 13710 | loss 0.0290 0.2683 2.4124 10.6575 | lr 3.2e-04 | norm 1.2569 | dt 0.026 | |
| type train | step 13720 | loss 0.0298 0.2795 2.4781 10.7952 | lr 3.2e-04 | norm 1.2599 | dt 0.026 | |
| type train | step 13730 | loss 0.0299 0.2714 2.4516 10.7179 | lr 3.2e-04 | norm 1.2558 | dt 0.027 | |
| type train | step 13740 | loss 0.0297 0.2673 2.4452 10.8453 | lr 3.2e-04 | norm 1.2803 | dt 0.026 | |
| type train | step 13750 | loss 0.0299 0.2729 2.5113 11.1156 | lr 3.1e-04 | norm 1.0260 | dt 0.026 | |
| type train | step 13760 | loss 0.0289 0.2670 2.4942 10.9832 | lr 3.1e-04 | norm 1.0943 | dt 0.026 | |
| type train | step 13770 | loss 0.0291 0.2716 2.4677 10.8728 | lr 3.1e-04 | norm 1.3261 | dt 0.026 | |
| type train | step 13780 | loss 0.0291 0.2642 2.3766 10.5374 | lr 3.1e-04 | norm 1.2446 | dt 0.026 | |
| type train | step 13790 | loss 0.0292 0.2699 2.4021 10.6847 | lr 3.1e-04 | norm 1.2520 | dt 0.026 | |
| type train | step 13800 | loss 0.0306 0.2735 2.4685 10.9652 | lr 3.1e-04 | norm 1.2198 | dt 0.026 | |
| type train | step 13810 | loss 0.0292 0.2670 2.4913 11.0559 | lr 3.1e-04 | norm 1.0748 | dt 0.027 | |
| type train | step 13820 | loss 0.0299 0.2724 2.4464 10.6205 | lr 3.1e-04 | norm 1.1277 | dt 0.026 | |
| type train | step 13830 | loss 0.0293 0.2720 2.4595 10.8256 | lr 3.1e-04 | norm 1.0897 | dt 0.026 | |
| type train | step 13840 | loss 0.0292 0.2762 2.4651 10.7531 | lr 3.1e-04 | norm 1.3922 | dt 0.026 | |
| type train | step 13850 | loss 0.0292 0.2774 2.4998 10.9294 | lr 3.1e-04 | norm 1.1107 | dt 0.026 | |
| type train | step 13860 | loss 0.0294 0.2680 2.4012 10.6157 | lr 3.1e-04 | norm 1.2169 | dt 0.026 | |
| type train | step 13870 | loss 0.0301 0.2747 2.4734 10.9676 | lr 3.1e-04 | norm 1.0371 | dt 0.026 | |
| type train | step 13880 | loss 0.0290 0.2646 2.3648 10.3355 | lr 3.1e-04 | norm 1.1344 | dt 0.026 | |
| type train | step 13890 | loss 0.0303 0.2718 2.4659 10.8354 | lr 3.1e-04 | norm 1.1101 | dt 0.026 | |
| type train | step 13900 | loss 0.0294 0.2732 2.4968 10.9592 | lr 3.1e-04 | norm 1.1299 | dt 0.026 | |
| type train | step 13910 | loss 0.0289 0.2774 2.5321 11.0037 | lr 3.0e-04 | norm 1.1792 | dt 0.026 | |
| type train | step 13920 | loss 0.0300 0.2727 2.4599 10.9107 | lr 3.0e-04 | norm 1.2901 | dt 0.026 | |
| type train | step 13930 | loss 0.0296 0.2734 2.5351 11.2478 | lr 3.0e-04 | norm 1.1183 | dt 0.026 | |
| type train | step 13940 | loss 0.0291 0.2706 2.5122 11.0349 | lr 3.0e-04 | norm 1.2695 | dt 0.026 | |
| type train | step 13950 | loss 0.0303 0.2749 2.5084 11.1697 | lr 3.0e-04 | norm 1.1280 | dt 0.026 | |
| type train | step 13960 | loss 0.0281 0.2675 2.4291 10.5768 | lr 3.0e-04 | norm 1.0934 | dt 0.026 | |
| type train | step 13970 | loss 0.0288 0.2713 2.4555 10.8236 | lr 3.0e-04 | norm 1.2560 | dt 0.026 | |
| type train | step 13980 | loss 0.0306 0.2713 2.4366 10.8322 | lr 3.0e-04 | norm 1.4906 | dt 0.026 | |
| type train | step 13990 | loss 0.0294 0.2715 2.5578 11.4161 | lr 3.0e-04 | norm 1.2167 | dt 0.026 | |
| type train | step 14000 | loss 0.0289 0.2725 2.4404 10.5596 | lr 3.0e-04 | norm 1.3681 | dt 0.026 | |
| type train | step 14010 | loss 0.0300 0.2758 2.5095 11.0940 | lr 3.0e-04 | norm 1.3292 | dt 0.026 | |
| type train | step 14020 | loss 0.0282 0.2678 2.4420 10.6196 | lr 3.0e-04 | norm 1.0561 | dt 0.026 | |
| type train | step 14030 | loss 0.0289 0.2716 2.5660 11.2019 | lr 3.0e-04 | norm 1.3316 | dt 0.029 | |
| type train | step 14040 | loss 0.0287 0.2706 2.4638 10.9367 | lr 3.0e-04 | norm 1.0627 | dt 0.027 | |
| type train | step 14050 | loss 0.0292 0.2710 2.5173 11.2091 | lr 3.0e-04 | norm 1.2105 | dt 0.026 | |
| type train | step 14060 | loss 0.0289 0.2649 2.4142 10.5900 | lr 3.0e-04 | norm 1.2097 | dt 0.026 | |
| type train | step 14070 | loss 0.0307 0.2805 2.5342 11.1457 | lr 2.9e-04 | norm 1.4361 | dt 0.026 | |
| type train | step 14080 | loss 0.0282 0.2707 2.4328 10.5296 | lr 2.9e-04 | norm 1.1574 | dt 0.026 | |
| type train | step 14090 | loss 0.0283 0.2669 2.4622 10.9300 | lr 2.9e-04 | norm 1.1849 | dt 0.026 | |
| type train | step 14100 | loss 0.0285 0.2716 2.4335 10.5989 | lr 2.9e-04 | norm 0.9482 | dt 0.026 | |
| type train | step 14110 | loss 0.0303 0.2781 2.6057 11.5183 | lr 2.9e-04 | norm 1.1880 | dt 0.026 | |
| type train | step 14120 | loss 0.0287 0.2670 2.4353 10.7535 | lr 2.9e-04 | norm 1.2337 | dt 0.026 | |
| type train | step 14130 | loss 0.0287 0.2630 2.3749 10.6141 | lr 2.9e-04 | norm 1.0897 | dt 0.026 | |
| type train | step 14140 | loss 0.0290 0.2737 2.4860 10.7535 | lr 2.9e-04 | norm 1.1562 | dt 0.026 | |
| type train | step 14150 | loss 0.0300 0.2752 2.4326 10.5982 | lr 2.9e-04 | norm 1.1386 | dt 0.026 | |
| type train | step 14160 | loss 0.0287 0.2696 2.4121 10.6273 | lr 2.9e-04 | norm 0.9732 | dt 0.026 | |
| type train | step 14170 | loss 0.0315 0.2755 2.4471 10.8814 | lr 2.9e-04 | norm 1.6685 | dt 0.026 | |
| type train | step 14180 | loss 0.0295 0.2737 2.5068 10.8995 | lr 2.9e-04 | norm 1.3652 | dt 0.026 | |
| type train | step 14190 | loss 0.0292 0.2715 2.4658 10.9551 | lr 2.9e-04 | norm 1.2368 | dt 0.026 | |
| type train | step 14200 | loss 0.0280 0.2727 2.4610 10.6412 | lr 2.9e-04 | norm 1.2005 | dt 0.026 | |
| type train | step 14210 | loss 0.0290 0.2718 2.4481 10.7998 | lr 2.9e-04 | norm 1.1956 | dt 0.026 | |
| type train | step 14220 | loss 0.0287 0.2721 2.4682 10.6340 | lr 2.9e-04 | norm 1.3133 | dt 0.026 | |
| type train | step 14230 | loss 0.0311 0.2793 2.5242 11.2006 | lr 2.9e-04 | norm 1.1662 | dt 0.026 | |
| type train | step 14240 | loss 0.0302 0.2748 2.4187 10.7705 | lr 2.8e-04 | norm 1.1370 | dt 0.026 | |
| type train | step 14250 | loss 0.0286 0.2684 2.4696 10.8475 | lr 2.8e-04 | norm 1.1488 | dt 0.026 | |
| type train | step 14260 | loss 0.0279 0.2683 2.4547 10.7997 | lr 2.8e-04 | norm 1.0741 | dt 0.026 | |
| type train | step 14270 | loss 0.0295 0.2806 2.5299 11.0180 | lr 2.8e-04 | norm 1.2468 | dt 0.026 | |
| type train | step 14280 | loss 0.0287 0.2681 2.3923 10.4971 | lr 2.8e-04 | norm 1.0319 | dt 0.026 | |
| type train | step 14290 | loss 0.0292 0.2644 2.4221 10.8069 | lr 2.8e-04 | norm 1.1118 | dt 0.026 | |
| type train | step 14300 | loss 0.0300 0.2715 2.4005 10.5998 | lr 2.8e-04 | norm 1.0111 | dt 0.026 | |
| type train | step 14310 | loss 0.0301 0.2739 2.5239 10.9569 | lr 2.8e-04 | norm 1.3740 | dt 0.026 | |
| type train | step 14320 | loss 0.0287 0.2676 2.4093 10.6297 | lr 2.8e-04 | norm 1.2559 | dt 0.026 | |
| type train | step 14330 | loss 0.0296 0.2789 2.4727 10.7723 | lr 2.8e-04 | norm 1.2439 | dt 0.026 | |
| type train | step 14340 | loss 0.0296 0.2707 2.4471 10.7077 | lr 2.8e-04 | norm 1.2573 | dt 0.026 | |
| type train | step 14350 | loss 0.0293 0.2662 2.4428 10.8215 | lr 2.8e-04 | norm 1.2871 | dt 0.026 | |
| type train | step 14360 | loss 0.0297 0.2719 2.5068 11.0864 | lr 2.8e-04 | norm 1.0132 | dt 0.026 | |
| type train | step 14370 | loss 0.0287 0.2660 2.4910 10.9572 | lr 2.8e-04 | norm 1.0767 | dt 0.026 | |
| type train | step 14380 | loss 0.0288 0.2706 2.4632 10.8568 | lr 2.8e-04 | norm 1.3019 | dt 0.026 | |
| type train | step 14390 | loss 0.0288 0.2631 2.3704 10.5199 | lr 2.8e-04 | norm 1.2199 | dt 0.026 | |
| type train | step 14400 | loss 0.0289 0.2688 2.3949 10.6735 | lr 2.8e-04 | norm 1.2358 | dt 0.026 | |
| type train | step 14410 | loss 0.0305 0.2728 2.4621 10.9450 | lr 2.7e-04 | norm 1.2091 | dt 0.027 | |
| type train | step 14420 | loss 0.0289 0.2663 2.4867 11.0393 | lr 2.7e-04 | norm 1.0662 | dt 0.026 | |
| type train | step 14430 | loss 0.0296 0.2713 2.4419 10.5972 | lr 2.7e-04 | norm 1.1125 | dt 0.026 | |
| type train | step 14440 | loss 0.0290 0.2716 2.4567 10.8063 | lr 2.7e-04 | norm 1.0754 | dt 0.027 | |
| type train | step 14450 | loss 0.0290 0.2750 2.4597 10.7215 | lr 2.7e-04 | norm 1.3981 | dt 0.026 | |
| type train | step 14460 | loss 0.0289 0.2763 2.4957 10.9056 | lr 2.7e-04 | norm 1.0990 | dt 0.026 | |
| type train | step 14470 | loss 0.0293 0.2674 2.3984 10.5994 | lr 2.7e-04 | norm 1.2253 | dt 0.026 | |
| type train | step 14480 | loss 0.0299 0.2739 2.4704 10.9419 | lr 2.7e-04 | norm 1.0329 | dt 0.027 | |
| type train | step 14490 | loss 0.0288 0.2640 2.3596 10.3144 | lr 2.7e-04 | norm 1.1257 | dt 0.026 | |
| type train | step 14500 | loss 0.0300 0.2711 2.4629 10.8133 | lr 2.7e-04 | norm 1.0937 | dt 0.026 | |
| type train | step 14510 | loss 0.0292 0.2722 2.4918 10.9358 | lr 2.7e-04 | norm 1.1193 | dt 0.026 | |
| type train | step 14520 | loss 0.0285 0.2765 2.5292 10.9948 | lr 2.7e-04 | norm 1.1787 | dt 0.026 | |
| type train | step 14530 | loss 0.0297 0.2722 2.4554 10.8923 | lr 2.7e-04 | norm 1.2804 | dt 0.026 | |
| type train | step 14540 | loss 0.0292 0.2726 2.5314 11.2313 | lr 2.7e-04 | norm 1.1025 | dt 0.026 | |
| type train | step 14550 | loss 0.0288 0.2698 2.5069 11.0183 | lr 2.7e-04 | norm 1.2557 | dt 0.026 | |
| type train | step 14560 | loss 0.0301 0.2740 2.5060 11.1540 | lr 2.7e-04 | norm 1.1334 | dt 0.026 | |
| type train | step 14570 | loss 0.0278 0.2668 2.4262 10.5631 | lr 2.7e-04 | norm 1.0847 | dt 0.026 | |
| type train | step 14580 | loss 0.0285 0.2707 2.4522 10.8083 | lr 2.6e-04 | norm 1.2343 | dt 0.026 | |
| type train | step 14590 | loss 0.0302 0.2705 2.4315 10.8143 | lr 2.6e-04 | norm 1.4617 | dt 0.026 | |
| type train | step 14600 | loss 0.0291 0.2705 2.5524 11.3992 | lr 2.6e-04 | norm 1.2036 | dt 0.026 | |
| type train | step 14610 | loss 0.0288 0.2717 2.4375 10.5396 | lr 2.6e-04 | norm 1.3749 | dt 0.027 | |
| type train | step 14620 | loss 0.0299 0.2748 2.5076 11.0797 | lr 2.6e-04 | norm 1.3171 | dt 0.026 | |
| type train | step 14630 | loss 0.0280 0.2670 2.4375 10.6020 | lr 2.6e-04 | norm 1.0578 | dt 0.026 | |
| type train | step 14640 | loss 0.0286 0.2711 2.5606 11.1850 | lr 2.6e-04 | norm 1.3178 | dt 0.027 | |
| type train | step 14650 | loss 0.0285 0.2698 2.4588 10.9259 | lr 2.6e-04 | norm 1.0434 | dt 0.026 | |
| type train | step 14660 | loss 0.0290 0.2696 2.5147 11.1929 | lr 2.6e-04 | norm 1.1727 | dt 0.026 | |
| type train | step 14670 | loss 0.0287 0.2642 2.4091 10.5767 | lr 2.6e-04 | norm 1.1920 | dt 0.026 | |
| type train | step 14680 | loss 0.0306 0.2798 2.5303 11.1310 | lr 2.6e-04 | norm 1.4318 | dt 0.026 | |
| type train | step 14690 | loss 0.0279 0.2699 2.4289 10.5150 | lr 2.6e-04 | norm 1.1574 | dt 0.026 | |
| type train | step 14700 | loss 0.0281 0.2656 2.4587 10.9089 | lr 2.6e-04 | norm 1.1462 | dt 0.026 | |
| type train | step 14710 | loss 0.0283 0.2709 2.4296 10.5844 | lr 2.6e-04 | norm 0.9247 | dt 0.026 | |
| type train | step 14720 | loss 0.0299 0.2768 2.6014 11.4924 | lr 2.6e-04 | norm 1.1473 | dt 0.026 | |
| type train | step 14730 | loss 0.0285 0.2665 2.4306 10.7382 | lr 2.6e-04 | norm 1.2182 | dt 0.026 | |
| type train | step 14740 | loss 0.0285 0.2621 2.3708 10.5884 | lr 2.6e-04 | norm 1.0830 | dt 0.026 | |
| type train | step 14750 | loss 0.0289 0.2732 2.4813 10.7326 | lr 2.6e-04 | norm 1.1641 | dt 0.026 | |
| type train | step 14760 | loss 0.0298 0.2748 2.4294 10.5770 | lr 2.5e-04 | norm 1.1311 | dt 0.026 | |
| type train | step 14770 | loss 0.0286 0.2690 2.4092 10.6131 | lr 2.5e-04 | norm 0.9863 | dt 0.026 | |
| type train | step 14780 | loss 0.0313 0.2748 2.4428 10.8548 | lr 2.5e-04 | norm 1.6735 | dt 0.026 | |
| type train | step 14790 | loss 0.0292 0.2724 2.5020 10.8864 | lr 2.5e-04 | norm 1.3202 | dt 0.026 | |
| type train | step 14800 | loss 0.0289 0.2708 2.4601 10.9332 | lr 2.5e-04 | norm 1.2076 | dt 0.026 | |
| type train | step 14810 | loss 0.0278 0.2721 2.4561 10.6228 | lr 2.5e-04 | norm 1.1953 | dt 0.026 | |
| type train | step 14820 | loss 0.0289 0.2711 2.4436 10.7812 | lr 2.5e-04 | norm 1.1919 | dt 0.026 | |
| type train | step 14830 | loss 0.0285 0.2711 2.4661 10.6170 | lr 2.5e-04 | norm 1.3039 | dt 0.026 | |
| type train | step 14840 | loss 0.0309 0.2779 2.5198 11.1807 | lr 2.5e-04 | norm 1.1632 | dt 0.026 | |
| type train | step 14850 | loss 0.0300 0.2743 2.4152 10.7485 | lr 2.5e-04 | norm 1.1240 | dt 0.026 | |
| type train | step 14860 | loss 0.0284 0.2679 2.4653 10.8305 | lr 2.5e-04 | norm 1.1398 | dt 0.026 | |
| type train | step 14870 | loss 0.0278 0.2677 2.4499 10.7872 | lr 2.5e-04 | norm 1.0771 | dt 0.026 | |
| type train | step 14880 | loss 0.0294 0.2796 2.5247 10.9966 | lr 2.5e-04 | norm 1.2398 | dt 0.025 | |
| type train | step 14890 | loss 0.0284 0.2671 2.3872 10.4879 | lr 2.5e-04 | norm 1.0079 | dt 0.026 | |
| type train | step 14900 | loss 0.0290 0.2634 2.4177 10.7906 | lr 2.5e-04 | norm 1.0964 | dt 0.026 | |
| type train | step 14910 | loss 0.0298 0.2707 2.3969 10.5850 | lr 2.5e-04 | norm 0.9937 | dt 0.026 | |
| type train | step 14920 | loss 0.0298 0.2724 2.5196 10.9373 | lr 2.5e-04 | norm 1.3417 | dt 0.026 | |
| type train | step 14930 | loss 0.0284 0.2666 2.4043 10.6143 | lr 2.5e-04 | norm 1.2303 | dt 0.026 | |
| type train | step 14940 | loss 0.0293 0.2775 2.4699 10.7581 | lr 2.4e-04 | norm 1.2362 | dt 0.026 | |
| type train | step 14950 | loss 0.0293 0.2700 2.4431 10.6849 | lr 2.4e-04 | norm 1.2455 | dt 0.027 | |
| type train | step 14960 | loss 0.0291 0.2652 2.4388 10.8084 | lr 2.4e-04 | norm 1.2757 | dt 0.026 | |
| type train | step 14970 | loss 0.0294 0.2711 2.5045 11.0614 | lr 2.4e-04 | norm 1.0087 | dt 0.026 | |
| type train | step 14980 | loss 0.0285 0.2652 2.4883 10.9400 | lr 2.4e-04 | norm 1.0716 | dt 0.026 | |
| type train | step 14990 | loss 0.0286 0.2700 2.4598 10.8417 | lr 2.4e-04 | norm 1.2907 | dt 0.026 | |
| type train | step 15000 | loss 0.0286 0.2625 2.3649 10.5053 | lr 2.4e-04 | norm 1.2158 | dt 0.026 | |
| type train | step 15010 | loss 0.0287 0.2683 2.3898 10.6555 | lr 2.4e-04 | norm 1.2279 | dt 0.026 | |
| type train | step 15020 | loss 0.0303 0.2719 2.4583 10.9287 | lr 2.4e-04 | norm 1.2161 | dt 0.026 | |
| type train | step 15030 | loss 0.0287 0.2652 2.4850 11.0197 | lr 2.4e-04 | norm 1.0648 | dt 0.026 | |
| type train | step 15040 | loss 0.0293 0.2705 2.4386 10.5932 | lr 2.4e-04 | norm 1.1129 | dt 0.026 | |
| type train | step 15050 | loss 0.0289 0.2707 2.4534 10.7926 | lr 2.4e-04 | norm 1.0704 | dt 0.026 | |
| type train | step 15060 | loss 0.0287 0.2744 2.4539 10.7000 | lr 2.4e-04 | norm 1.3625 | dt 0.026 | |
| type train | step 15070 | loss 0.0286 0.2755 2.4916 10.8891 | lr 2.4e-04 | norm 1.0735 | dt 0.026 | |
| type train | step 15080 | loss 0.0289 0.2670 2.3968 10.5890 | lr 2.4e-04 | norm 1.2420 | dt 0.026 | |
| type train | step 15090 | loss 0.0295 0.2726 2.4680 10.9220 | lr 2.4e-04 | norm 1.0206 | dt 0.026 | |
| type train | step 15100 | loss 0.0286 0.2632 2.3567 10.2976 | lr 2.4e-04 | norm 1.1188 | dt 0.026 | |
| type train | step 15110 | loss 0.0298 0.2702 2.4597 10.8010 | lr 2.4e-04 | norm 1.0863 | dt 0.026 | |
| type train | step 15120 | loss 0.0290 0.2715 2.4872 10.9119 | lr 2.4e-04 | norm 1.0902 | dt 0.026 | |
| type train | step 15130 | loss 0.0283 0.2756 2.5244 10.9846 | lr 2.3e-04 | norm 1.1732 | dt 0.026 | |
| type train | step 15140 | loss 0.0295 0.2713 2.4499 10.8791 | lr 2.3e-04 | norm 1.2691 | dt 0.026 | |
| type train | step 15150 | loss 0.0290 0.2716 2.5281 11.2185 | lr 2.3e-04 | norm 1.0985 | dt 0.026 | |
| type train | step 15160 | loss 0.0288 0.2687 2.5028 11.0031 | lr 2.3e-04 | norm 1.2651 | dt 0.026 | |
| type train | step 15170 | loss 0.0298 0.2729 2.5043 11.1369 | lr 2.3e-04 | norm 1.1344 | dt 0.026 | |
| type train | step 15180 | loss 0.0275 0.2656 2.4229 10.5441 | lr 2.3e-04 | norm 1.0856 | dt 0.026 | |
| type train | step 15190 | loss 0.0281 0.2696 2.4485 10.7968 | lr 2.3e-04 | norm 1.2251 | dt 0.026 | |
| type train | step 15200 | loss 0.0301 0.2699 2.4275 10.7938 | lr 2.3e-04 | norm 1.4691 | dt 0.026 | |
| type train | step 15210 | loss 0.0289 0.2693 2.5489 11.3852 | lr 2.3e-04 | norm 1.1871 | dt 0.026 | |
| type train | step 15220 | loss 0.0285 0.2705 2.4357 10.5235 | lr 2.3e-04 | norm 1.3776 | dt 0.026 | |
| type train | step 15230 | loss 0.0297 0.2739 2.5038 11.0599 | lr 2.3e-04 | norm 1.3171 | dt 0.026 | |
| type train | step 15240 | loss 0.0278 0.2659 2.4345 10.5863 | lr 2.3e-04 | norm 1.0763 | dt 0.026 | |
| type train | step 15250 | loss 0.0285 0.2700 2.5570 11.1625 | lr 2.3e-04 | norm 1.3032 | dt 0.028 | |
| type train | step 15260 | loss 0.0283 0.2690 2.4555 10.9093 | lr 2.3e-04 | norm 1.0316 | dt 0.026 | |
| type train | step 15270 | loss 0.0287 0.2687 2.5114 11.1794 | lr 2.3e-04 | norm 1.1547 | dt 0.026 | |
| type train | step 15280 | loss 0.0284 0.2633 2.4064 10.5610 | lr 2.3e-04 | norm 1.1940 | dt 0.026 | |
| type train | step 15290 | loss 0.0303 0.2788 2.5264 11.1187 | lr 2.3e-04 | norm 1.4316 | dt 0.026 | |
| type train | step 15300 | loss 0.0278 0.2686 2.4250 10.4972 | lr 2.3e-04 | norm 1.1442 | dt 0.026 | |
| type train | step 15310 | loss 0.0280 0.2646 2.4550 10.8946 | lr 2.3e-04 | norm 1.1370 | dt 0.026 | |
| type train | step 15320 | loss 0.0281 0.2701 2.4265 10.5727 | lr 2.2e-04 | norm 0.9059 | dt 0.026 | |
| type train | step 15330 | loss 0.0298 0.2754 2.5981 11.4750 | lr 2.2e-04 | norm 1.1485 | dt 0.026 | |
| type train | step 15340 | loss 0.0282 0.2651 2.4273 10.7222 | lr 2.2e-04 | norm 1.2189 | dt 0.026 | |
| type train | step 15350 | loss 0.0282 0.2612 2.3682 10.5765 | lr 2.2e-04 | norm 1.0881 | dt 0.026 | |
| type train | step 15360 | loss 0.0287 0.2721 2.4772 10.7174 | lr 2.2e-04 | norm 1.1416 | dt 0.026 | |
| type train | step 15370 | loss 0.0295 0.2737 2.4251 10.5588 | lr 2.2e-04 | norm 1.1203 | dt 0.026 | |
| type train | step 15380 | loss 0.0283 0.2682 2.4072 10.6014 | lr 2.2e-04 | norm 0.9832 | dt 0.026 | |
| type train | step 15390 | loss 0.0310 0.2734 2.4397 10.8404 | lr 2.2e-04 | norm 1.6578 | dt 0.026 | |
| type train | step 15400 | loss 0.0289 0.2714 2.4994 10.8778 | lr 2.2e-04 | norm 1.3256 | dt 0.026 | |
| type train | step 15410 | loss 0.0287 0.2700 2.4554 10.9123 | lr 2.2e-04 | norm 1.1992 | dt 0.026 | |
| type train | step 15420 | loss 0.0277 0.2710 2.4529 10.6031 | lr 2.2e-04 | norm 1.1931 | dt 0.026 | |
| type train | step 15430 | loss 0.0286 0.2701 2.4404 10.7686 | lr 2.2e-04 | norm 1.1737 | dt 0.026 | |
| type train | step 15440 | loss 0.0282 0.2702 2.4647 10.6081 | lr 2.2e-04 | norm 1.3099 | dt 0.026 | |
| type train | step 15450 | loss 0.0306 0.2770 2.5171 11.1610 | lr 2.2e-04 | norm 1.1651 | dt 0.026 | |
| type train | step 15460 | loss 0.0298 0.2731 2.4130 10.7315 | lr 2.2e-04 | norm 1.1278 | dt 0.026 | |
| type train | step 15470 | loss 0.0281 0.2667 2.4620 10.8117 | lr 2.2e-04 | norm 1.1176 | dt 0.026 | |
| type train | step 15480 | loss 0.0275 0.2664 2.4471 10.7670 | lr 2.2e-04 | norm 1.0582 | dt 0.026 | |
| type train | step 15490 | loss 0.0292 0.2785 2.5196 10.9798 | lr 2.2e-04 | norm 1.2358 | dt 0.026 | |
| type train | step 15500 | loss 0.0282 0.2662 2.3834 10.4795 | lr 2.2e-04 | norm 0.9992 | dt 0.026 | |
| type train | step 15510 | loss 0.0288 0.2625 2.4147 10.7701 | lr 2.2e-04 | norm 1.0630 | dt 0.026 | |
| type train | step 15520 | loss 0.0297 0.2695 2.3947 10.5698 | lr 2.2e-04 | norm 0.9838 | dt 0.025 | |
| type train | step 15530 | loss 0.0297 0.2713 2.5147 10.9230 | lr 2.1e-04 | norm 1.3166 | dt 0.026 | |
| type train | step 15540 | loss 0.0282 0.2658 2.4010 10.5991 | lr 2.1e-04 | norm 1.2169 | dt 0.026 | |
| type train | step 15550 | loss 0.0291 0.2769 2.4674 10.7441 | lr 2.1e-04 | norm 1.2540 | dt 0.026 | |
| type train | step 15560 | loss 0.0291 0.2690 2.4386 10.6666 | lr 2.1e-04 | norm 1.2485 | dt 0.026 | |
| type train | step 15570 | loss 0.0291 0.2643 2.4346 10.7906 | lr 2.1e-04 | norm 1.2873 | dt 0.027 | |
| type train | step 15580 | loss 0.0292 0.2702 2.5012 11.0468 | lr 2.1e-04 | norm 1.0202 | dt 0.026 | |
| type train | step 15590 | loss 0.0283 0.2641 2.4861 10.9249 | lr 2.1e-04 | norm 1.0572 | dt 0.026 | |
| type train | step 15600 | loss 0.0285 0.2691 2.4568 10.8251 | lr 2.1e-04 | norm 1.2830 | dt 0.026 | |
| type train | step 15610 | loss 0.0283 0.2616 2.3609 10.4917 | lr 2.1e-04 | norm 1.2245 | dt 0.026 | |
| type train | step 15620 | loss 0.0284 0.2675 2.3854 10.6383 | lr 2.1e-04 | norm 1.2255 | dt 0.026 | |
| type train | step 15630 | loss 0.0300 0.2711 2.4538 10.9145 | lr 2.1e-04 | norm 1.1990 | dt 0.026 | |
| type train | step 15640 | loss 0.0285 0.2643 2.4823 11.0079 | lr 2.1e-04 | norm 1.0702 | dt 0.026 | |
| type train | step 15650 | loss 0.0292 0.2700 2.4368 10.5823 | lr 2.1e-04 | norm 1.1352 | dt 0.026 | |
| type train | step 15660 | loss 0.0287 0.2697 2.4496 10.7837 | lr 2.1e-04 | norm 1.0446 | dt 0.026 | |
| type train | step 15670 | loss 0.0286 0.2734 2.4518 10.6809 | lr 2.1e-04 | norm 1.3638 | dt 0.026 | |
| type train | step 15680 | loss 0.0285 0.2746 2.4895 10.8775 | lr 2.1e-04 | norm 1.0705 | dt 0.026 | |
| type train | step 15690 | loss 0.0288 0.2658 2.3924 10.5773 | lr 2.1e-04 | norm 1.2330 | dt 0.026 | |
| type train | step 15700 | loss 0.0293 0.2717 2.4661 10.9129 | lr 2.1e-04 | norm 0.9905 | dt 0.026 | |
| type train | step 15710 | loss 0.0284 0.2624 2.3546 10.2886 | lr 2.1e-04 | norm 1.1343 | dt 0.026 | |
| type train | step 15720 | loss 0.0296 0.2694 2.4577 10.7874 | lr 2.1e-04 | norm 1.0785 | dt 0.026 | |
| type train | step 15730 | loss 0.0287 0.2708 2.4821 10.8996 | lr 2.0e-04 | norm 1.0909 | dt 0.026 | |
| type train | step 15740 | loss 0.0281 0.2746 2.5211 10.9679 | lr 2.0e-04 | norm 1.1526 | dt 0.026 | |
| type train | step 15750 | loss 0.0292 0.2705 2.4463 10.8694 | lr 2.0e-04 | norm 1.2724 | dt 0.026 | |
| type train | step 15760 | loss 0.0289 0.2708 2.5267 11.2055 | lr 2.0e-04 | norm 1.1005 | dt 0.026 | |
| type train | step 15770 | loss 0.0285 0.2681 2.5016 10.9940 | lr 2.0e-04 | norm 1.2851 | dt 0.026 | |
| type train | step 15780 | loss 0.0296 0.2722 2.4999 11.1189 | lr 2.0e-04 | norm 1.1419 | dt 0.026 | |
| type train | step 15790 | loss 0.0274 0.2650 2.4191 10.5334 | lr 2.0e-04 | norm 1.0677 | dt 0.026 | |
| type train | step 15800 | loss 0.0280 0.2690 2.4452 10.7852 | lr 2.0e-04 | norm 1.2190 | dt 0.026 | |
| type train | step 15810 | loss 0.0298 0.2697 2.4235 10.7839 | lr 2.0e-04 | norm 1.4594 | dt 0.026 | |
| type train | step 15820 | loss 0.0286 0.2683 2.5466 11.3732 | lr 2.0e-04 | norm 1.1688 | dt 0.026 | |
| type train | step 15830 | loss 0.0283 0.2704 2.4332 10.5122 | lr 2.0e-04 | norm 1.3882 | dt 0.027 | |
| type train | step 15840 | loss 0.0294 0.2731 2.5019 11.0487 | lr 2.0e-04 | norm 1.3084 | dt 0.026 | |
| type train | step 15850 | loss 0.0275 0.2652 2.4320 10.5782 | lr 2.0e-04 | norm 1.0441 | dt 0.026 | |
| type train | step 15860 | loss 0.0283 0.2695 2.5540 11.1533 | lr 2.0e-04 | norm 1.3086 | dt 0.027 | |
| type train | step 15870 | loss 0.0281 0.2687 2.4531 10.8890 | lr 2.0e-04 | norm 1.0477 | dt 0.026 | |
| type train | step 15880 | loss 0.0286 0.2682 2.5076 11.1659 | lr 2.0e-04 | norm 1.1477 | dt 0.026 | |
| type train | step 15890 | loss 0.0283 0.2629 2.4048 10.5447 | lr 2.0e-04 | norm 1.1817 | dt 0.026 | |
| type train | step 15900 | loss 0.0300 0.2782 2.5224 11.1024 | lr 2.0e-04 | norm 1.4115 | dt 0.026 | |
| type train | step 15910 | loss 0.0274 0.2684 2.4216 10.4898 | lr 2.0e-04 | norm 1.1174 | dt 0.026 | |
| type train | step 15920 | loss 0.0276 0.2640 2.4531 10.8871 | lr 2.0e-04 | norm 1.1027 | dt 0.026 | |
| type train | step 15930 | loss 0.0279 0.2697 2.4249 10.5633 | lr 2.0e-04 | norm 0.9016 | dt 0.026 | |
| type train | step 15940 | loss 0.0295 0.2744 2.5950 11.4545 | lr 2.0e-04 | norm 1.1260 | dt 0.026 | |
| type train | step 15950 | loss 0.0281 0.2648 2.4261 10.7097 | lr 1.9e-04 | norm 1.2203 | dt 0.026 | |
| type train | step 15960 | loss 0.0280 0.2605 2.3641 10.5635 | lr 1.9e-04 | norm 1.0431 | dt 0.026 | |
| type train | step 15970 | loss 0.0286 0.2712 2.4751 10.7042 | lr 1.9e-04 | norm 1.1322 | dt 0.026 | |
| type train | step 15980 | loss 0.0294 0.2732 2.4231 10.5459 | lr 1.9e-04 | norm 1.1173 | dt 0.026 | |
| type train | step 15990 | loss 0.0281 0.2678 2.4060 10.5996 | lr 1.9e-04 | norm 0.9768 | dt 0.026 | |
| type train | step 16000 | loss 0.0309 0.2733 2.4384 10.8290 | lr 1.9e-04 | norm 1.6776 | dt 0.026 | |
| type train | step 16010 | loss 0.0287 0.2707 2.4979 10.8642 | lr 1.9e-04 | norm 1.3140 | dt 0.026 | |
| type train | step 16020 | loss 0.0285 0.2692 2.4516 10.8927 | lr 1.9e-04 | norm 1.1752 | dt 0.026 | |
| type train | step 16030 | loss 0.0274 0.2707 2.4502 10.5869 | lr 1.9e-04 | norm 1.1640 | dt 0.026 | |
| type train | step 16040 | loss 0.0284 0.2696 2.4382 10.7589 | lr 1.9e-04 | norm 1.1698 | dt 0.026 | |
| type train | step 16050 | loss 0.0282 0.2696 2.4621 10.5954 | lr 1.9e-04 | norm 1.3210 | dt 0.026 | |
| type train | step 16060 | loss 0.0303 0.2765 2.5140 11.1505 | lr 1.9e-04 | norm 1.1713 | dt 0.026 | |
| type train | step 16070 | loss 0.0296 0.2728 2.4116 10.7212 | lr 1.9e-04 | norm 1.1104 | dt 0.026 | |
| type train | step 16080 | loss 0.0280 0.2662 2.4603 10.7912 | lr 1.9e-04 | norm 1.1001 | dt 0.026 | |
| type train | step 16090 | loss 0.0273 0.2661 2.4428 10.7516 | lr 1.9e-04 | norm 1.0644 | dt 0.026 | |
| type train | step 16100 | loss 0.0289 0.2778 2.5183 10.9624 | lr 1.9e-04 | norm 1.2173 | dt 0.026 | |
| type train | step 16110 | loss 0.0281 0.2655 2.3819 10.4671 | lr 1.9e-04 | norm 1.0180 | dt 0.026 | |
| type train | step 16120 | loss 0.0286 0.2618 2.4127 10.7660 | lr 1.9e-04 | norm 1.0735 | dt 0.026 | |
| type train | step 16130 | loss 0.0293 0.2690 2.3929 10.5556 | lr 1.9e-04 | norm 0.9660 | dt 0.026 | |
| type train | step 16140 | loss 0.0293 0.2711 2.5090 10.9067 | lr 1.9e-04 | norm 1.2951 | dt 0.026 | |
| type train | step 16150 | loss 0.0281 0.2655 2.3975 10.5851 | lr 1.9e-04 | norm 1.2063 | dt 0.026 | |
| type train | step 16160 | loss 0.0288 0.2763 2.4653 10.7310 | lr 1.9e-04 | norm 1.2367 | dt 0.026 | |
| type train | step 16170 | loss 0.0289 0.2686 2.4363 10.6523 | lr 1.9e-04 | norm 1.2431 | dt 0.026 | |
| type train | step 16180 | loss 0.0287 0.2638 2.4319 10.7777 | lr 1.8e-04 | norm 1.2795 | dt 0.026 | |
| type train | step 16190 | loss 0.0291 0.2699 2.4988 11.0309 | lr 1.8e-04 | norm 1.0204 | dt 0.026 | |
| type train | step 16200 | loss 0.0282 0.2636 2.4829 10.9134 | lr 1.8e-04 | norm 1.0587 | dt 0.026 | |
| type train | step 16210 | loss 0.0283 0.2688 2.4526 10.8122 | lr 1.8e-04 | norm 1.2493 | dt 0.026 | |
| type train | step 16220 | loss 0.0283 0.2613 2.3586 10.4782 | lr 1.8e-04 | norm 1.1960 | dt 0.026 | |
| type train | step 16230 | loss 0.0284 0.2668 2.3832 10.6279 | lr 1.8e-04 | norm 1.2154 | dt 0.027 | |
| type train | step 16240 | loss 0.0298 0.2701 2.4500 10.9027 | lr 1.8e-04 | norm 1.1909 | dt 0.026 | |
| type train | step 16250 | loss 0.0284 0.2636 2.4797 10.9978 | lr 1.8e-04 | norm 1.0614 | dt 0.026 | |
| type train | step 16260 | loss 0.0289 0.2696 2.4355 10.5721 | lr 1.8e-04 | norm 1.1313 | dt 0.026 | |
| type train | step 16270 | loss 0.0285 0.2690 2.4472 10.7659 | lr 1.8e-04 | norm 1.0149 | dt 0.026 | |
| type train | step 16280 | loss 0.0284 0.2731 2.4501 10.6681 | lr 1.8e-04 | norm 1.3415 | dt 0.026 | |
| type train | step 16290 | loss 0.0282 0.2741 2.4861 10.8627 | lr 1.8e-04 | norm 1.0496 | dt 0.026 | |
| type train | step 16300 | loss 0.0286 0.2655 2.3891 10.5693 | lr 1.8e-04 | norm 1.2303 | dt 0.026 | |
| type train | step 16310 | loss 0.0292 0.2710 2.4627 10.8993 | lr 1.8e-04 | norm 0.9790 | dt 0.026 | |
| type train | step 16320 | loss 0.0283 0.2622 2.3529 10.2741 | lr 1.8e-04 | norm 1.1378 | dt 0.026 | |
| type train | step 16330 | loss 0.0295 0.2690 2.4544 10.7816 | lr 1.8e-04 | norm 1.0802 | dt 0.026 | |
| type train | step 16340 | loss 0.0286 0.2703 2.4794 10.8874 | lr 1.8e-04 | norm 1.0879 | dt 0.026 | |
| type train | step 16350 | loss 0.0279 0.2743 2.5184 10.9618 | lr 1.8e-04 | norm 1.1529 | dt 0.026 | |
| type train | step 16360 | loss 0.0292 0.2698 2.4418 10.8666 | lr 1.8e-04 | norm 1.2695 | dt 0.026 | |
| type train | step 16370 | loss 0.0287 0.2701 2.5244 11.1949 | lr 1.8e-04 | norm 1.0735 | dt 0.026 | |
| type train | step 16380 | loss 0.0284 0.2677 2.4995 10.9819 | lr 1.8e-04 | norm 1.2762 | dt 0.026 | |
| type train | step 16390 | loss 0.0295 0.2718 2.4963 11.1142 | lr 1.8e-04 | norm 1.1353 | dt 0.026 | |
| type train | step 16400 | loss 0.0272 0.2646 2.4175 10.5235 | lr 1.8e-04 | norm 1.0714 | dt 0.026 | |
| type train | step 16410 | loss 0.0278 0.2685 2.4421 10.7723 | lr 1.8e-04 | norm 1.1983 | dt 0.026 | |
| type train | step 16420 | loss 0.0296 0.2689 2.4205 10.7741 | lr 1.7e-04 | norm 1.4322 | dt 0.026 | |
| type train | step 16430 | loss 0.0284 0.2681 2.5429 11.3598 | lr 1.7e-04 | norm 1.1733 | dt 0.026 | |
| type train | step 16440 | loss 0.0281 0.2695 2.4300 10.5020 | lr 1.7e-04 | norm 1.3848 | dt 0.026 | |
| type train | step 16450 | loss 0.0292 0.2727 2.4989 11.0323 | lr 1.7e-04 | norm 1.2983 | dt 0.026 | |
| type train | step 16460 | loss 0.0274 0.2649 2.4308 10.5723 | lr 1.7e-04 | norm 1.0450 | dt 0.026 | |
| type train | step 16470 | loss 0.0280 0.2689 2.5509 11.1436 | lr 1.7e-04 | norm 1.2902 | dt 0.029 | |
| type train | step 16480 | loss 0.0280 0.2682 2.4488 10.8783 | lr 1.7e-04 | norm 1.0277 | dt 0.026 | |
| type train | step 16490 | loss 0.0284 0.2677 2.5051 11.1545 | lr 1.7e-04 | norm 1.1484 | dt 0.026 | |
| type train | step 16500 | loss 0.0282 0.2621 2.4016 10.5365 | lr 1.7e-04 | norm 1.1832 | dt 0.027 | |
| type train | step 16510 | loss 0.0299 0.2777 2.5200 11.0826 | lr 1.7e-04 | norm 1.3814 | dt 0.026 | |
| type train | step 16520 | loss 0.0274 0.2681 2.4206 10.4770 | lr 1.7e-04 | norm 1.1163 | dt 0.026 | |
| type train | step 16530 | loss 0.0275 0.2638 2.4494 10.8824 | lr 1.7e-04 | norm 1.0909 | dt 0.026 | |
| type train | step 16540 | loss 0.0277 0.2691 2.4239 10.5541 | lr 1.7e-04 | norm 0.8944 | dt 0.026 | |
| type train | step 16550 | loss 0.0293 0.2738 2.5931 11.4433 | lr 1.7e-04 | norm 1.1328 | dt 0.026 | |
| type train | step 16560 | loss 0.0280 0.2642 2.4226 10.6954 | lr 1.7e-04 | norm 1.2191 | dt 0.026 | |
| type train | step 16570 | loss 0.0278 0.2599 2.3601 10.5526 | lr 1.7e-04 | norm 1.0299 | dt 0.026 | |
| type train | step 16580 | loss 0.0284 0.2708 2.4705 10.6917 | lr 1.7e-04 | norm 1.1151 | dt 0.026 | |
| type train | step 16590 | loss 0.0292 0.2725 2.4213 10.5361 | lr 1.7e-04 | norm 1.0994 | dt 0.026 | |
| type train | step 16600 | loss 0.0279 0.2675 2.4045 10.5935 | lr 1.7e-04 | norm 0.9728 | dt 0.026 | |
| type train | step 16610 | loss 0.0306 0.2726 2.4347 10.8167 | lr 1.7e-04 | norm 1.6700 | dt 0.026 | |
| type train | step 16620 | loss 0.0285 0.2699 2.4962 10.8501 | lr 1.7e-04 | norm 1.2968 | dt 0.026 | |
| type train | step 16630 | loss 0.0283 0.2686 2.4479 10.8793 | lr 1.7e-04 | norm 1.1479 | dt 0.026 | |
| type train | step 16640 | loss 0.0273 0.2702 2.4468 10.5757 | lr 1.7e-04 | norm 1.1633 | dt 0.026 | |
| type train | step 16650 | loss 0.0283 0.2691 2.4359 10.7471 | lr 1.7e-04 | norm 1.1675 | dt 0.026 | |
| type train | step 16660 | loss 0.0280 0.2695 2.4598 10.5852 | lr 1.7e-04 | norm 1.3131 | dt 0.026 | |
| type train | step 16670 | loss 0.0301 0.2763 2.5106 11.1350 | lr 1.6e-04 | norm 1.1710 | dt 0.026 | |
| type train | step 16680 | loss 0.0294 0.2721 2.4102 10.7090 | lr 1.6e-04 | norm 1.1134 | dt 0.026 | |
| type train | step 16690 | loss 0.0279 0.2654 2.4563 10.7805 | lr 1.6e-04 | norm 1.0877 | dt 0.026 | |
| type train | step 16700 | loss 0.0272 0.2659 2.4393 10.7373 | lr 1.6e-04 | norm 1.0535 | dt 0.026 | |
| type train | step 16710 | loss 0.0288 0.2776 2.5159 10.9537 | lr 1.6e-04 | norm 1.2216 | dt 0.026 | |
| type train | step 16720 | loss 0.0280 0.2652 2.3807 10.4611 | lr 1.6e-04 | norm 1.0002 | dt 0.026 | |
| type train | step 16730 | loss 0.0283 0.2615 2.4098 10.7577 | lr 1.6e-04 | norm 1.0663 | dt 0.026 | |
| type train | step 16740 | loss 0.0291 0.2686 2.3926 10.5493 | lr 1.6e-04 | norm 0.9590 | dt 0.026 | |
| type train | step 16750 | loss 0.0293 0.2699 2.5052 10.8951 | lr 1.6e-04 | norm 1.2534 | dt 0.026 | |
| type train | step 16760 | loss 0.0279 0.2649 2.3933 10.5752 | lr 1.6e-04 | norm 1.2079 | dt 0.026 | |
| type train | step 16770 | loss 0.0287 0.2757 2.4624 10.7186 | lr 1.6e-04 | norm 1.2409 | dt 0.026 | |
| type train | step 16780 | loss 0.0287 0.2681 2.4321 10.6413 | lr 1.6e-04 | norm 1.2419 | dt 0.026 | |
| type train | step 16790 | loss 0.0286 0.2635 2.4287 10.7713 | lr 1.6e-04 | norm 1.2842 | dt 0.026 | |
| type train | step 16800 | loss 0.0289 0.2688 2.4955 11.0151 | lr 1.6e-04 | norm 0.9914 | dt 0.026 | |
| type train | step 16810 | loss 0.0278 0.2631 2.4797 10.9070 | lr 1.6e-04 | norm 1.0432 | dt 0.026 | |
| type train | step 16820 | loss 0.0280 0.2683 2.4504 10.8005 | lr 1.6e-04 | norm 1.2456 | dt 0.026 | |
| type train | step 16830 | loss 0.0282 0.2606 2.3554 10.4711 | lr 1.6e-04 | norm 1.1893 | dt 0.026 | |
| type train | step 16840 | loss 0.0281 0.2664 2.3815 10.6174 | lr 1.6e-04 | norm 1.2121 | dt 0.026 | |
| type train | step 16850 | loss 0.0297 0.2698 2.4468 10.8951 | lr 1.6e-04 | norm 1.1913 | dt 0.026 | |
| type train | step 16860 | loss 0.0281 0.2630 2.4778 10.9904 | lr 1.6e-04 | norm 1.0339 | dt 0.026 | |
| type train | step 16870 | loss 0.0288 0.2688 2.4341 10.5630 | lr 1.6e-04 | norm 1.1491 | dt 0.026 | |
| type train | step 16880 | loss 0.0284 0.2685 2.4450 10.7567 | lr 1.6e-04 | norm 1.0153 | dt 0.026 | |
| type train | step 16890 | loss 0.0282 0.2725 2.4473 10.6517 | lr 1.6e-04 | norm 1.3307 | dt 0.026 | |
| type train | step 16900 | loss 0.0280 0.2735 2.4833 10.8620 | lr 1.6e-04 | norm 1.0487 | dt 0.026 | |
| type train | step 16910 | loss 0.0285 0.2649 2.3866 10.5601 | lr 1.6e-04 | norm 1.2352 | dt 0.026 | |
| type train | step 16920 | loss 0.0290 0.2704 2.4615 10.8918 | lr 1.6e-04 | norm 0.9668 | dt 0.026 | |
| type train | step 16930 | loss 0.0281 0.2615 2.3499 10.2667 | lr 1.6e-04 | norm 1.1304 | dt 0.026 | |
| type train | step 16940 | loss 0.0293 0.2683 2.4538 10.7700 | lr 1.5e-04 | norm 1.0682 | dt 0.026 | |
| type train | step 16950 | loss 0.0284 0.2697 2.4754 10.8788 | lr 1.5e-04 | norm 1.0693 | dt 0.026 | |
| type train | step 16960 | loss 0.0278 0.2736 2.5162 10.9565 | lr 1.5e-04 | norm 1.1411 | dt 0.026 | |
| type train | step 16970 | loss 0.0291 0.2691 2.4389 10.8563 | lr 1.5e-04 | norm 1.2613 | dt 0.026 | |
| type train | step 16980 | loss 0.0285 0.2700 2.5225 11.1896 | lr 1.5e-04 | norm 1.0729 | dt 0.026 | |
| type train | step 16990 | loss 0.0282 0.2671 2.4970 10.9751 | lr 1.5e-04 | norm 1.2709 | dt 0.026 | |
| type train | step 17000 | loss 0.0293 0.2708 2.4932 11.1012 | lr 1.5e-04 | norm 1.1226 | dt 0.026 | |
| type train | step 17010 | loss 0.0271 0.2640 2.4144 10.5164 | lr 1.5e-04 | norm 1.0698 | dt 0.026 | |
| type train | step 17020 | loss 0.0276 0.2677 2.4415 10.7706 | lr 1.5e-04 | norm 1.1840 | dt 0.026 | |
| type train | step 17030 | loss 0.0297 0.2680 2.4168 10.7696 | lr 1.5e-04 | norm 1.4224 | dt 0.026 | |
| type train | step 17040 | loss 0.0282 0.2674 2.5419 11.3519 | lr 1.5e-04 | norm 1.1733 | dt 0.026 | |
| type train | step 17050 | loss 0.0280 0.2692 2.4272 10.4916 | lr 1.5e-04 | norm 1.4104 | dt 0.026 | |
| type train | step 17060 | loss 0.0291 0.2719 2.4955 11.0264 | lr 1.5e-04 | norm 1.2955 | dt 0.026 | |
| type train | step 17070 | loss 0.0273 0.2643 2.4292 10.5640 | lr 1.5e-04 | norm 1.0540 | dt 0.026 | |
| type train | step 17080 | loss 0.0278 0.2683 2.5495 11.1418 | lr 1.5e-04 | norm 1.2881 | dt 0.028 | |
| type train | step 17090 | loss 0.0277 0.2674 2.4475 10.8708 | lr 1.5e-04 | norm 1.0315 | dt 0.026 | |
| type train | step 17100 | loss 0.0282 0.2672 2.5028 11.1425 | lr 1.5e-04 | norm 1.1241 | dt 0.026 | |
| type train | step 17110 | loss 0.0280 0.2618 2.4013 10.5311 | lr 1.5e-04 | norm 1.1660 | dt 0.026 | |
| type train | step 17120 | loss 0.0298 0.2769 2.5163 11.0723 | lr 1.5e-04 | norm 1.3701 | dt 0.026 | |
| type train | step 17130 | loss 0.0272 0.2673 2.4176 10.4706 | lr 1.5e-04 | norm 1.1123 | dt 0.026 | |
| type train | step 17140 | loss 0.0273 0.2629 2.4477 10.8789 | lr 1.5e-04 | norm 1.0883 | dt 0.026 | |
| type train | step 17150 | loss 0.0276 0.2686 2.4226 10.5495 | lr 1.5e-04 | norm 0.8856 | dt 0.026 | |
| type train | step 17160 | loss 0.0292 0.2731 2.5902 11.4296 | lr 1.5e-04 | norm 1.1136 | dt 0.026 | |
| type train | step 17170 | loss 0.0277 0.2635 2.4207 10.6878 | lr 1.5e-04 | norm 1.2063 | dt 0.026 | |
| type train | step 17180 | loss 0.0277 0.2595 2.3577 10.5426 | lr 1.5e-04 | norm 1.0319 | dt 0.026 | |
| type train | step 17190 | loss 0.0283 0.2701 2.4688 10.6805 | lr 1.5e-04 | norm 1.1110 | dt 0.027 | |
| type train | step 17200 | loss 0.0291 0.2720 2.4195 10.5314 | lr 1.5e-04 | norm 1.0983 | dt 0.026 | |
| type train | step 17210 | loss 0.0279 0.2670 2.4026 10.5917 | lr 1.5e-04 | norm 0.9758 | dt 0.026 | |
| type train | step 17220 | loss 0.0304 0.2723 2.4331 10.8120 | lr 1.5e-04 | norm 1.6693 | dt 0.026 | |
| type train | step 17230 | loss 0.0285 0.2694 2.4937 10.8451 | lr 1.5e-04 | norm 1.2932 | dt 0.026 | |
| type train | step 17240 | loss 0.0283 0.2679 2.4442 10.8664 | lr 1.4e-04 | norm 1.1343 | dt 0.027 | |
| type train | step 17250 | loss 0.0272 0.2694 2.4448 10.5655 | lr 1.4e-04 | norm 1.1486 | dt 0.026 | |
| type train | step 17260 | loss 0.0281 0.2685 2.4344 10.7401 | lr 1.4e-04 | norm 1.1584 | dt 0.026 | |
| type train | step 17270 | loss 0.0278 0.2686 2.4586 10.5802 | lr 1.4e-04 | norm 1.2938 | dt 0.026 | |
| type train | step 17280 | loss 0.0300 0.2757 2.5089 11.1276 | lr 1.4e-04 | norm 1.1728 | dt 0.026 | |
| type train | step 17290 | loss 0.0293 0.2714 2.4079 10.7020 | lr 1.4e-04 | norm 1.1061 | dt 0.026 | |
| type train | step 17300 | loss 0.0278 0.2647 2.4542 10.7733 | lr 1.4e-04 | norm 1.0632 | dt 0.026 | |
| type train | step 17310 | loss 0.0271 0.2648 2.4370 10.7313 | lr 1.4e-04 | norm 1.0287 | dt 0.026 | |
| type train | step 17320 | loss 0.0286 0.2767 2.5129 10.9441 | lr 1.4e-04 | norm 1.2090 | dt 0.026 | |
| type train | step 17330 | loss 0.0277 0.2646 2.3775 10.4637 | lr 1.4e-04 | norm 0.9905 | dt 0.026 | |
| type train | step 17340 | loss 0.0282 0.2609 2.4087 10.7524 | lr 1.4e-04 | norm 1.0626 | dt 0.026 | |
| type train | step 17350 | loss 0.0290 0.2683 2.3906 10.5456 | lr 1.4e-04 | norm 0.9610 | dt 0.026 | |
| type train | step 17360 | loss 0.0291 0.2686 2.5011 10.8882 | lr 1.4e-04 | norm 1.2127 | dt 0.026 | |
| type train | step 17370 | loss 0.0278 0.2642 2.3906 10.5731 | lr 1.4e-04 | norm 1.1856 | dt 0.026 | |
| type train | step 17380 | loss 0.0286 0.2751 2.4600 10.7147 | lr 1.4e-04 | norm 1.2361 | dt 0.026 | |
| type train | step 17390 | loss 0.0286 0.2676 2.4292 10.6324 | lr 1.4e-04 | norm 1.2336 | dt 0.026 | |
| type train | step 17400 | loss 0.0285 0.2628 2.4257 10.7733 | lr 1.4e-04 | norm 1.2825 | dt 0.026 | |
| type train | step 17410 | loss 0.0288 0.2684 2.4937 11.0108 | lr 1.4e-04 | norm 0.9814 | dt 0.026 | |
| type train | step 17420 | loss 0.0278 0.2626 2.4775 10.8982 | lr 1.4e-04 | norm 1.0327 | dt 0.026 | |
| type train | step 17430 | loss 0.0279 0.2673 2.4476 10.7919 | lr 1.4e-04 | norm 1.2035 | dt 0.026 | |
| type train | step 17440 | loss 0.0280 0.2602 2.3528 10.4618 | lr 1.4e-04 | norm 1.1743 | dt 0.026 | |
| type train | step 17450 | loss 0.0279 0.2657 2.3783 10.6148 | lr 1.4e-04 | norm 1.1975 | dt 0.026 | |
| type train | step 17460 | loss 0.0297 0.2692 2.4469 10.8899 | lr 1.4e-04 | norm 1.1924 | dt 0.026 | |
| type train | step 17470 | loss 0.0281 0.2624 2.4764 10.9813 | lr 1.4e-04 | norm 1.0288 | dt 0.026 | |
| type train | step 17480 | loss 0.0287 0.2685 2.4330 10.5563 | lr 1.4e-04 | norm 1.1584 | dt 0.026 | |
| type train | step 17490 | loss 0.0282 0.2674 2.4412 10.7497 | lr 1.4e-04 | norm 0.9866 | dt 0.026 | |
| type train | step 17500 | loss 0.0281 0.2720 2.4457 10.6418 | lr 1.4e-04 | norm 1.3102 | dt 0.026 | |
| type train | step 17510 | loss 0.0279 0.2729 2.4807 10.8588 | lr 1.4e-04 | norm 1.0313 | dt 0.026 | |
| type train | step 17520 | loss 0.0282 0.2645 2.3842 10.5540 | lr 1.4e-04 | norm 1.2241 | dt 0.026 | |
| type train | step 17530 | loss 0.0289 0.2701 2.4599 10.8846 | lr 1.4e-04 | norm 0.9594 | dt 0.026 | |
| type train | step 17540 | loss 0.0279 0.2611 2.3484 10.2588 | lr 1.4e-04 | norm 1.1380 | dt 0.026 | |
| type train | step 17550 | loss 0.0291 0.2676 2.4511 10.7611 | lr 1.4e-04 | norm 1.0496 | dt 0.026 | |
| type train | step 17560 | loss 0.0283 0.2690 2.4735 10.8735 | lr 1.4e-04 | norm 1.0632 | dt 0.026 | |
| type train | step 17570 | loss 0.0276 0.2731 2.5139 10.9517 | lr 1.3e-04 | norm 1.1319 | dt 0.026 | |
| type train | step 17580 | loss 0.0288 0.2688 2.4382 10.8520 | lr 1.3e-04 | norm 1.2424 | dt 0.026 | |
| type train | step 17590 | loss 0.0283 0.2690 2.5200 11.1839 | lr 1.3e-04 | norm 1.0606 | dt 0.026 | |
| type train | step 17600 | loss 0.0280 0.2666 2.4955 10.9665 | lr 1.3e-04 | norm 1.2713 | dt 0.026 | |
| type train | step 17610 | loss 0.0292 0.2704 2.4915 11.0970 | lr 1.3e-04 | norm 1.1063 | dt 0.026 | |
| type train | step 17620 | loss 0.0269 0.2635 2.4131 10.5076 | lr 1.3e-04 | norm 1.0440 | dt 0.026 | |
| type train | step 17630 | loss 0.0275 0.2672 2.4385 10.7648 | lr 1.3e-04 | norm 1.1808 | dt 0.026 | |
| type train | step 17640 | loss 0.0293 0.2676 2.4133 10.7619 | lr 1.3e-04 | norm 1.3987 | dt 0.026 | |
| type train | step 17650 | loss 0.0281 0.2665 2.5414 11.3444 | lr 1.3e-04 | norm 1.1495 | dt 0.026 | |
| type train | step 17660 | loss 0.0280 0.2686 2.4259 10.4872 | lr 1.3e-04 | norm 1.4041 | dt 0.026 | |
| type train | step 17670 | loss 0.0290 0.2714 2.4922 11.0179 | lr 1.3e-04 | norm 1.2716 | dt 0.026 | |
| type train | step 17680 | loss 0.0271 0.2637 2.4282 10.5552 | lr 1.3e-04 | norm 1.0311 | dt 0.026 | |
| type train | step 17690 | loss 0.0277 0.2678 2.5470 11.1360 | lr 1.3e-04 | norm 1.2645 | dt 0.028 | |
| type train | step 17700 | loss 0.0276 0.2671 2.4451 10.8639 | lr 1.3e-04 | norm 1.0195 | dt 0.026 | |
| type train | step 17710 | loss 0.0280 0.2665 2.4997 11.1383 | lr 1.3e-04 | norm 1.1038 | dt 0.026 | |
| type train | step 17720 | loss 0.0279 0.2613 2.3993 10.5249 | lr 1.3e-04 | norm 1.1509 | dt 0.026 | |
| type train | step 17730 | loss 0.0297 0.2766 2.5135 11.0586 | lr 1.3e-04 | norm 1.3456 | dt 0.026 | |
| type train | step 17740 | loss 0.0271 0.2668 2.4150 10.4625 | lr 1.3e-04 | norm 1.0918 | dt 0.026 | |
| type train | step 17750 | loss 0.0272 0.2624 2.4444 10.8706 | lr 1.3e-04 | norm 1.0691 | dt 0.026 | |
| type train | step 17760 | loss 0.0275 0.2683 2.4206 10.5446 | lr 1.3e-04 | norm 0.8731 | dt 0.025 | |
| type train | step 17770 | loss 0.0290 0.2724 2.5873 11.4224 | lr 1.3e-04 | norm 1.1026 | dt 0.026 | |
| type train | step 17780 | loss 0.0276 0.2629 2.4185 10.6790 | lr 1.3e-04 | norm 1.1971 | dt 0.026 | |
| type train | step 17790 | loss 0.0275 0.2590 2.3539 10.5367 | lr 1.3e-04 | norm 0.9890 | dt 0.026 | |
| type train | step 17800 | loss 0.0281 0.2696 2.4670 10.6725 | lr 1.3e-04 | norm 1.0977 | dt 0.026 | |
| type train | step 17810 | loss 0.0289 0.2713 2.4181 10.5270 | lr 1.3e-04 | norm 1.0859 | dt 0.026 | |
| type train | step 17820 | loss 0.0277 0.2667 2.4016 10.5909 | lr 1.3e-04 | norm 0.9613 | dt 0.026 | |
| type train | step 17830 | loss 0.0303 0.2716 2.4294 10.8072 | lr 1.3e-04 | norm 1.6670 | dt 0.026 | |
| type train | step 17840 | loss 0.0283 0.2689 2.4929 10.8370 | lr 1.3e-04 | norm 1.2799 | dt 0.026 | |
| type train | step 17850 | loss 0.0281 0.2675 2.4428 10.8575 | lr 1.3e-04 | norm 1.1157 | dt 0.026 | |
| type train | step 17860 | loss 0.0270 0.2692 2.4431 10.5585 | lr 1.3e-04 | norm 1.1526 | dt 0.026 | |
| type train | step 17870 | loss 0.0280 0.2682 2.4317 10.7332 | lr 1.3e-04 | norm 1.1666 | dt 0.026 | |
| type train | step 17880 | loss 0.0276 0.2683 2.4578 10.5787 | lr 1.3e-04 | norm 1.2958 | dt 0.026 | |
| type train | step 17890 | loss 0.0299 0.2754 2.5067 11.1193 | lr 1.3e-04 | norm 1.1621 | dt 0.026 | |
| type train | step 17900 | loss 0.0290 0.2710 2.4063 10.6958 | lr 1.3e-04 | norm 1.1048 | dt 0.026 | |
| type train | step 17910 | loss 0.0276 0.2642 2.4513 10.7635 | lr 1.3e-04 | norm 1.0388 | dt 0.026 | |
| type train | step 17920 | loss 0.0269 0.2644 2.4336 10.7249 | lr 1.3e-04 | norm 1.0254 | dt 0.026 | |
| type train | step 17930 | loss 0.0285 0.2764 2.5112 10.9360 | lr 1.3e-04 | norm 1.2007 | dt 0.026 | |
| type train | step 17940 | loss 0.0276 0.2644 2.3740 10.4585 | lr 1.3e-04 | norm 0.9852 | dt 0.026 | |
| type train | step 17950 | loss 0.0281 0.2606 2.4069 10.7441 | lr 1.2e-04 | norm 1.0682 | dt 0.026 | |
| type train | step 17960 | loss 0.0288 0.2678 2.3889 10.5400 | lr 1.2e-04 | norm 0.9440 | dt 0.027 | |
| type train | step 17970 | loss 0.0290 0.2686 2.4972 10.8779 | lr 1.2e-04 | norm 1.1951 | dt 0.026 | |
| type train | step 17980 | loss 0.0276 0.2639 2.3872 10.5674 | lr 1.2e-04 | norm 1.1725 | dt 0.026 | |
| type train | step 17990 | loss 0.0284 0.2748 2.4579 10.7080 | lr 1.2e-04 | norm 1.2326 | dt 0.026 | |
| type train | step 18000 | loss 0.0284 0.2672 2.4277 10.6282 | lr 1.2e-04 | norm 1.2325 | dt 0.026 | |
| type train | step 18010 | loss 0.0283 0.2627 2.4227 10.7687 | lr 1.2e-04 | norm 1.2791 | dt 0.026 | |
| type train | step 18020 | loss 0.0286 0.2679 2.4924 11.0016 | lr 1.2e-04 | norm 0.9714 | dt 0.026 | |
| type train | step 18030 | loss 0.0276 0.2623 2.4756 10.8897 | lr 1.2e-04 | norm 1.0196 | dt 0.026 | |
| type train | step 18040 | loss 0.0277 0.2670 2.4448 10.7827 | lr 1.2e-04 | norm 1.1954 | dt 0.026 | |
| type train | step 18050 | loss 0.0278 0.2598 2.3506 10.4561 | lr 1.2e-04 | norm 1.1702 | dt 0.026 | |
| type train | step 18060 | loss 0.0279 0.2654 2.3774 10.6067 | lr 1.2e-04 | norm 1.1958 | dt 0.027 | |
| type train | step 18070 | loss 0.0294 0.2688 2.4446 10.8834 | lr 1.2e-04 | norm 1.1850 | dt 0.026 | |
| type train | step 18080 | loss 0.0279 0.2619 2.4740 10.9768 | lr 1.2e-04 | norm 1.0039 | dt 0.026 | |
| type train | step 18090 | loss 0.0285 0.2681 2.4315 10.5541 | lr 1.2e-04 | norm 1.1648 | dt 0.026 | |
| type train | step 18100 | loss 0.0281 0.2670 2.4390 10.7436 | lr 1.2e-04 | norm 0.9720 | dt 0.026 | |
| type train | step 18110 | loss 0.0280 0.2711 2.4438 10.6355 | lr 1.2e-04 | norm 1.2957 | dt 0.026 | |
| type train | step 18120 | loss 0.0278 0.2722 2.4791 10.8585 | lr 1.2e-04 | norm 1.0191 | dt 0.026 | |
| type train | step 18130 | loss 0.0282 0.2639 2.3814 10.5470 | lr 1.2e-04 | norm 1.2225 | dt 0.026 | |
| type train | step 18140 | loss 0.0287 0.2698 2.4580 10.8747 | lr 1.2e-04 | norm 0.9498 | dt 0.026 | |
| type train | step 18150 | loss 0.0278 0.2609 2.3480 10.2594 | lr 1.2e-04 | norm 1.1463 | dt 0.026 | |
| type train | step 18160 | loss 0.0290 0.2674 2.4486 10.7551 | lr 1.2e-04 | norm 1.0392 | dt 0.027 | |
| type train | step 18170 | loss 0.0283 0.2684 2.4699 10.8643 | lr 1.2e-04 | norm 1.0415 | dt 0.026 | |
| type train | step 18180 | loss 0.0275 0.2732 2.5124 10.9488 | lr 1.2e-04 | norm 1.1247 | dt 0.026 | |
| type train | step 18190 | loss 0.0287 0.2685 2.4354 10.8508 | lr 1.2e-04 | norm 1.2366 | dt 0.026 | |
| type train | step 18200 | loss 0.0282 0.2687 2.5182 11.1773 | lr 1.2e-04 | norm 1.0399 | dt 0.026 | |
| type train | step 18210 | loss 0.0279 0.2662 2.4940 10.9624 | lr 1.2e-04 | norm 1.2685 | dt 0.026 | |
| type train | step 18220 | loss 0.0290 0.2703 2.4889 11.0943 | lr 1.2e-04 | norm 1.0890 | dt 0.026 | |
| type train | step 18230 | loss 0.0268 0.2634 2.4111 10.5006 | lr 1.2e-04 | norm 1.0529 | dt 0.026 | |
| type train | step 18240 | loss 0.0274 0.2668 2.4366 10.7602 | lr 1.2e-04 | norm 1.1565 | dt 0.027 | |
| type train | step 18250 | loss 0.0292 0.2673 2.4115 10.7561 | lr 1.2e-04 | norm 1.3896 | dt 0.027 | |
| type train | step 18260 | loss 0.0280 0.2665 2.5387 11.3395 | lr 1.2e-04 | norm 1.1460 | dt 0.026 | |
| type train | step 18270 | loss 0.0279 0.2685 2.4248 10.4817 | lr 1.2e-04 | norm 1.4074 | dt 0.026 | |
| type train | step 18280 | loss 0.0288 0.2713 2.4902 11.0120 | lr 1.2e-04 | norm 1.2559 | dt 0.026 | |
| type train | step 18290 | loss 0.0271 0.2636 2.4274 10.5538 | lr 1.2e-04 | norm 1.0574 | dt 0.026 | |
| type train | step 18300 | loss 0.0276 0.2678 2.5442 11.1327 | lr 1.2e-04 | norm 1.2682 | dt 0.028 | |
| type train | step 18310 | loss 0.0275 0.2666 2.4427 10.8569 | lr 1.2e-04 | norm 1.0055 | dt 0.027 | |
| type train | step 18320 | loss 0.0279 0.2662 2.4981 11.1317 | lr 1.2e-04 | norm 1.1130 | dt 0.026 | |
| type train | step 18330 | loss 0.0278 0.2612 2.3983 10.5157 | lr 1.2e-04 | norm 1.1613 | dt 0.026 | |
| type train | step 18340 | loss 0.0294 0.2765 2.5092 11.0506 | lr 1.2e-04 | norm 1.3206 | dt 0.026 | |
| type train | step 18350 | loss 0.0271 0.2668 2.4135 10.4608 | lr 1.2e-04 | norm 1.1103 | dt 0.026 | |
| type train | step 18360 | loss 0.0272 0.2622 2.4426 10.8683 | lr 1.2e-04 | norm 1.0637 | dt 0.026 | |
| type train | step 18370 | loss 0.0273 0.2681 2.4185 10.5419 | lr 1.2e-04 | norm 0.8790 | dt 0.026 | |
| type train | step 18380 | loss 0.0289 0.2723 2.5847 11.4178 | lr 1.2e-04 | norm 1.0992 | dt 0.026 | |
| type train | step 18390 | loss 0.0275 0.2626 2.4170 10.6749 | lr 1.2e-04 | norm 1.1907 | dt 0.026 | |
| type train | step 18400 | loss 0.0274 0.2589 2.3518 10.5290 | lr 1.2e-04 | norm 0.9791 | dt 0.026 | |
| type train | step 18410 | loss 0.0279 0.2699 2.4648 10.6656 | lr 1.2e-04 | norm 1.0956 | dt 0.026 | |
| type train | step 18420 | loss 0.0288 0.2714 2.4172 10.5175 | lr 1.1e-04 | norm 1.0886 | dt 0.026 | |
| type train | step 18430 | loss 0.0275 0.2664 2.4005 10.5891 | lr 1.1e-04 | norm 0.9744 | dt 0.026 | |
| type train | step 18440 | loss 0.0302 0.2717 2.4270 10.8046 | lr 1.1e-04 | norm 1.6667 | dt 0.026 | |
| type train | step 18450 | loss 0.0283 0.2689 2.4906 10.8379 | lr 1.1e-04 | norm 1.2849 | dt 0.026 | |
| type train | step 18460 | loss 0.0280 0.2673 2.4395 10.8470 | lr 1.1e-04 | norm 1.0974 | dt 0.026 | |
| type train | step 18470 | loss 0.0269 0.2690 2.4403 10.5600 | lr 1.1e-04 | norm 1.1438 | dt 0.026 | |
| type train | step 18480 | loss 0.0280 0.2679 2.4304 10.7275 | lr 1.1e-04 | norm 1.1681 | dt 0.026 | |
| type train | step 18490 | loss 0.0275 0.2680 2.4558 10.5721 | lr 1.1e-04 | norm 1.2947 | dt 0.026 | |
| type train | step 18500 | loss 0.0298 0.2752 2.5036 11.1166 | lr 1.1e-04 | norm 1.1512 | dt 0.026 | |
| type train | step 18510 | loss 0.0290 0.2709 2.4050 10.6948 | lr 1.1e-04 | norm 1.1003 | dt 0.026 | |
| type train | step 18520 | loss 0.0275 0.2642 2.4492 10.7569 | lr 1.1e-04 | norm 1.0419 | dt 0.026 | |
| type train | step 18530 | loss 0.0268 0.2642 2.4311 10.7141 | lr 1.1e-04 | norm 1.0189 | dt 0.026 | |
| type train | step 18540 | loss 0.0285 0.2762 2.5091 10.9310 | lr 1.1e-04 | norm 1.2010 | dt 0.026 | |
| type train | step 18550 | loss 0.0276 0.2641 2.3707 10.4568 | lr 1.1e-04 | norm 0.9817 | dt 0.026 | |
| type train | step 18560 | loss 0.0281 0.2604 2.4055 10.7375 | lr 1.1e-04 | norm 1.0693 | dt 0.026 | |
| type train | step 18570 | loss 0.0287 0.2674 2.3879 10.5356 | lr 1.1e-04 | norm 0.9378 | dt 0.026 | |
| type train | step 18580 | loss 0.0288 0.2682 2.4952 10.8703 | lr 1.1e-04 | norm 1.1755 | dt 0.026 | |
| type train | step 18590 | loss 0.0276 0.2639 2.3841 10.5630 | lr 1.1e-04 | norm 1.1578 | dt 0.027 | |
| type train | step 18600 | loss 0.0283 0.2744 2.4558 10.7063 | lr 1.1e-04 | norm 1.2248 | dt 0.026 | |
| type train | step 18610 | loss 0.0284 0.2670 2.4257 10.6204 | lr 1.1e-04 | norm 1.2222 | dt 0.026 | |
| type train | step 18620 | loss 0.0282 0.2624 2.4201 10.7650 | lr 1.1e-04 | norm 1.2790 | dt 0.026 | |
| type train | step 18630 | loss 0.0284 0.2677 2.4909 10.9938 | lr 1.1e-04 | norm 0.9668 | dt 0.026 | |
| type train | step 18640 | loss 0.0275 0.2621 2.4734 10.8859 | lr 1.1e-04 | norm 1.0298 | dt 0.026 | |
| type train | step 18650 | loss 0.0277 0.2669 2.4440 10.7804 | lr 1.1e-04 | norm 1.1874 | dt 0.026 | |
| type train | step 18660 | loss 0.0277 0.2594 2.3502 10.4513 | lr 1.1e-04 | norm 1.1597 | dt 0.026 | |
| type train | step 18670 | loss 0.0279 0.2650 2.3751 10.6085 | lr 1.1e-04 | norm 1.2007 | dt 0.026 | |
| type train | step 18680 | loss 0.0293 0.2685 2.4413 10.8764 | lr 1.1e-04 | norm 1.1860 | dt 0.026 | |
| type train | step 18690 | loss 0.0278 0.2619 2.4711 10.9742 | lr 1.1e-04 | norm 1.0106 | dt 0.026 | |
| type train | step 18700 | loss 0.0284 0.2678 2.4297 10.5486 | lr 1.1e-04 | norm 1.1666 | dt 0.026 | |
| type train | step 18710 | loss 0.0281 0.2668 2.4350 10.7429 | lr 1.1e-04 | norm 0.9674 | dt 0.026 | |
| type train | step 18720 | loss 0.0278 0.2713 2.4420 10.6314 | lr 1.1e-04 | norm 1.2881 | dt 0.026 | |
| type train | step 18730 | loss 0.0277 0.2720 2.4775 10.8571 | lr 1.1e-04 | norm 1.0144 | dt 0.026 | |
| type train | step 18740 | loss 0.0281 0.2637 2.3806 10.5445 | lr 1.1e-04 | norm 1.2180 | dt 0.026 | |
| type train | step 18750 | loss 0.0286 0.2696 2.4568 10.8673 | lr 1.1e-04 | norm 0.9392 | dt 0.026 | |
| type train | step 18760 | loss 0.0277 0.2607 2.3462 10.2531 | lr 1.1e-04 | norm 1.1543 | dt 0.026 | |
| type train | step 18770 | loss 0.0289 0.2671 2.4472 10.7526 | lr 1.1e-04 | norm 1.0478 | dt 0.026 | |
| type train | step 18780 | loss 0.0281 0.2684 2.4692 10.8603 | lr 1.1e-04 | norm 1.0402 | dt 0.026 | |
| type train | step 18790 | loss 0.0274 0.2725 2.5107 10.9483 | lr 1.1e-04 | norm 1.1183 | dt 0.026 | |
| type train | step 18800 | loss 0.0287 0.2682 2.4336 10.8432 | lr 1.1e-04 | norm 1.2396 | dt 0.026 | |
| type train | step 18810 | loss 0.0281 0.2686 2.5156 11.1738 | lr 1.1e-04 | norm 1.0421 | dt 0.026 | |
| type train | step 18820 | loss 0.0277 0.2662 2.4926 10.9586 | lr 1.1e-04 | norm 1.2669 | dt 0.026 | |
| type train | step 18830 | loss 0.0292 0.2702 2.4870 11.0894 | lr 1.1e-04 | norm 1.0917 | dt 0.026 | |
| type train | step 18840 | loss 0.0267 0.2629 2.4097 10.4975 | lr 1.1e-04 | norm 1.0365 | dt 0.026 | |
| type train | step 18850 | loss 0.0273 0.2667 2.4351 10.7581 | lr 1.1e-04 | norm 1.1634 | dt 0.026 | |
| type train | step 18860 | loss 0.0291 0.2670 2.4114 10.7509 | lr 1.1e-04 | norm 1.3792 | dt 0.026 | |
| type train | step 18870 | loss 0.0280 0.2662 2.5364 11.3395 | lr 1.1e-04 | norm 1.1338 | dt 0.026 | |
| type train | step 18880 | loss 0.0277 0.2682 2.4247 10.4757 | lr 1.1e-04 | norm 1.3974 | dt 0.026 | |
| type train | step 18890 | loss 0.0287 0.2708 2.4886 11.0051 | lr 1.1e-04 | norm 1.2413 | dt 0.026 | |
| type train | step 18900 | loss 0.0269 0.2631 2.4262 10.5468 | lr 1.1e-04 | norm 1.0453 | dt 0.026 | |
| type train | step 18910 | loss 0.0275 0.2675 2.5426 11.1299 | lr 1.1e-04 | norm 1.2595 | dt 0.029 | |
| type train | step 18920 | loss 0.0274 0.2664 2.4406 10.8511 | lr 1.1e-04 | norm 0.9899 | dt 0.026 | |
| type train | step 18930 | loss 0.0279 0.2660 2.4956 11.1264 | lr 1.1e-04 | norm 1.0985 | dt 0.026 | |
| type train | step 18940 | loss 0.0276 0.2607 2.3969 10.5104 | lr 1.1e-04 | norm 1.1497 | dt 0.027 | |
| type train | step 18950 | loss 0.0294 0.2763 2.5077 11.0450 | lr 1.1e-04 | norm 1.3193 | dt 0.026 | |
| type train | step 18960 | loss 0.0269 0.2664 2.4118 10.4598 | lr 1.1e-04 | norm 1.0911 | dt 0.026 | |
| type train | step 18970 | loss 0.0269 0.2616 2.4406 10.8667 | lr 1.1e-04 | norm 1.0270 | dt 0.026 | |
| type train | step 18980 | loss 0.0273 0.2676 2.4179 10.5406 | lr 1.1e-04 | norm 0.8664 | dt 0.026 | |
| type train | step 18990 | loss 0.0289 0.2721 2.5823 11.4112 | lr 1.1e-04 | norm 1.1063 | dt 0.026 | |
| type train | step 19000 | loss 0.0274 0.2620 2.4165 10.6667 | lr 1.1e-04 | norm 1.1900 | dt 0.026 | |
| type train | step 19010 | loss 0.0273 0.2588 2.3511 10.5287 | lr 1.1e-04 | norm 0.9671 | dt 0.026 | |
| type train | step 19020 | loss 0.0278 0.2693 2.4632 10.6607 | lr 1.1e-04 | norm 1.0892 | dt 0.026 | |
| type train | step 19030 | loss 0.0287 0.2710 2.4165 10.5106 | lr 1.1e-04 | norm 1.0868 | dt 0.026 | |
| type train | step 19040 | loss 0.0275 0.2660 2.3990 10.5865 | lr 1.1e-04 | norm 0.9512 | dt 0.026 | |
| type train | step 19050 | loss 0.0300 0.2713 2.4250 10.8032 | lr 1.1e-04 | norm 1.6517 | dt 0.026 | |
| type train | step 19060 | loss 0.0281 0.2685 2.4882 10.8341 | lr 1.1e-04 | norm 1.2547 | dt 0.026 | |
| type train | step 19070 | loss 0.0279 0.2671 2.4369 10.8379 | lr 1.1e-04 | norm 1.0753 | dt 0.027 | |
| type train | step 19080 | loss 0.0269 0.2687 2.4378 10.5573 | lr 1.1e-04 | norm 1.1445 | dt 0.026 | |
| type train | step 19090 | loss 0.0278 0.2675 2.4285 10.7230 | lr 1.0e-04 | norm 1.1561 | dt 0.026 | |
| type train | step 19100 | loss 0.0274 0.2677 2.4542 10.5714 | lr 1.0e-04 | norm 1.2919 | dt 0.026 | |
| type train | step 19110 | loss 0.0296 0.2748 2.5026 11.1075 | lr 1.0e-04 | norm 1.1659 | dt 0.026 | |
| type train | step 19120 | loss 0.0289 0.2708 2.4030 10.6926 | lr 1.0e-04 | norm 1.0975 | dt 0.026 | |
| type train | step 19130 | loss 0.0273 0.2637 2.4479 10.7513 | lr 1.0e-04 | norm 1.0502 | dt 0.026 | |
| type train | step 19140 | loss 0.0268 0.2640 2.4298 10.7084 | lr 1.0e-04 | norm 1.0172 | dt 0.026 | |
| type train | step 19150 | loss 0.0283 0.2758 2.5074 10.9300 | lr 1.0e-04 | norm 1.1887 | dt 0.026 | |
| type train | step 19160 | loss 0.0275 0.2637 2.3685 10.4526 | lr 1.0e-04 | norm 0.9859 | dt 0.026 | |
| type train | step 19170 | loss 0.0279 0.2600 2.4039 10.7341 | lr 1.0e-04 | norm 1.0604 | dt 0.026 | |
| type train | step 19180 | loss 0.0286 0.2673 2.3858 10.5343 | lr 1.0e-04 | norm 0.9408 | dt 0.026 | |
| type train | step 19190 | loss 0.0288 0.2677 2.4925 10.8673 | lr 1.0e-04 | norm 1.1764 | dt 0.026 | |
| type train | step 19200 | loss 0.0275 0.2633 2.3834 10.5651 | lr 1.0e-04 | norm 1.1595 | dt 0.026 | |
| type train | step 19210 | loss 0.0282 0.2741 2.4544 10.6983 | lr 1.0e-04 | norm 1.2128 | dt 0.026 | |
| type train | step 19220 | loss 0.0284 0.2667 2.4241 10.6157 | lr 1.0e-04 | norm 1.2197 | dt 0.026 | |
| type train | step 19230 | loss 0.0281 0.2621 2.4182 10.7575 | lr 1.0e-04 | norm 1.2752 | dt 0.026 | |
| type train | step 19240 | loss 0.0284 0.2674 2.4895 10.9938 | lr 1.0e-04 | norm 0.9683 | dt 0.026 | |
| type train | step 19250 | loss 0.0274 0.2616 2.4720 10.8853 | lr 1.0e-04 | norm 1.0211 | dt 0.026 | |
| type train | step 19260 | loss 0.0276 0.2662 2.4425 10.7762 | lr 1.0e-04 | norm 1.1762 | dt 0.026 | |
| type train | step 19270 | loss 0.0277 0.2589 2.3474 10.4443 | lr 1.0e-04 | norm 1.1469 | dt 0.026 | |
| type train | step 19280 | loss 0.0276 0.2648 2.3738 10.6042 | lr 1.0e-04 | norm 1.1941 | dt 0.026 | |
| type train | step 19290 | loss 0.0292 0.2682 2.4390 10.8681 | lr 1.0e-04 | norm 1.1730 | dt 0.026 | |
| type train | step 19300 | loss 0.0277 0.2615 2.4684 10.9713 | lr 1.0e-04 | norm 1.0075 | dt 0.026 | |
| type train | step 19310 | loss 0.0283 0.2675 2.4290 10.5480 | lr 1.0e-04 | norm 1.1747 | dt 0.026 | |
| type train | step 19320 | loss 0.0279 0.2662 2.4336 10.7387 | lr 1.0e-04 | norm 0.9488 | dt 0.026 | |
| type train | step 19330 | loss 0.0277 0.2709 2.4403 10.6266 | lr 1.0e-04 | norm 1.2990 | dt 0.026 | |
| type train | step 19340 | loss 0.0276 0.2718 2.4776 10.8564 | lr 1.0e-04 | norm 1.0135 | dt 0.026 | |
| type train | step 19350 | loss 0.0280 0.2633 2.3790 10.5389 | lr 1.0e-04 | norm 1.1997 | dt 0.026 | |
| type train | step 19360 | loss 0.0286 0.2694 2.4556 10.8632 | lr 1.0e-04 | norm 0.9433 | dt 0.026 | |
| type train | step 19370 | loss 0.0276 0.2605 2.3456 10.2510 | lr 1.0e-04 | norm 1.1792 | dt 0.026 | |
| type train | step 19380 | loss 0.0287 0.2668 2.4452 10.7472 | lr 1.0e-04 | norm 1.0387 | dt 0.026 | |
| type train | step 19390 | loss 0.0280 0.2681 2.4674 10.8530 | lr 1.0e-04 | norm 1.0247 | dt 0.026 | |
| type train | step 19400 | loss 0.0273 0.2723 2.5092 10.9472 | lr 1.0e-04 | norm 1.1132 | dt 0.026 | |
| type train | step 19410 | loss 0.0285 0.2679 2.4323 10.8398 | lr 1.0e-04 | norm 1.2236 | dt 0.026 | |
| type train | step 19420 | loss 0.0280 0.2681 2.5129 11.1672 | lr 1.0e-04 | norm 1.0434 | dt 0.026 | |
| type train | step 19430 | loss 0.0276 0.2658 2.4913 10.9585 | lr 1.0e-04 | norm 1.2646 | dt 0.026 | |
| type train | step 19440 | loss 0.0288 0.2697 2.4869 11.0839 | lr 1.0e-04 | norm 1.0775 | dt 0.026 | |
| type train | step 19450 | loss 0.0266 0.2629 2.4085 10.4980 | lr 1.0e-04 | norm 1.0361 | dt 0.026 | |
| type train | step 19460 | loss 0.0272 0.2666 2.4327 10.7559 | lr 1.0e-04 | norm 1.1615 | dt 0.026 | |
| type train | step 19470 | loss 0.0290 0.2670 2.4090 10.7510 | lr 1.0e-04 | norm 1.3704 | dt 0.027 | |
| type train | step 19480 | loss 0.0278 0.2660 2.5353 11.3327 | lr 1.0e-04 | norm 1.1393 | dt 0.026 | |
| type train | step 19490 | loss 0.0276 0.2680 2.4235 10.4752 | lr 1.0e-04 | norm 1.4057 | dt 0.026 | |
| type train | step 19500 | loss 0.0286 0.2707 2.4877 10.9996 | lr 1.0e-04 | norm 1.2561 | dt 0.026 | |
| type train | step 19510 | loss 0.0268 0.2631 2.4238 10.5440 | lr 1.0e-04 | norm 1.0391 | dt 0.026 | |
| type train | step 19520 | loss 0.0274 0.2672 2.5425 11.1245 | lr 1.0e-04 | norm 1.2668 | dt 0.027 | |
| type train | step 19530 | loss 0.0273 0.2659 2.4387 10.8496 | lr 1.0e-04 | norm 0.9821 | dt 0.026 | |
| type train | step 19540 | loss 0.0277 0.2659 2.4940 11.1212 | lr 1.0e-04 | norm 1.0971 | dt 0.026 | |
| type train | step 19550 | loss 0.0275 0.2607 2.3957 10.5108 | lr 1.0e-04 | norm 1.1618 | dt 0.026 | |
| type train | step 19560 | loss 0.0293 0.2758 2.5066 11.0408 | lr 1.0e-04 | norm 1.3164 | dt 0.026 | |
| type train | step 19570 | loss 0.0268 0.2661 2.4100 10.4550 | lr 1.0e-04 | norm 1.0908 | dt 0.026 | |
| type train | step 19580 | loss 0.0268 0.2614 2.4382 10.8660 | lr 1.0e-04 | norm 1.0158 | dt 0.027 | |
| type train | step 19590 | loss 0.0272 0.2674 2.4171 10.5414 | lr 1.0e-04 | norm 0.8618 | dt 0.026 | |
| type train | step 19600 | loss 0.0287 0.2715 2.5811 11.4082 | lr 1.0e-04 | norm 1.0913 | dt 0.026 | |
| type train | step 19610 | loss 0.0273 0.2622 2.4139 10.6668 | lr 1.0e-04 | norm 1.1971 | dt 0.026 | |
| type train | step 19620 | loss 0.0271 0.2583 2.3493 10.5226 | lr 1.0e-04 | norm 0.9766 | dt 0.026 | |
| type train | step 19630 | loss 0.0277 0.2690 2.4626 10.6620 | lr 1.0e-04 | norm 1.0743 | dt 0.026 | |
| type train | step 19640 | loss 0.0285 0.2707 2.4150 10.5064 | lr 1.0e-04 | norm 1.0716 | dt 0.026 | |
| type train | step 19650 | loss 0.0274 0.2656 2.3973 10.5874 | lr 1.0e-04 | norm 0.9607 | dt 0.027 | |
| type train | step 19660 | loss 0.0299 0.2710 2.4239 10.8011 | lr 1.0e-04 | norm 1.6558 | dt 0.026 | |
| type train | step 19670 | loss 0.0279 0.2682 2.4877 10.8294 | lr 1.0e-04 | norm 1.2452 | dt 0.026 | |
| type train | step 19680 | loss 0.0278 0.2665 2.4353 10.8325 | lr 1.0e-04 | norm 1.0720 | dt 0.026 | |
| type train | step 19690 | loss 0.0267 0.2682 2.4360 10.5553 | lr 1.0e-04 | norm 1.1369 | dt 0.027 | |
| type train | step 19700 | loss 0.0276 0.2671 2.4273 10.7197 | lr 1.0e-04 | norm 1.1394 | dt 0.026 | |
| type train | step 19710 | loss 0.0273 0.2674 2.4527 10.5704 | lr 1.0e-04 | norm 1.2860 | dt 0.025 | |
| type train | step 19720 | loss 0.0295 0.2746 2.5016 11.1032 | lr 1.0e-04 | norm 1.1534 | dt 0.026 | |
| type train | step 19730 | loss 0.0288 0.2705 2.4011 10.6891 | lr 1.0e-04 | norm 1.0935 | dt 0.026 | |
| type train | step 19740 | loss 0.0272 0.2632 2.4450 10.7490 | lr 1.0e-04 | norm 1.0425 | dt 0.026 | |
| type train | step 19750 | loss 0.0266 0.2638 2.4278 10.7063 | lr 1.0e-04 | norm 1.0179 | dt 0.026 | |
| type train | step 19760 | loss 0.0282 0.2756 2.5060 10.9248 | lr 1.0e-04 | norm 1.1910 | dt 0.030 | |
| type train | step 19770 | loss 0.0274 0.2635 2.3664 10.4561 | lr 1.0e-04 | norm 0.9807 | dt 0.029 | |
| type train | step 19780 | loss 0.0278 0.2599 2.4035 10.7323 | lr 1.0e-04 | norm 1.0541 | dt 0.029 | |
| type train | step 19790 | loss 0.0285 0.2670 2.3849 10.5349 | lr 1.0e-04 | norm 0.9354 | dt 0.027 | |
| type train | step 19800 | loss 0.0286 0.2675 2.4910 10.8643 | lr 1.0e-04 | norm 1.1827 | dt 0.026 | |
| type train | step 19810 | loss 0.0272 0.2633 2.3807 10.5656 | lr 1.0e-04 | norm 1.1544 | dt 0.026 | |
| type train | step 19820 | loss 0.0282 0.2741 2.4531 10.6963 | lr 1.0e-04 | norm 1.2139 | dt 0.026 | |
| type train | step 19830 | loss 0.0282 0.2661 2.4232 10.6108 | lr 1.0e-04 | norm 1.2220 | dt 0.026 | |
| type train | step 19840 | loss 0.0279 0.2618 2.4162 10.7511 | lr 1.0e-04 | norm 1.2572 | dt 0.026 | |
| type train | step 19850 | loss 0.0283 0.2674 2.4881 10.9921 | lr 1.0e-04 | norm 0.9829 | dt 0.026 | |
| type train | step 19860 | loss 0.0273 0.2615 2.4702 10.8854 | lr 1.0e-04 | norm 1.0138 | dt 0.026 | |
| type train | step 19870 | loss 0.0275 0.2661 2.4398 10.7774 | lr 1.0e-04 | norm 1.1610 | dt 0.026 | |
| type train | step 19880 | loss 0.0275 0.2588 2.3447 10.4403 | lr 1.0e-04 | norm 1.1434 | dt 0.026 | |
| type train | step 19890 | loss 0.0275 0.2645 2.3731 10.6034 | lr 1.0e-04 | norm 1.1913 | dt 0.026 | |
| type train | step 19900 | loss 0.0291 0.2680 2.4380 10.8649 | lr 1.0e-04 | norm 1.1715 | dt 0.026 | |
| type train | step 19910 | loss 0.0275 0.2615 2.4675 10.9694 | lr 1.0e-04 | norm 1.0118 | dt 0.026 | |
| type train | step 19920 | loss 0.0282 0.2673 2.4283 10.5476 | lr 1.0e-04 | norm 1.1692 | dt 0.026 | |
| type train | step 19930 | loss 0.0278 0.2663 2.4326 10.7373 | lr 1.0e-04 | norm 0.9444 | dt 0.026 | |
| type train | step 19940 | loss 0.0277 0.2705 2.4392 10.6224 | lr 1.0e-04 | norm 1.2848 | dt 0.026 | |
| type train | step 19950 | loss 0.0275 0.2717 2.4762 10.8572 | lr 1.0e-04 | norm 1.0074 | dt 0.026 | |
| type train | step 19960 | loss 0.0279 0.2633 2.3771 10.5370 | lr 1.0e-04 | norm 1.2027 | dt 0.026 | |
| type train | step 19970 | loss 0.0284 0.2690 2.4536 10.8627 | lr 1.0e-04 | norm 0.9364 | dt 0.026 | |
| type train | step 19980 | loss 0.0275 0.2602 2.3442 10.2518 | lr 1.0e-04 | norm 1.1643 | dt 0.026 | |
| type train | step 19990 | loss 0.0287 0.2665 2.4430 10.7421 | lr 1.0e-04 | norm 1.0291 | dt 0.026 | |
| type train | step 20000 | loss 0.0278 0.2676 2.4652 10.8475 | lr 1.0e-04 | norm 1.0262 | dt 0.026 | |