| type train | step 10 | loss 28.0993 28.5301 28.7057 81.6879 | lr 1.3e-05 | norm 64.9445 | dt 0.023 | |
| type train | step 20 | loss 27.7327 28.3385 28.2366 80.9877 | lr 2.7e-05 | norm 66.2509 | dt 0.024 | |
| type train | step 30 | loss 27.2789 27.8336 28.0822 80.2899 | lr 4.0e-05 | norm 64.6159 | dt 0.024 | |
| type train | step 40 | loss 26.5784 27.5673 27.5447 77.0393 | lr 5.3e-05 | norm 59.4472 | dt 0.024 | |
| type train | step 50 | loss 25.9618 26.5494 26.9634 76.5858 | lr 6.7e-05 | norm 61.5857 | dt 0.024 | |
| type train | step 60 | loss 24.8168 25.5476 26.0907 72.7405 | lr 8.0e-05 | norm 58.2491 | dt 0.023 | |
| type train | step 70 | loss 23.7141 24.5806 25.2642 70.7902 | lr 9.3e-05 | norm 57.1319 | dt 0.024 | |
| type train | step 80 | loss 22.5552 23.6960 24.2113 65.6807 | lr 1.1e-04 | norm 51.6726 | dt 0.023 | |
| type train | step 90 | loss 21.3672 22.3904 23.4330 63.7520 | lr 1.2e-04 | norm 50.6217 | dt 0.023 | |
| type train | step 100 | loss 19.7931 20.9172 21.9636 59.3617 | lr 1.3e-04 | norm 48.5317 | dt 0.024 | |
| type train | step 110 | loss 18.4489 19.7000 20.9690 55.4989 | lr 1.5e-04 | norm 43.7536 | dt 0.024 | |
| type train | step 120 | loss 17.0286 18.5698 20.0958 51.1061 | lr 1.6e-04 | norm 38.5913 | dt 0.024 | |
| type train | step 130 | loss 15.4081 16.9275 18.6645 47.3916 | lr 1.7e-04 | norm 35.9750 | dt 0.024 | |
| type train | step 140 | loss 14.0786 15.9175 17.5251 42.8245 | lr 1.9e-04 | norm 32.0030 | dt 0.023 | |
| type train | step 150 | loss 12.5426 14.2288 16.0418 39.9460 | lr 2.0e-04 | norm 29.8958 | dt 0.024 | |
| type train | step 160 | loss 11.1670 13.0159 14.8237 35.9313 | lr 2.1e-04 | norm 27.2819 | dt 0.024 | |
| type train | step 170 | loss 9.7650 11.5461 13.4021 32.1525 | lr 2.3e-04 | norm 24.2403 | dt 0.024 | |
| type train | step 180 | loss 8.6342 10.4718 12.4502 28.6411 | lr 2.4e-04 | norm 19.5155 | dt 0.024 | |
| type train | step 190 | loss 7.5240 9.2846 11.3982 25.8706 | lr 2.5e-04 | norm 16.6732 | dt 0.024 | |
| type train | step 200 | loss 6.5025 8.2833 10.2953 22.8981 | lr 2.7e-04 | norm 14.1250 | dt 0.024 | |
| type train | step 210 | loss 5.5695 7.2397 9.1906 20.3499 | lr 2.8e-04 | norm 11.8505 | dt 0.023 | |
| type train | step 220 | loss 4.7115 6.3056 8.2466 17.9755 | lr 2.9e-04 | norm 9.7998 | dt 0.024 | |
| type train | step 230 | loss 3.9999 5.4404 7.2388 15.7891 | lr 3.1e-04 | norm 7.7817 | dt 0.024 | |
| type train | step 240 | loss 3.5065 4.9251 6.6989 14.5437 | lr 3.2e-04 | norm 6.1933 | dt 0.024 | |
| type train | step 250 | loss 3.0500 4.2351 5.8765 13.1159 | lr 3.3e-04 | norm 5.0745 | dt 0.024 | |
| type train | step 260 | loss 2.6897 3.7723 5.2702 11.5561 | lr 3.5e-04 | norm 4.3106 | dt 0.024 | |
| type train | step 270 | loss 2.4436 3.3891 4.7610 10.3131 | lr 3.6e-04 | norm 3.6766 | dt 0.024 | |
| type train | step 280 | loss 2.2066 3.0956 4.3855 9.6164 | lr 3.7e-04 | norm 3.2149 | dt 0.024 | |
| type train | step 290 | loss 1.9555 2.7810 3.9237 8.4641 | lr 3.9e-04 | norm 2.7692 | dt 0.024 | |
| type train | step 300 | loss 1.7836 2.6172 3.6897 7.6784 | lr 4.0e-04 | norm 2.4166 | dt 0.024 | |
| type train | step 310 | loss 1.6270 2.4151 3.4267 7.1627 | lr 4.1e-04 | norm 2.1367 | dt 0.024 | |
| type train | step 320 | loss 1.4610 2.2392 3.1793 6.4219 | lr 4.3e-04 | norm 1.8591 | dt 0.024 | |
| type train | step 330 | loss 1.3079 2.0903 2.9658 5.7340 | lr 4.4e-04 | norm 1.5630 | dt 0.024 | |
| type train | step 340 | loss 1.1623 1.9154 2.7538 5.2850 | lr 4.5e-04 | norm 1.3128 | dt 0.024 | |
| type train | step 350 | loss 1.0621 1.7886 2.6022 4.9742 | lr 4.7e-04 | norm 1.1056 | dt 0.024 | |
| type train | step 360 | loss 0.9429 1.6523 2.4334 4.6088 | lr 4.8e-04 | norm 0.8927 | dt 0.024 | |
| type train | step 370 | loss 0.8389 1.5524 2.3450 4.4006 | lr 4.9e-04 | norm 0.7571 | dt 0.024 | |
| type train | step 380 | loss 0.7878 1.4700 2.2491 4.2190 | lr 5.1e-04 | norm 0.6604 | dt 0.024 | |
| type train | step 390 | loss 0.6722 1.3412 2.0684 3.8211 | lr 5.2e-04 | norm 0.5964 | dt 0.024 | |
| type train | step 400 | loss 0.6111 1.2362 1.9609 3.6590 | lr 5.3e-04 | norm 0.5036 | dt 0.024 | |
| type train | step 410 | loss 0.5497 1.1740 1.8895 3.5040 | lr 5.5e-04 | norm 0.4341 | dt 0.024 | |
| type train | step 420 | loss 0.4968 1.1038 1.7917 3.3964 | lr 5.6e-04 | norm 0.4649 | dt 0.024 | |
| type train | step 430 | loss 0.4312 1.0405 1.7361 3.2469 | lr 5.7e-04 | norm 0.3849 | dt 0.024 | |
| type train | step 440 | loss 0.3844 0.9580 1.6311 3.0879 | lr 5.9e-04 | norm 0.3672 | dt 0.024 | |
| type train | step 450 | loss 0.3497 0.9233 1.5752 2.9625 | lr 6.0e-04 | norm 0.3428 | dt 0.024 | |
| type train | step 460 | loss 0.3064 0.8318 1.4694 2.7960 | lr 6.1e-04 | norm 0.3341 | dt 0.024 | |
| type train | step 470 | loss 0.2849 0.8188 1.4541 2.7396 | lr 6.3e-04 | norm 0.2757 | dt 0.024 | |
| type train | step 480 | loss 0.2478 0.7627 1.3806 2.6587 | lr 6.4e-04 | norm 0.2636 | dt 0.024 | |
| type train | step 490 | loss 0.2204 0.7121 1.3371 2.5669 | lr 6.5e-04 | norm 0.2574 | dt 0.024 | |
| type train | step 500 | loss 0.1990 0.6879 1.2895 2.4867 | lr 6.7e-04 | norm 0.2459 | dt 0.024 | |
| type train | step 510 | loss 0.1762 0.6508 1.2469 2.4128 | lr 6.8e-04 | norm 0.2368 | dt 0.024 | |
| type train | step 520 | loss 0.1551 0.6014 1.2015 2.3451 | lr 6.9e-04 | norm 0.2456 | dt 0.024 | |
| type train | step 530 | loss 0.1503 0.5890 1.1750 2.3069 | lr 7.1e-04 | norm 0.2066 | dt 0.024 | |
| type train | step 540 | loss 0.1227 0.5388 1.0849 2.1875 | lr 7.2e-04 | norm 0.2027 | dt 0.024 | |
| type train | step 550 | loss 0.1128 0.5265 1.0849 2.1702 | lr 7.3e-04 | norm 0.2374 | dt 0.024 | |
| type train | step 560 | loss 0.1125 0.5056 1.0578 2.1181 | lr 7.5e-04 | norm 0.2307 | dt 0.024 | |
| type train | step 570 | loss 0.0976 0.4783 1.0366 2.1040 | lr 7.6e-04 | norm 0.1842 | dt 0.024 | |
| type train | step 580 | loss 0.0912 0.4470 0.9596 1.9976 | lr 7.7e-04 | norm 0.2353 | dt 0.024 | |
| type train | step 590 | loss 0.0856 0.4362 0.9683 1.9961 | lr 7.9e-04 | norm 0.1915 | dt 0.024 | |
| type train | step 600 | loss 0.0732 0.4027 0.9012 1.9050 | lr 8.0e-04 | norm 0.1717 | dt 0.024 | |
| type train | step 610 | loss 0.0710 0.3884 0.9021 1.9219 | lr 8.1e-04 | norm 0.1884 | dt 0.025 | |
| type train | step 620 | loss 0.0656 0.3750 0.8696 1.8586 | lr 8.3e-04 | norm 0.1652 | dt 0.024 | |
| type train | step 630 | loss 0.0621 0.3644 0.8685 1.8616 | lr 8.4e-04 | norm 0.1874 | dt 0.024 | |
| type train | step 640 | loss 0.0595 0.3347 0.8143 1.7750 | lr 8.5e-04 | norm 0.1824 | dt 0.025 | |
| type train | step 650 | loss 0.0626 0.3430 0.8494 1.8381 | lr 8.7e-04 | norm 0.2197 | dt 0.024 | |
| type train | step 660 | loss 0.0549 0.3185 0.7653 1.7058 | lr 8.8e-04 | norm 0.1643 | dt 0.024 | |
| type train | step 670 | loss 0.0505 0.2973 0.7507 1.7069 | lr 8.9e-04 | norm 0.2002 | dt 0.024 | |
| type train | step 680 | loss 0.0486 0.2884 0.7376 1.6727 | lr 9.1e-04 | norm 0.1478 | dt 0.024 | |
| type train | step 690 | loss 0.0498 0.2954 0.7830 1.7476 | lr 9.2e-04 | norm 0.1577 | dt 0.024 | |
| type train | step 700 | loss 0.0470 0.2722 0.7158 1.6412 | lr 9.3e-04 | norm 0.1732 | dt 0.024 | |
| type train | step 710 | loss 0.0437 0.2586 0.6895 1.5995 | lr 9.5e-04 | norm 0.1642 | dt 0.024 | |
| type train | step 720 | loss 0.0454 0.2585 0.6840 1.5939 | lr 9.6e-04 | norm 0.1635 | dt 0.024 | |
| type train | step 730 | loss 0.0446 0.2521 0.6728 1.5797 | lr 9.7e-04 | norm 0.1502 | dt 0.024 | |
| type train | step 740 | loss 0.0413 0.2382 0.6611 1.5733 | lr 9.9e-04 | norm 0.1459 | dt 0.024 | |
| type train | step 750 | loss 0.0478 0.2569 0.7011 1.6059 | lr 1.0e-03 | norm 0.2144 | dt 0.024 | |
| type train | step 760 | loss 0.0416 0.2323 0.6490 1.5623 | lr 1.0e-03 | norm 0.1883 | dt 0.024 | |
| type train | step 770 | loss 0.0388 0.2244 0.6437 1.5470 | lr 1.0e-03 | norm 0.1977 | dt 0.024 | |
| type train | step 780 | loss 0.0380 0.2211 0.6134 1.4893 | lr 1.0e-03 | norm 0.1739 | dt 0.024 | |
| type train | step 790 | loss 0.0377 0.2171 0.6204 1.5110 | lr 1.0e-03 | norm 0.1528 | dt 0.024 | |
| type train | step 800 | loss 0.0368 0.2062 0.6088 1.5106 | lr 1.0e-03 | norm 0.1551 | dt 0.024 | |
| type train | step 810 | loss 0.0384 0.2136 0.6339 1.5368 | lr 1.0e-03 | norm 0.1457 | dt 0.024 | |
| type train | step 820 | loss 0.0368 0.2020 0.5983 1.4794 | lr 1.0e-03 | norm 0.1639 | dt 0.024 | |
| type train | step 830 | loss 0.0346 0.1957 0.5912 1.4621 | lr 1.0e-03 | norm 0.1748 | dt 0.024 | |
| type train | step 840 | loss 0.0326 0.1905 0.5655 1.4299 | lr 1.0e-03 | norm 0.1538 | dt 0.024 | |
| type train | step 850 | loss 0.0348 0.1971 0.5877 1.4578 | lr 1.0e-03 | norm 0.1458 | dt 0.024 | |
| type train | step 860 | loss 0.0332 0.1817 0.5511 1.4200 | lr 1.0e-03 | norm 0.1348 | dt 0.024 | |
| type train | step 870 | loss 0.0322 0.1800 0.5544 1.4106 | lr 1.0e-03 | norm 0.1415 | dt 0.024 | |
| type train | step 880 | loss 0.0331 0.1791 0.5501 1.3907 | lr 1.0e-03 | norm 0.1425 | dt 0.024 | |
| type train | step 890 | loss 0.0330 0.1793 0.5673 1.4434 | lr 1.0e-03 | norm 0.1902 | dt 0.024 | |
| type train | step 900 | loss 0.0310 0.1708 0.5246 1.3807 | lr 1.0e-03 | norm 0.1850 | dt 0.024 | |
| type train | step 910 | loss 0.0313 0.1784 0.5415 1.3937 | lr 1.0e-03 | norm 0.1560 | dt 0.024 | |
| type train | step 920 | loss 0.0312 0.1680 0.5344 1.3918 | lr 1.0e-03 | norm 0.1432 | dt 0.024 | |
| type train | step 930 | loss 0.0302 0.1637 0.5296 1.3849 | lr 1.0e-03 | norm 0.1524 | dt 0.024 | |
| type train | step 940 | loss 0.0302 0.1654 0.5371 1.3907 | lr 1.0e-03 | norm 0.1289 | dt 0.024 | |
| type train | step 950 | loss 0.0291 0.1609 0.5234 1.3775 | lr 1.0e-03 | norm 0.1409 | dt 0.024 | |
| type train | step 960 | loss 0.0292 0.1610 0.5108 1.3662 | lr 1.0e-03 | norm 0.1820 | dt 0.024 | |
| type train | step 970 | loss 0.0284 0.1528 0.4938 1.3200 | lr 1.0e-03 | norm 0.1654 | dt 0.024 | |
| type train | step 980 | loss 0.0285 0.1552 0.5087 1.3511 | lr 1.0e-03 | norm 0.1541 | dt 0.024 | |
| type train | step 990 | loss 0.0291 0.1571 0.5302 1.3857 | lr 1.0e-03 | norm 0.1406 | dt 0.024 | |
| type train | step 1000 | loss 0.0275 0.1519 0.5076 1.3455 | lr 1.0e-03 | norm 0.1288 | dt 0.024 | |
| type train | step 10 | loss 25.9101 27.3303 28.2457 89.3254 | lr 1.3e-05 | norm 69.7924 | dt 0.023 | |
| type train | step 20 | loss 25.4809 27.0391 27.8475 88.5277 | lr 2.7e-05 | norm 71.3301 | dt 0.024 | |
| type train | step 30 | loss 25.2028 26.5788 27.6163 88.3686 | lr 4.0e-05 | norm 70.5554 | dt 0.024 | |
| type train | step 40 | loss 24.7535 26.1581 27.2754 83.7040 | lr 5.3e-05 | norm 63.5945 | dt 0.024 | |
| type train | step 50 | loss 23.8413 25.5250 26.5187 84.0489 | lr 6.7e-05 | norm 66.5081 | dt 0.024 | |
| type train | step 60 | loss 22.7843 24.4819 25.8016 80.0080 | lr 8.0e-05 | norm 63.0724 | dt 0.024 | |
| type train | step 70 | loss 21.7047 23.4984 24.8926 77.7093 | lr 9.3e-05 | norm 61.8568 | dt 0.024 | |
| type train | step 80 | loss 20.7528 22.5655 23.8486 72.3677 | lr 1.1e-04 | norm 55.8134 | dt 0.024 | |
| type train | step 90 | loss 19.6151 21.5008 23.0674 70.6424 | lr 1.2e-04 | norm 55.7926 | dt 0.024 | |
| type train | step 100 | loss 18.0031 20.1146 21.5680 65.5237 | lr 1.3e-04 | norm 53.5105 | dt 0.024 | |
| type train | step 110 | loss 16.8292 18.9982 20.6106 61.6022 | lr 1.5e-04 | norm 48.0362 | dt 0.023 | |
| type train | step 120 | loss 15.6725 17.9229 19.7391 56.1155 | lr 1.6e-04 | norm 42.0378 | dt 0.023 | |
| type train | step 130 | loss 14.0654 16.4335 18.3643 52.7527 | lr 1.7e-04 | norm 39.6577 | dt 0.024 | |
| type train | step 140 | loss 12.8948 15.4512 17.2235 47.4030 | lr 1.9e-04 | norm 35.4235 | dt 0.024 | |
| type train | step 150 | loss 11.3783 13.8342 15.7629 44.6189 | lr 2.0e-04 | norm 33.4340 | dt 0.024 | |
| type train | step 160 | loss 10.0823 12.6123 14.5743 39.8991 | lr 2.1e-04 | norm 30.6982 | dt 0.024 | |
| type train | step 170 | loss 8.7943 11.2589 13.1598 36.2543 | lr 2.3e-04 | norm 27.9405 | dt 0.024 | |
| type train | step 180 | loss 7.8291 10.1944 12.2324 31.8039 | lr 2.4e-04 | norm 22.0405 | dt 0.024 | |
| type train | step 190 | loss 6.7613 9.1196 11.1181 28.9262 | lr 2.5e-04 | norm 19.0480 | dt 0.024 | |
| type train | step 200 | loss 5.9250 8.0817 10.1170 25.5367 | lr 2.7e-04 | norm 16.3699 | dt 0.024 | |
| type train | step 210 | loss 5.0537 7.0587 8.9766 22.7969 | lr 2.8e-04 | norm 13.9290 | dt 0.023 | |
| type train | step 220 | loss 4.2772 6.1701 8.0641 20.1499 | lr 2.9e-04 | norm 11.5359 | dt 0.024 | |
| type train | step 230 | loss 3.6608 5.3427 7.1002 17.8214 | lr 3.1e-04 | norm 9.3338 | dt 0.024 | |
| type train | step 240 | loss 3.2395 4.7941 6.5830 16.3664 | lr 3.2e-04 | norm 7.1309 | dt 0.024 | |
| type train | step 250 | loss 2.8456 4.1453 5.7791 14.6987 | lr 3.3e-04 | norm 5.7499 | dt 0.024 | |
| type train | step 260 | loss 2.5130 3.6866 5.1828 12.8494 | lr 3.5e-04 | norm 4.5753 | dt 0.024 | |
| type train | step 270 | loss 2.2978 3.3101 4.6467 11.4431 | lr 3.6e-04 | norm 3.8523 | dt 0.024 | |
| type train | step 280 | loss 2.1059 3.0287 4.2996 10.4818 | lr 3.7e-04 | norm 3.3851 | dt 0.024 | |
| type train | step 290 | loss 1.8808 2.7472 3.8433 9.2215 | lr 3.9e-04 | norm 2.9509 | dt 0.024 | |
| type train | step 300 | loss 1.7194 2.5949 3.6105 8.2932 | lr 4.0e-04 | norm 2.5409 | dt 0.024 | |
| type train | step 310 | loss 1.5780 2.3867 3.3701 7.6602 | lr 4.1e-04 | norm 2.2140 | dt 0.024 | |
| type train | step 320 | loss 1.4159 2.2124 3.1246 6.8345 | lr 4.3e-04 | norm 1.9187 | dt 0.024 | |
| type train | step 330 | loss 1.2663 2.0713 2.9278 6.0721 | lr 4.4e-04 | norm 1.6122 | dt 0.024 | |
| type train | step 340 | loss 1.1236 1.9020 2.7248 5.5323 | lr 4.5e-04 | norm 1.3842 | dt 0.024 | |
| type train | step 350 | loss 1.0206 1.7815 2.5691 5.1488 | lr 4.7e-04 | norm 1.2075 | dt 0.024 | |
| type train | step 360 | loss 0.9153 1.6481 2.4105 4.7349 | lr 4.8e-04 | norm 1.0269 | dt 0.024 | |
| type train | step 370 | loss 0.8119 1.5494 2.3155 4.4976 | lr 4.9e-04 | norm 0.8727 | dt 0.024 | |
| type train | step 380 | loss 0.7587 1.4662 2.2270 4.3045 | lr 5.1e-04 | norm 0.7138 | dt 0.024 | |
| type train | step 390 | loss 0.6398 1.3384 2.0494 3.8721 | lr 5.2e-04 | norm 0.6313 | dt 0.024 | |
| type train | step 400 | loss 0.5802 1.2434 1.9337 3.7025 | lr 5.3e-04 | norm 0.5410 | dt 0.024 | |
| type train | step 410 | loss 0.5195 1.1818 1.8651 3.5465 | lr 5.5e-04 | norm 0.4816 | dt 0.024 | |
| type train | step 420 | loss 0.4741 1.1122 1.7714 3.4082 | lr 5.6e-04 | norm 0.4816 | dt 0.024 | |
| type train | step 430 | loss 0.4097 1.0436 1.7071 3.2743 | lr 5.7e-04 | norm 0.4094 | dt 0.024 | |
| type train | step 440 | loss 0.3657 0.9608 1.6126 3.0877 | lr 5.9e-04 | norm 0.3719 | dt 0.024 | |
| type train | step 450 | loss 0.3325 0.9240 1.5620 2.9728 | lr 6.0e-04 | norm 0.3103 | dt 0.024 | |
| type train | step 460 | loss 0.2898 0.8427 1.4487 2.8110 | lr 6.1e-04 | norm 0.3102 | dt 0.024 | |
| type train | step 470 | loss 0.2737 0.8266 1.4458 2.7452 | lr 6.3e-04 | norm 0.2626 | dt 0.024 | |
| type train | step 480 | loss 0.2368 0.7745 1.3688 2.6707 | lr 6.4e-04 | norm 0.2448 | dt 0.024 | |
| type train | step 490 | loss 0.2105 0.7224 1.3203 2.5852 | lr 6.5e-04 | norm 0.2436 | dt 0.024 | |
| type train | step 500 | loss 0.1901 0.6939 1.2815 2.5033 | lr 6.7e-04 | norm 0.2515 | dt 0.024 | |
| type train | step 510 | loss 0.1706 0.6568 1.2437 2.4344 | lr 6.8e-04 | norm 0.2315 | dt 0.024 | |
| type train | step 520 | loss 0.1478 0.6124 1.1971 2.3617 | lr 6.9e-04 | norm 0.2442 | dt 0.024 | |
| type train | step 530 | loss 0.1444 0.5972 1.1764 2.3265 | lr 7.1e-04 | norm 0.2173 | dt 0.024 | |
| type train | step 540 | loss 0.1180 0.5481 1.0821 2.2020 | lr 7.2e-04 | norm 0.1873 | dt 0.024 | |
| type train | step 550 | loss 0.1075 0.5316 1.0819 2.1880 | lr 7.3e-04 | norm 0.2222 | dt 0.024 | |
| type train | step 560 | loss 0.1081 0.5126 1.0570 2.1333 | lr 7.5e-04 | norm 0.2326 | dt 0.024 | |
| type train | step 570 | loss 0.0941 0.4852 1.0361 2.1130 | lr 7.6e-04 | norm 0.1736 | dt 0.024 | |
| type train | step 580 | loss 0.0865 0.4532 0.9614 2.0006 | lr 7.7e-04 | norm 0.2344 | dt 0.024 | |
| type train | step 590 | loss 0.0829 0.4431 0.9736 2.0100 | lr 7.9e-04 | norm 0.1956 | dt 0.024 | |
| type train | step 600 | loss 0.0701 0.4090 0.8992 1.9190 | lr 8.0e-04 | norm 0.1634 | dt 0.024 | |
| type train | step 610 | loss 0.0680 0.3954 0.9025 1.9275 | lr 8.1e-04 | norm 0.1942 | dt 0.025 | |
| type train | step 620 | loss 0.0631 0.3805 0.8728 1.8688 | lr 8.3e-04 | norm 0.1600 | dt 0.024 | |
| type train | step 630 | loss 0.0603 0.3697 0.8715 1.8674 | lr 8.4e-04 | norm 0.1726 | dt 0.024 | |
| type train | step 640 | loss 0.0567 0.3409 0.8176 1.7879 | lr 8.5e-04 | norm 0.1546 | dt 0.024 | |
| type train | step 650 | loss 0.0613 0.3495 0.8543 1.8515 | lr 8.7e-04 | norm 0.2045 | dt 0.024 | |
| type train | step 660 | loss 0.0523 0.3236 0.7689 1.7173 | lr 8.8e-04 | norm 0.1619 | dt 0.024 | |
| type train | step 670 | loss 0.0489 0.3016 0.7544 1.7130 | lr 8.9e-04 | norm 0.1840 | dt 0.024 | |
| type train | step 680 | loss 0.0467 0.2948 0.7393 1.6853 | lr 9.1e-04 | norm 0.1333 | dt 0.024 | |
| type train | step 690 | loss 0.0481 0.3003 0.7857 1.7644 | lr 9.2e-04 | norm 0.1516 | dt 0.024 | |
| type train | step 700 | loss 0.0455 0.2764 0.7157 1.6498 | lr 9.3e-04 | norm 0.1650 | dt 0.025 | |
| type train | step 710 | loss 0.0428 0.2610 0.6910 1.6114 | lr 9.5e-04 | norm 0.1613 | dt 0.024 | |
| type train | step 720 | loss 0.0439 0.2623 0.6850 1.6072 | lr 9.6e-04 | norm 0.2165 | dt 0.024 | |
| type train | step 730 | loss 0.0428 0.2571 0.6767 1.5924 | lr 9.7e-04 | norm 0.1559 | dt 0.024 | |
| type train | step 740 | loss 0.0396 0.2411 0.6601 1.5803 | lr 9.9e-04 | norm 0.1442 | dt 0.024 | |
| type train | step 750 | loss 0.0457 0.2560 0.6969 1.6098 | lr 1.0e-03 | norm 0.2112 | dt 0.024 | |
| type train | step 760 | loss 0.0403 0.2358 0.6495 1.5741 | lr 1.0e-03 | norm 0.1977 | dt 0.024 | |
| type train | step 770 | loss 0.0378 0.2258 0.6421 1.5570 | lr 1.0e-03 | norm 0.1901 | dt 0.024 | |
| type train | step 780 | loss 0.0368 0.2218 0.6116 1.5042 | lr 1.0e-03 | norm 0.1838 | dt 0.024 | |
| type train | step 790 | loss 0.0361 0.2188 0.6201 1.5162 | lr 1.0e-03 | norm 0.1445 | dt 0.024 | |
| type train | step 800 | loss 0.0352 0.2077 0.6045 1.5228 | lr 1.0e-03 | norm 0.1457 | dt 0.024 | |
| type train | step 810 | loss 0.0369 0.2143 0.6272 1.5460 | lr 1.0e-03 | norm 0.1407 | dt 0.024 | |
| type train | step 820 | loss 0.0352 0.2004 0.5919 1.4848 | lr 1.0e-03 | norm 0.1656 | dt 0.024 | |
| type train | step 830 | loss 0.0334 0.1957 0.5841 1.4681 | lr 1.0e-03 | norm 0.1711 | dt 0.025 | |
| type train | step 840 | loss 0.0314 0.1909 0.5580 1.4443 | lr 1.0e-03 | norm 0.1470 | dt 0.024 | |
| type train | step 850 | loss 0.0333 0.1982 0.5817 1.4678 | lr 1.0e-03 | norm 0.1433 | dt 0.024 | |
| type train | step 860 | loss 0.0316 0.1811 0.5455 1.4204 | lr 1.0e-03 | norm 0.1194 | dt 0.024 | |
| type train | step 870 | loss 0.0308 0.1799 0.5439 1.4171 | lr 1.0e-03 | norm 0.1296 | dt 0.024 | |
| type train | step 880 | loss 0.0319 0.1779 0.5382 1.4083 | lr 1.0e-03 | norm 0.1359 | dt 0.024 | |
| type train | step 890 | loss 0.0317 0.1778 0.5580 1.4570 | lr 1.0e-03 | norm 0.2148 | dt 0.024 | |
| type train | step 900 | loss 0.0299 0.1698 0.5137 1.3851 | lr 1.0e-03 | norm 0.1704 | dt 0.024 | |
| type train | step 910 | loss 0.0301 0.1783 0.5323 1.4039 | lr 1.0e-03 | norm 0.1530 | dt 0.025 | |
| type train | step 920 | loss 0.0298 0.1671 0.5273 1.3992 | lr 1.0e-03 | norm 0.1404 | dt 0.024 | |
| type train | step 930 | loss 0.0289 0.1637 0.5184 1.3926 | lr 1.0e-03 | norm 0.1368 | dt 0.024 | |
| type train | step 940 | loss 0.0291 0.1644 0.5256 1.3987 | lr 1.0e-03 | norm 0.1311 | dt 0.024 | |
| type train | step 950 | loss 0.0281 0.1600 0.5135 1.3899 | lr 1.0e-03 | norm 0.1460 | dt 0.024 | |
| type train | step 960 | loss 0.0282 0.1595 0.5026 1.3710 | lr 1.0e-03 | norm 0.1792 | dt 0.024 | |
| type train | step 970 | loss 0.0274 0.1519 0.4832 1.3272 | lr 1.0e-03 | norm 0.1422 | dt 0.024 | |
| type train | step 980 | loss 0.0272 0.1553 0.4968 1.3589 | lr 1.0e-03 | norm 0.1377 | dt 0.024 | |
| type train | step 990 | loss 0.0279 0.1548 0.5136 1.3944 | lr 1.0e-03 | norm 0.1469 | dt 0.024 | |
| type train | step 1000 | loss 0.0264 0.1502 0.4985 1.3620 | lr 1.0e-03 | norm 0.1278 | dt 0.024 | |
| type train | step 1010 | loss 0.0275 0.1463 0.4791 1.3262 | lr 1.0e-03 | norm 0.1195 | dt 0.025 | |
| type train | step 1020 | loss 0.0265 0.1481 0.4858 1.3362 | lr 1.0e-03 | norm 0.1504 | dt 0.025 | |
| type train | step 1030 | loss 0.0263 0.1471 0.4787 1.3406 | lr 1.0e-03 | norm 0.1634 | dt 0.025 | |
| type train | step 1040 | loss 0.0262 0.1471 0.4905 1.3533 | lr 1.0e-03 | norm 0.1285 | dt 0.025 | |
| type train | step 1050 | loss 0.0254 0.1393 0.4659 1.3160 | lr 1.0e-03 | norm 0.1301 | dt 0.025 | |
| type train | step 1060 | loss 0.0263 0.1421 0.4803 1.3340 | lr 1.0e-03 | norm 0.1186 | dt 0.025 | |
| type train | step 1070 | loss 0.0250 0.1337 0.4533 1.2813 | lr 1.0e-03 | norm 0.1294 | dt 0.025 | |
| type train | step 1080 | loss 0.0253 0.1374 0.4746 1.3212 | lr 1.0e-03 | norm 0.1279 | dt 0.025 | |
| type train | step 1090 | loss 0.0249 0.1379 0.4697 1.3172 | lr 1.0e-03 | norm 0.1348 | dt 0.025 | |
| type train | step 1100 | loss 0.0250 0.1366 0.4745 1.3305 | lr 1.0e-03 | norm 0.1442 | dt 0.025 | |
| type train | step 1110 | loss 0.0249 0.1351 0.4670 1.3188 | lr 1.0e-03 | norm 0.1347 | dt 0.024 | |
| type train | step 1120 | loss 0.0246 0.1351 0.4747 1.3381 | lr 1.0e-03 | norm 0.1759 | dt 0.025 | |
| type train | step 1130 | loss 0.0242 0.1321 0.4731 1.3241 | lr 1.0e-03 | norm 0.1420 | dt 0.025 | |
| type train | step 1140 | loss 0.0244 0.1318 0.4728 1.3367 | lr 1.0e-03 | norm 0.1402 | dt 0.025 | |
| type train | step 1150 | loss 0.0233 0.1269 0.4422 1.2705 | lr 1.0e-03 | norm 0.1313 | dt 0.025 | |
| type train | step 1160 | loss 0.0235 0.1300 0.4584 1.3022 | lr 1.0e-03 | norm 0.1544 | dt 0.025 | |
| type train | step 1170 | loss 0.0240 0.1289 0.4571 1.2862 | lr 1.0e-03 | norm 0.1575 | dt 0.025 | |
| type train | step 1180 | loss 0.0234 0.1281 0.4715 1.3336 | lr 1.0e-03 | norm 0.1349 | dt 0.025 | |
| type train | step 1190 | loss 0.0237 0.1247 0.4403 1.2662 | lr 1.0e-03 | norm 0.1414 | dt 0.025 | |
| type train | step 1200 | loss 0.0231 0.1240 0.4564 1.3030 | lr 1.0e-03 | norm 0.1396 | dt 0.025 | |
| type train | step 1210 | loss 0.0224 0.1203 0.4321 1.2560 | lr 1.0e-03 | norm 0.1290 | dt 0.025 | |
| type train | step 1220 | loss 0.0226 0.1224 0.4589 1.3053 | lr 1.0e-03 | norm 0.1380 | dt 0.026 | |
| type train | step 1230 | loss 0.0224 0.1216 0.4412 1.2690 | lr 1.0e-03 | norm 0.1363 | dt 0.025 | |
| type train | step 1240 | loss 0.0223 0.1217 0.4508 1.2995 | lr 1.0e-03 | norm 0.1424 | dt 0.025 | |
| type train | step 1250 | loss 0.0225 0.1160 0.4299 1.2427 | lr 1.0e-03 | norm 0.1146 | dt 0.025 | |
| type train | step 1260 | loss 0.0229 0.1218 0.4575 1.3133 | lr 1.0e-03 | norm 0.1506 | dt 0.025 | |
| type train | step 1270 | loss 0.0220 0.1172 0.4266 1.2320 | lr 1.0e-03 | norm 0.1148 | dt 0.025 | |
| type train | step 1280 | loss 0.0220 0.1151 0.4324 1.2565 | lr 1.0e-03 | norm 0.1422 | dt 0.025 | |
| type train | step 1290 | loss 0.0216 0.1147 0.4239 1.2325 | lr 1.0e-03 | norm 0.1104 | dt 0.025 | |
| type train | step 1300 | loss 0.0224 0.1208 0.4582 1.3211 | lr 1.0e-03 | norm 0.1176 | dt 0.025 | |
| type train | step 1310 | loss 0.0217 0.1142 0.4279 1.2420 | lr 1.0e-03 | norm 0.1414 | dt 0.025 | |
| type train | step 1320 | loss 0.0213 0.1088 0.4154 1.2258 | lr 1.0e-03 | norm 0.1532 | dt 0.025 | |
| type train | step 1330 | loss 0.0218 0.1139 0.4283 1.2377 | lr 1.0e-03 | norm 0.1249 | dt 0.025 | |
| type train | step 1340 | loss 0.0219 0.1147 0.4195 1.2222 | lr 1.0e-03 | norm 0.1156 | dt 0.027 | |
| type train | step 1350 | loss 0.0210 0.1097 0.4193 1.2307 | lr 1.0e-03 | norm 0.1176 | dt 0.025 | |
| type train | step 1360 | loss 0.0229 0.1164 0.4353 1.2532 | lr 1.0e-03 | norm 0.1684 | dt 0.025 | |
| type train | step 1370 | loss 0.0218 0.1140 0.4307 1.2612 | lr 1.0e-03 | norm 0.1522 | dt 0.025 | |
| type train | step 1380 | loss 0.0211 0.1099 0.4269 1.2526 | lr 1.0e-03 | norm 0.1499 | dt 0.026 | |
| type train | step 1390 | loss 0.0208 0.1108 0.4175 1.2158 | lr 1.0e-03 | norm 0.1328 | dt 0.025 | |
| type train | step 1400 | loss 0.0208 0.1117 0.4193 1.2290 | lr 1.0e-03 | norm 0.1213 | dt 0.025 | |
| type train | step 1410 | loss 0.0209 0.1078 0.4186 1.2379 | lr 1.0e-03 | norm 0.1252 | dt 0.025 | |
| type train | step 1420 | loss 0.0218 0.1139 0.4386 1.2732 | lr 1.0e-03 | norm 0.1201 | dt 0.025 | |
| type train | step 1430 | loss 0.0212 0.1082 0.4183 1.2300 | lr 1.0e-03 | norm 0.1586 | dt 0.025 | |
| type train | step 1440 | loss 0.0204 0.1078 0.4229 1.2321 | lr 1.0e-03 | norm 0.1441 | dt 0.025 | |
| type train | step 1450 | loss 0.0199 0.1069 0.4103 1.2130 | lr 1.0e-03 | norm 0.1227 | dt 0.025 | |
| type train | step 1460 | loss 0.0208 0.1122 0.4278 1.2388 | lr 1.0e-03 | norm 0.1258 | dt 0.025 | |
| type train | step 1470 | loss 0.0202 0.1049 0.4056 1.2010 | lr 1.0e-03 | norm 0.1116 | dt 0.025 | |
| type train | step 1480 | loss 0.0198 0.1055 0.4090 1.2131 | lr 1.0e-03 | norm 0.1142 | dt 0.025 | |
| type train | step 1490 | loss 0.0206 0.1053 0.4046 1.2013 | lr 1.0e-03 | norm 0.1220 | dt 0.025 | |
| type train | step 1500 | loss 0.0206 0.1070 0.4256 1.2526 | lr 1.0e-03 | norm 0.1687 | dt 0.025 | |
| type train | step 1510 | loss 0.0198 0.1038 0.3978 1.1956 | lr 1.0e-03 | norm 0.1432 | dt 0.025 | |
| type train | step 1520 | loss 0.0201 0.1086 0.4127 1.2147 | lr 1.0e-03 | norm 0.1268 | dt 0.026 | |
| type train | step 1530 | loss 0.0201 0.1044 0.4132 1.2170 | lr 1.0e-03 | norm 0.1256 | dt 0.025 | |
| type train | step 1540 | loss 0.0198 0.1031 0.4109 1.2157 | lr 1.0e-03 | norm 0.1255 | dt 0.025 | |
| type train | step 1550 | loss 0.0199 0.1049 0.4183 1.2291 | lr 1.0e-03 | norm 0.1087 | dt 0.025 | |
| type train | step 1560 | loss 0.0194 0.1039 0.4143 1.2274 | lr 1.0e-03 | norm 0.1453 | dt 0.026 | |
| type train | step 1570 | loss 0.0194 0.1042 0.4077 1.2112 | lr 1.0e-03 | norm 0.1511 | dt 0.025 | |
| type train | step 1580 | loss 0.0191 0.1002 0.3915 1.1714 | lr 1.0e-03 | norm 0.1316 | dt 0.025 | |
| type train | step 1590 | loss 0.0192 0.1037 0.4035 1.2019 | lr 1.0e-03 | norm 0.1290 | dt 0.025 | |
| type train | step 1600 | loss 0.0197 0.1040 0.4181 1.2358 | lr 1.0e-03 | norm 0.1220 | dt 0.026 | |
| type train | step 1610 | loss 0.0189 0.1019 0.4113 1.2183 | lr 1.0e-03 | norm 0.1096 | dt 0.025 | |
| type train | step 1620 | loss 0.0198 0.1010 0.3992 1.1898 | lr 1.0e-03 | norm 0.1138 | dt 0.025 | |
| type train | step 1630 | loss 0.0191 0.1023 0.4061 1.2030 | lr 1.0e-03 | norm 0.1431 | dt 0.025 | |
| type train | step 1640 | loss 0.0190 0.1027 0.4006 1.2047 | lr 1.0e-03 | norm 0.1502 | dt 0.025 | |
| type train | step 1650 | loss 0.0192 0.1032 0.4141 1.2223 | lr 1.0e-03 | norm 0.1304 | dt 0.025 | |
| type train | step 1660 | loss 0.0187 0.0990 0.3956 1.1872 | lr 1.0e-03 | norm 0.1192 | dt 0.025 | |
| type train | step 1670 | loss 0.0195 0.1021 0.4084 1.2126 | lr 9.9e-04 | norm 0.1335 | dt 0.025 | |
| type train | step 1680 | loss 0.0186 0.0969 0.3861 1.1634 | lr 9.9e-04 | norm 0.1156 | dt 0.025 | |
| type train | step 1690 | loss 0.0189 0.0996 0.4064 1.2019 | lr 9.9e-04 | norm 0.1311 | dt 0.026 | |
| type train | step 1700 | loss 0.0186 0.1014 0.4044 1.2031 | lr 9.9e-04 | norm 0.1314 | dt 0.025 | |
| type train | step 1710 | loss 0.0188 0.1012 0.4111 1.2179 | lr 9.9e-04 | norm 0.1246 | dt 0.025 | |
| type train | step 1720 | loss 0.0188 0.1005 0.4038 1.2075 | lr 9.9e-04 | norm 0.1334 | dt 0.026 | |
| type train | step 1730 | loss 0.0187 0.1015 0.4143 1.2299 | lr 9.9e-04 | norm 0.1205 | dt 0.026 | |
| type train | step 1740 | loss 0.0186 0.1002 0.4125 1.2207 | lr 9.9e-04 | norm 0.1331 | dt 0.025 | |
| type train | step 1750 | loss 0.0186 0.1002 0.4124 1.2307 | lr 9.9e-04 | norm 0.1204 | dt 0.025 | |
| type train | step 1760 | loss 0.0178 0.0969 0.3871 1.1680 | lr 9.9e-04 | norm 0.1342 | dt 0.025 | |
| type train | step 1770 | loss 0.0181 0.1001 0.4029 1.1997 | lr 9.9e-04 | norm 0.1474 | dt 0.026 | |
| type train | step 1780 | loss 0.0186 0.0996 0.4009 1.1894 | lr 9.9e-04 | norm 0.1528 | dt 0.026 | |
| type train | step 1790 | loss 0.0182 0.1004 0.4171 1.2403 | lr 9.9e-04 | norm 0.1331 | dt 0.025 | |
| type train | step 1800 | loss 0.0185 0.0972 0.3909 1.1758 | lr 9.9e-04 | norm 0.1409 | dt 0.025 | |
| type train | step 1810 | loss 0.0181 0.0982 0.4051 1.2085 | lr 9.9e-04 | norm 0.1286 | dt 0.025 | |
| type train | step 1820 | loss 0.0176 0.0958 0.3865 1.1695 | lr 9.9e-04 | norm 0.1130 | dt 0.026 | |
| type train | step 1830 | loss 0.0177 0.0981 0.4104 1.2215 | lr 9.9e-04 | norm 0.1301 | dt 0.027 | |
| type train | step 1840 | loss 0.0177 0.0978 0.3960 1.1877 | lr 9.9e-04 | norm 0.1216 | dt 0.025 | |
| type train | step 1850 | loss 0.0177 0.0982 0.4053 1.2162 | lr 9.9e-04 | norm 0.1288 | dt 0.025 | |
| type train | step 1860 | loss 0.0178 0.0942 0.3853 1.1651 | lr 9.9e-04 | norm 0.1141 | dt 0.026 | |
| type train | step 1870 | loss 0.0181 0.0991 0.4108 1.2251 | lr 9.9e-04 | norm 0.1443 | dt 0.025 | |
| type train | step 1880 | loss 0.0174 0.0959 0.3864 1.1539 | lr 9.9e-04 | norm 0.1244 | dt 0.025 | |
| type train | step 1890 | loss 0.0175 0.0947 0.3933 1.1826 | lr 9.9e-04 | norm 0.1375 | dt 0.025 | |
| type train | step 1900 | loss 0.0173 0.0948 0.3852 1.1577 | lr 9.9e-04 | norm 0.0982 | dt 0.025 | |
| type train | step 1910 | loss 0.0180 0.0995 0.4159 1.2440 | lr 9.9e-04 | norm 0.1204 | dt 0.025 | |
| type train | step 1920 | loss 0.0175 0.0950 0.3904 1.1732 | lr 9.9e-04 | norm 0.1278 | dt 0.026 | |
| type train | step 1930 | loss 0.0171 0.0913 0.3784 1.1552 | lr 9.9e-04 | norm 0.1311 | dt 0.025 | |
| type train | step 1940 | loss 0.0176 0.0956 0.3926 1.1708 | lr 9.9e-04 | norm 0.1248 | dt 0.025 | |
| type train | step 1950 | loss 0.0177 0.0963 0.3839 1.1509 | lr 9.9e-04 | norm 0.1141 | dt 0.025 | |
| type train | step 1960 | loss 0.0170 0.0930 0.3838 1.1605 | lr 9.9e-04 | norm 0.1067 | dt 0.025 | |
| type train | step 1970 | loss 0.0184 0.0970 0.3956 1.1886 | lr 9.9e-04 | norm 0.1590 | dt 0.025 | |
| type train | step 1980 | loss 0.0177 0.0967 0.3957 1.1979 | lr 9.9e-04 | norm 0.1455 | dt 0.025 | |
| type train | step 1990 | loss 0.0171 0.0937 0.3937 1.1878 | lr 9.9e-04 | norm 0.1446 | dt 0.025 | |
| type train | step 2000 | loss 0.0169 0.0949 0.3859 1.1526 | lr 9.9e-04 | norm 0.1322 | dt 0.025 | |
| type train | step 2010 | loss 0.0170 0.0951 0.3872 1.1647 | lr 9.9e-04 | norm 0.1253 | dt 0.025 | |
| type train | step 2020 | loss 0.0170 0.0928 0.3872 1.1737 | lr 9.9e-04 | norm 0.1221 | dt 0.026 | |
| type train | step 2030 | loss 0.0179 0.0976 0.4048 1.2115 | lr 9.9e-04 | norm 0.1204 | dt 0.025 | |
| type train | step 2040 | loss 0.0173 0.0929 0.3863 1.1671 | lr 9.9e-04 | norm 0.1315 | dt 0.026 | |
| type train | step 2050 | loss 0.0168 0.0935 0.3935 1.1734 | lr 9.9e-04 | norm 0.1363 | dt 0.026 | |
| type train | step 2060 | loss 0.0164 0.0930 0.3824 1.1570 | lr 9.9e-04 | norm 0.1190 | dt 0.025 | |
| type train | step 2070 | loss 0.0172 0.0976 0.3990 1.1782 | lr 9.9e-04 | norm 0.1185 | dt 0.025 | |
| type train | step 2080 | loss 0.0166 0.0915 0.3774 1.1467 | lr 9.9e-04 | norm 0.1149 | dt 0.025 | |
| type train | step 2090 | loss 0.0165 0.0921 0.3817 1.1597 | lr 9.9e-04 | norm 0.1162 | dt 0.026 | |
| type train | step 2100 | loss 0.0170 0.0917 0.3774 1.1460 | lr 9.9e-04 | norm 0.1103 | dt 0.025 | |
| type train | step 2110 | loss 0.0171 0.0938 0.3969 1.1984 | lr 9.9e-04 | norm 0.1527 | dt 0.025 | |
| type train | step 2120 | loss 0.0164 0.0911 0.3734 1.1451 | lr 9.9e-04 | norm 0.1395 | dt 0.025 | |
| type train | step 2130 | loss 0.0168 0.0951 0.3871 1.1586 | lr 9.9e-04 | norm 0.1251 | dt 0.025 | |
| type train | step 2140 | loss 0.0167 0.0924 0.3854 1.1652 | lr 9.9e-04 | norm 0.1249 | dt 0.025 | |
| type train | step 2150 | loss 0.0165 0.0910 0.3842 1.1654 | lr 9.9e-04 | norm 0.1258 | dt 0.025 | |
| type train | step 2160 | loss 0.0167 0.0930 0.3931 1.1810 | lr 9.9e-04 | norm 0.1251 | dt 0.025 | |
| type train | step 2170 | loss 0.0162 0.0922 0.3903 1.1785 | lr 9.9e-04 | norm 0.1279 | dt 0.025 | |
| type train | step 2180 | loss 0.0163 0.0925 0.3841 1.1623 | lr 9.9e-04 | norm 0.1510 | dt 0.026 | |
| type train | step 2190 | loss 0.0160 0.0889 0.3687 1.1205 | lr 9.9e-04 | norm 0.1287 | dt 0.025 | |
| type train | step 2200 | loss 0.0162 0.0922 0.3783 1.1532 | lr 9.9e-04 | norm 0.1276 | dt 0.025 | |
| type train | step 2210 | loss 0.0166 0.0926 0.3923 1.1892 | lr 9.9e-04 | norm 0.1243 | dt 0.025 | |
| type train | step 2220 | loss 0.0159 0.0913 0.3891 1.1722 | lr 9.9e-04 | norm 0.1137 | dt 0.025 | |
| type train | step 2230 | loss 0.0167 0.0908 0.3772 1.1472 | lr 9.9e-04 | norm 0.1132 | dt 0.025 | |
| type train | step 2240 | loss 0.0161 0.0917 0.3842 1.1586 | lr 9.9e-04 | norm 0.1399 | dt 0.025 | |
| type train | step 2250 | loss 0.0160 0.0920 0.3795 1.1554 | lr 9.9e-04 | norm 0.1411 | dt 0.026 | |
| type train | step 2260 | loss 0.0162 0.0929 0.3926 1.1773 | lr 9.9e-04 | norm 0.1255 | dt 0.032 | |
| type train | step 2270 | loss 0.0159 0.0887 0.3751 1.1447 | lr 9.9e-04 | norm 0.1268 | dt 0.031 | |
| type train | step 2280 | loss 0.0165 0.0922 0.3877 1.1683 | lr 9.9e-04 | norm 0.1234 | dt 0.032 | |
| type train | step 2290 | loss 0.0158 0.0877 0.3661 1.1227 | lr 9.9e-04 | norm 0.1175 | dt 0.030 | |
| type train | step 2300 | loss 0.0160 0.0899 0.3847 1.1595 | lr 9.9e-04 | norm 0.1290 | dt 0.029 | |
| type train | step 2310 | loss 0.0157 0.0919 0.3860 1.1615 | lr 9.9e-04 | norm 0.1333 | dt 0.028 | |
| type train | step 2320 | loss 0.0160 0.0919 0.3918 1.1780 | lr 9.9e-04 | norm 0.1296 | dt 0.026 | |
| type train | step 2330 | loss 0.0160 0.0910 0.3844 1.1642 | lr 9.9e-04 | norm 0.1339 | dt 0.026 | |
| type train | step 2340 | loss 0.0159 0.0924 0.3961 1.1912 | lr 9.8e-04 | norm 0.1273 | dt 0.025 | |
| type train | step 2350 | loss 0.0159 0.0914 0.3931 1.1817 | lr 9.8e-04 | norm 0.1325 | dt 0.025 | |
| type train | step 2360 | loss 0.0158 0.0910 0.3931 1.1903 | lr 9.8e-04 | norm 0.1123 | dt 0.025 | |
| type train | step 2370 | loss 0.0152 0.0888 0.3706 1.1294 | lr 9.8e-04 | norm 0.1290 | dt 0.026 | |
| type train | step 2380 | loss 0.0155 0.0916 0.3837 1.1603 | lr 9.8e-04 | norm 0.1429 | dt 0.026 | |
| type train | step 2390 | loss 0.0159 0.0908 0.3816 1.1510 | lr 9.8e-04 | norm 0.1579 | dt 0.025 | |
| type train | step 2400 | loss 0.0156 0.0919 0.3999 1.2038 | lr 9.8e-04 | norm 0.1280 | dt 0.025 | |
| type train | step 2410 | loss 0.0159 0.0891 0.3740 1.1371 | lr 9.8e-04 | norm 0.1298 | dt 0.026 | |
| type train | step 2420 | loss 0.0156 0.0899 0.3877 1.1704 | lr 9.8e-04 | norm 0.1238 | dt 0.025 | |
| type train | step 2430 | loss 0.0150 0.0885 0.3715 1.1338 | lr 9.8e-04 | norm 0.1156 | dt 0.025 | |
| type train | step 2440 | loss 0.0152 0.0910 0.3936 1.1840 | lr 9.8e-04 | norm 0.1310 | dt 0.027 | |
| type train | step 2450 | loss 0.0153 0.0902 0.3801 1.1536 | lr 9.8e-04 | norm 0.1366 | dt 0.025 | |
| type train | step 2460 | loss 0.0152 0.0908 0.3891 1.1812 | lr 9.8e-04 | norm 0.1379 | dt 0.025 | |
| type train | step 2470 | loss 0.0153 0.0871 0.3701 1.1322 | lr 9.8e-04 | norm 0.1119 | dt 0.026 | |
| type train | step 2480 | loss 0.0156 0.0915 0.3930 1.1903 | lr 9.8e-04 | norm 0.1408 | dt 0.026 | |
| type train | step 2490 | loss 0.0151 0.0889 0.3711 1.1191 | lr 9.8e-04 | norm 0.1099 | dt 0.025 | |
| type train | step 2500 | loss 0.0151 0.0884 0.3791 1.1520 | lr 9.8e-04 | norm 0.1435 | dt 0.025 | |
| type train | step 2510 | loss 0.0150 0.0882 0.3712 1.1252 | lr 9.8e-04 | norm 0.0996 | dt 0.025 | |
| type train | step 2520 | loss 0.0155 0.0928 0.4012 1.2091 | lr 9.8e-04 | norm 0.1234 | dt 0.025 | |
| type train | step 2530 | loss 0.0151 0.0884 0.3752 1.1434 | lr 9.8e-04 | norm 0.1247 | dt 0.026 | |
| type train | step 2540 | loss 0.0149 0.0850 0.3652 1.1239 | lr 9.8e-04 | norm 0.1238 | dt 0.025 | |
| type train | step 2550 | loss 0.0153 0.0891 0.3794 1.1387 | lr 9.8e-04 | norm 0.1197 | dt 0.026 | |
| type train | step 2560 | loss 0.0153 0.0898 0.3705 1.1210 | lr 9.8e-04 | norm 0.1298 | dt 0.025 | |
| type train | step 2570 | loss 0.0148 0.0870 0.3705 1.1309 | lr 9.8e-04 | norm 0.1128 | dt 0.025 | |
| type train | step 2580 | loss 0.0162 0.0904 0.3803 1.1531 | lr 9.8e-04 | norm 0.1608 | dt 0.025 | |
| type train | step 2590 | loss 0.0153 0.0905 0.3823 1.1686 | lr 9.8e-04 | norm 0.1406 | dt 0.026 | |
| type train | step 2600 | loss 0.0148 0.0879 0.3809 1.1599 | lr 9.8e-04 | norm 0.1396 | dt 0.025 | |
| type train | step 2610 | loss 0.0146 0.0892 0.3739 1.1232 | lr 9.8e-04 | norm 0.1413 | dt 0.025 | |
| type train | step 2620 | loss 0.0148 0.0891 0.3741 1.1372 | lr 9.8e-04 | norm 0.1178 | dt 0.025 | |
| type train | step 2630 | loss 0.0148 0.0873 0.3750 1.1462 | lr 9.8e-04 | norm 0.1295 | dt 0.025 | |
| type train | step 2640 | loss 0.0156 0.0916 0.3909 1.1817 | lr 9.8e-04 | norm 0.1161 | dt 0.026 | |
| type train | step 2650 | loss 0.0151 0.0876 0.3742 1.1392 | lr 9.8e-04 | norm 0.1354 | dt 0.026 | |
| type train | step 2660 | loss 0.0146 0.0880 0.3813 1.1483 | lr 9.8e-04 | norm 0.1344 | dt 0.025 | |
| type train | step 2670 | loss 0.0143 0.0879 0.3719 1.1301 | lr 9.8e-04 | norm 0.1205 | dt 0.025 | |
| type train | step 2680 | loss 0.0150 0.0921 0.3863 1.1508 | lr 9.8e-04 | norm 0.1247 | dt 0.025 | |
| type train | step 2690 | loss 0.0145 0.0862 0.3660 1.1218 | lr 9.8e-04 | norm 0.1245 | dt 0.026 | |
| type train | step 2700 | loss 0.0144 0.0869 0.3698 1.1345 | lr 9.8e-04 | norm 0.1162 | dt 0.025 | |
| type train | step 2710 | loss 0.0149 0.0866 0.3665 1.1190 | lr 9.8e-04 | norm 0.1353 | dt 0.026 | |
| type train | step 2720 | loss 0.0149 0.0885 0.3849 1.1714 | lr 9.8e-04 | norm 0.1528 | dt 0.025 | |
| type train | step 2730 | loss 0.0144 0.0864 0.3633 1.1206 | lr 9.8e-04 | norm 0.1337 | dt 0.025 | |
| type train | step 2740 | loss 0.0147 0.0901 0.3758 1.1329 | lr 9.8e-04 | norm 0.1290 | dt 0.025 | |
| type train | step 2750 | loss 0.0147 0.0874 0.3742 1.1412 | lr 9.8e-04 | norm 0.1200 | dt 0.025 | |
| type train | step 2760 | loss 0.0145 0.0858 0.3731 1.1385 | lr 9.8e-04 | norm 0.1251 | dt 0.025 | |
| type train | step 2770 | loss 0.0147 0.0887 0.3831 1.1575 | lr 9.8e-04 | norm 0.1061 | dt 0.025 | |
| type train | step 2780 | loss 0.0142 0.0876 0.3788 1.1542 | lr 9.8e-04 | norm 0.1141 | dt 0.025 | |
| type train | step 2790 | loss 0.0142 0.0881 0.3743 1.1395 | lr 9.8e-04 | norm 0.1457 | dt 0.025 | |
| type train | step 2800 | loss 0.0141 0.0846 0.3588 1.0972 | lr 9.8e-04 | norm 0.1251 | dt 0.025 | |
| type train | step 2810 | loss 0.0143 0.0878 0.3684 1.1300 | lr 9.7e-04 | norm 0.1322 | dt 0.025 | |
| type train | step 2820 | loss 0.0147 0.0878 0.3818 1.1664 | lr 9.7e-04 | norm 0.1243 | dt 0.025 | |
| type train | step 2830 | loss 0.0140 0.0871 0.3796 1.1479 | lr 9.7e-04 | norm 0.1148 | dt 0.026 | |
| type train | step 2840 | loss 0.0146 0.0865 0.3672 1.1229 | lr 9.7e-04 | norm 0.1096 | dt 0.026 | |
| type train | step 2850 | loss 0.0141 0.0875 0.3752 1.1354 | lr 9.7e-04 | norm 0.1342 | dt 0.025 | |
| type train | step 2860 | loss 0.0141 0.0875 0.3704 1.1356 | lr 9.7e-04 | norm 0.1418 | dt 0.025 | |
| type train | step 2870 | loss 0.0142 0.0888 0.3833 1.1549 | lr 9.7e-04 | norm 0.1210 | dt 0.025 | |
| type train | step 2880 | loss 0.0140 0.0847 0.3656 1.1240 | lr 9.7e-04 | norm 0.1309 | dt 0.026 | |
| type train | step 2890 | loss 0.0145 0.0883 0.3786 1.1462 | lr 9.7e-04 | norm 0.1177 | dt 0.025 | |
| type train | step 2900 | loss 0.0139 0.0840 0.3563 1.1027 | lr 9.7e-04 | norm 0.1137 | dt 0.025 | |
| type train | step 2910 | loss 0.0142 0.0859 0.3753 1.1382 | lr 9.7e-04 | norm 0.1205 | dt 0.025 | |
| type train | step 2920 | loss 0.0139 0.0880 0.3777 1.1415 | lr 9.7e-04 | norm 0.1230 | dt 0.025 | |
| type train | step 2930 | loss 0.0141 0.0882 0.3837 1.1578 | lr 9.7e-04 | norm 0.1250 | dt 0.025 | |
| type train | step 2940 | loss 0.0142 0.0870 0.3750 1.1444 | lr 9.7e-04 | norm 0.1220 | dt 0.025 | |
| type train | step 2950 | loss 0.0141 0.0885 0.3871 1.1701 | lr 9.7e-04 | norm 0.1302 | dt 0.025 | |
| type train | step 2960 | loss 0.0141 0.0878 0.3832 1.1608 | lr 9.7e-04 | norm 0.1381 | dt 0.025 | |
| type train | step 2970 | loss 0.0140 0.0872 0.3844 1.1698 | lr 9.7e-04 | norm 0.1146 | dt 0.025 | |
| type train | step 2980 | loss 0.0135 0.0851 0.3626 1.1123 | lr 9.7e-04 | norm 0.1261 | dt 0.025 | |
| type train | step 2990 | loss 0.0137 0.0878 0.3746 1.1405 | lr 9.7e-04 | norm 0.1476 | dt 0.025 | |
| type train | step 3000 | loss 0.0142 0.0868 0.3730 1.1328 | lr 9.7e-04 | norm 0.1462 | dt 0.025 | |
| type train | step 3010 | loss 0.0138 0.0884 0.3912 1.1839 | lr 9.7e-04 | norm 0.1232 | dt 0.025 | |
| type train | step 3020 | loss 0.0140 0.0857 0.3653 1.1198 | lr 9.7e-04 | norm 0.1372 | dt 0.025 | |
| type train | step 3030 | loss 0.0139 0.0863 0.3791 1.1512 | lr 9.7e-04 | norm 0.1255 | dt 0.026 | |
| type train | step 3040 | loss 0.0133 0.0851 0.3642 1.1161 | lr 9.7e-04 | norm 0.1099 | dt 0.025 | |
| type train | step 3050 | loss 0.0136 0.0878 0.3852 1.1677 | lr 9.7e-04 | norm 0.1280 | dt 0.026 | |
| type train | step 3060 | loss 0.0136 0.0868 0.3724 1.1359 | lr 9.7e-04 | norm 0.1172 | dt 0.026 | |
| type train | step 3070 | loss 0.0136 0.0874 0.3817 1.1649 | lr 9.7e-04 | norm 0.1285 | dt 0.026 | |
| type train | step 3080 | loss 0.0136 0.0838 0.3620 1.1146 | lr 9.7e-04 | norm 0.1223 | dt 0.025 | |
| type train | step 3090 | loss 0.0140 0.0882 0.3849 1.1696 | lr 9.7e-04 | norm 0.1386 | dt 0.025 | |
| type train | step 3100 | loss 0.0134 0.0856 0.3636 1.1052 | lr 9.7e-04 | norm 0.1139 | dt 0.026 | |
| type train | step 3110 | loss 0.0134 0.0855 0.3718 1.1361 | lr 9.7e-04 | norm 0.1423 | dt 0.026 | |
| type train | step 3120 | loss 0.0134 0.0852 0.3646 1.1096 | lr 9.7e-04 | norm 0.1006 | dt 0.025 | |
| type train | step 3130 | loss 0.0139 0.0896 0.3938 1.1927 | lr 9.7e-04 | norm 0.1149 | dt 0.026 | |
| type train | step 3140 | loss 0.0134 0.0853 0.3683 1.1260 | lr 9.7e-04 | norm 0.1228 | dt 0.026 | |
| type train | step 3150 | loss 0.0132 0.0824 0.3582 1.1082 | lr 9.7e-04 | norm 0.1160 | dt 0.025 | |
| type train | step 3160 | loss 0.0136 0.0862 0.3726 1.1231 | lr 9.7e-04 | norm 0.1216 | dt 0.026 | |
| type train | step 3170 | loss 0.0137 0.0865 0.3630 1.1076 | lr 9.7e-04 | norm 0.1165 | dt 0.025 | |
| type train | step 3180 | loss 0.0133 0.0844 0.3642 1.1144 | lr 9.7e-04 | norm 0.1159 | dt 0.025 | |
| type train | step 3190 | loss 0.0145 0.0873 0.3735 1.1377 | lr 9.6e-04 | norm 0.1603 | dt 0.026 | |
| type train | step 3200 | loss 0.0137 0.0875 0.3757 1.1523 | lr 9.6e-04 | norm 0.1370 | dt 0.025 | |
| type train | step 3210 | loss 0.0133 0.0852 0.3738 1.1454 | lr 9.6e-04 | norm 0.1399 | dt 0.025 | |
| type train | step 3220 | loss 0.0131 0.0862 0.3678 1.1081 | lr 9.6e-04 | norm 0.1352 | dt 0.025 | |
| type train | step 3230 | loss 0.0133 0.0858 0.3663 1.1236 | lr 9.6e-04 | norm 0.1168 | dt 0.026 | |
| type train | step 3240 | loss 0.0133 0.0846 0.3691 1.1309 | lr 9.6e-04 | norm 0.1244 | dt 0.026 | |
| type train | step 3250 | loss 0.0140 0.0884 0.3839 1.1665 | lr 9.6e-04 | norm 0.1176 | dt 0.026 | |
| type train | step 3260 | loss 0.0137 0.0848 0.3674 1.1265 | lr 9.6e-04 | norm 0.1263 | dt 0.026 | |
| type train | step 3270 | loss 0.0131 0.0852 0.3751 1.1353 | lr 9.6e-04 | norm 0.1344 | dt 0.026 | |
| type train | step 3280 | loss 0.0129 0.0852 0.3663 1.1158 | lr 9.6e-04 | norm 0.1324 | dt 0.025 | |
| type train | step 3290 | loss 0.0135 0.0890 0.3794 1.1368 | lr 9.6e-04 | norm 0.1181 | dt 0.025 | |
| type train | step 3300 | loss 0.0132 0.0835 0.3607 1.1079 | lr 9.6e-04 | norm 0.1089 | dt 0.025 | |
| type train | step 3310 | loss 0.0130 0.0841 0.3633 1.1203 | lr 9.6e-04 | norm 0.1212 | dt 0.025 | |
| type train | step 3320 | loss 0.0135 0.0839 0.3607 1.1057 | lr 9.6e-04 | norm 0.1222 | dt 0.026 | |
| type train | step 3330 | loss 0.0135 0.0857 0.3780 1.1603 | lr 9.6e-04 | norm 0.1519 | dt 0.026 | |
| type train | step 3340 | loss 0.0130 0.0836 0.3581 1.1070 | lr 9.6e-04 | norm 0.1355 | dt 0.026 | |
| type train | step 3350 | loss 0.0134 0.0871 0.3689 1.1204 | lr 9.6e-04 | norm 0.1194 | dt 0.026 | |
| type train | step 3360 | loss 0.0133 0.0846 0.3685 1.1292 | lr 9.6e-04 | norm 0.1253 | dt 0.026 | |
| type train | step 3370 | loss 0.0132 0.0833 0.3670 1.1260 | lr 9.6e-04 | norm 0.1347 | dt 0.025 | |
| type train | step 3380 | loss 0.0133 0.0860 0.3769 1.1452 | lr 9.6e-04 | norm 0.1115 | dt 0.026 | |
| type train | step 3390 | loss 0.0129 0.0850 0.3728 1.1422 | lr 9.6e-04 | norm 0.1177 | dt 0.026 | |
| type train | step 3400 | loss 0.0129 0.0857 0.3686 1.1280 | lr 9.6e-04 | norm 0.1475 | dt 0.025 | |
| type train | step 3410 | loss 0.0128 0.0820 0.3532 1.0865 | lr 9.6e-04 | norm 0.1362 | dt 0.025 | |
| type train | step 3420 | loss 0.0130 0.0851 0.3638 1.1184 | lr 9.6e-04 | norm 0.1313 | dt 0.026 | |
| type train | step 3430 | loss 0.0134 0.0852 0.3763 1.1549 | lr 9.6e-04 | norm 0.1288 | dt 0.025 | |
| type train | step 3440 | loss 0.0127 0.0845 0.3747 1.1368 | lr 9.6e-04 | norm 0.1137 | dt 0.025 | |
| type train | step 3450 | loss 0.0133 0.0840 0.3615 1.1112 | lr 9.6e-04 | norm 0.1161 | dt 0.025 | |
| type train | step 3460 | loss 0.0128 0.0852 0.3698 1.1245 | lr 9.6e-04 | norm 0.1326 | dt 0.025 | |
| type train | step 3470 | loss 0.0128 0.0848 0.3657 1.1252 | lr 9.6e-04 | norm 0.1431 | dt 0.025 | |
| type train | step 3480 | loss 0.0130 0.0863 0.3779 1.1429 | lr 9.6e-04 | norm 0.1259 | dt 0.025 | |
| type train | step 3490 | loss 0.0127 0.0825 0.3602 1.1116 | lr 9.6e-04 | norm 0.1232 | dt 0.025 | |
| type train | step 3500 | loss 0.0132 0.0859 0.3739 1.1336 | lr 9.6e-04 | norm 0.1148 | dt 0.026 | |
| type train | step 3510 | loss 0.0127 0.0816 0.3513 1.0898 | lr 9.6e-04 | norm 0.1097 | dt 0.026 | |
| type train | step 3520 | loss 0.0130 0.0835 0.3693 1.1267 | lr 9.5e-04 | norm 0.1257 | dt 0.025 | |
| type train | step 3530 | loss 0.0127 0.0855 0.3727 1.1301 | lr 9.5e-04 | norm 0.1248 | dt 0.025 | |
| type train | step 3540 | loss 0.0128 0.0859 0.3787 1.1461 | lr 9.5e-04 | norm 0.1250 | dt 0.025 | |
| type train | step 3550 | loss 0.0130 0.0846 0.3698 1.1334 | lr 9.5e-04 | norm 0.1234 | dt 0.025 | |
| type train | step 3560 | loss 0.0129 0.0862 0.3817 1.1593 | lr 9.5e-04 | norm 0.1292 | dt 0.025 | |
| type train | step 3570 | loss 0.0129 0.0853 0.3779 1.1496 | lr 9.5e-04 | norm 0.1292 | dt 0.025 | |
| type train | step 3580 | loss 0.0128 0.0851 0.3796 1.1598 | lr 9.5e-04 | norm 0.1178 | dt 0.025 | |
| type train | step 3590 | loss 0.0123 0.0828 0.3581 1.1018 | lr 9.5e-04 | norm 0.1218 | dt 0.025 | |
| type train | step 3600 | loss 0.0125 0.0856 0.3701 1.1315 | lr 9.5e-04 | norm 0.1486 | dt 0.025 | |
| type train | step 3610 | loss 0.0130 0.0843 0.3681 1.1211 | lr 9.5e-04 | norm 0.1457 | dt 0.026 | |
| type train | step 3620 | loss 0.0127 0.0860 0.3856 1.1730 | lr 9.5e-04 | norm 0.1264 | dt 0.025 | |
| type train | step 3630 | loss 0.0128 0.0836 0.3607 1.1086 | lr 9.5e-04 | norm 0.1299 | dt 0.025 | |
| type train | step 3640 | loss 0.0128 0.0843 0.3739 1.1422 | lr 9.5e-04 | norm 0.1264 | dt 0.025 | |
| type train | step 3650 | loss 0.0123 0.0830 0.3597 1.1063 | lr 9.5e-04 | norm 0.1145 | dt 0.025 | |
| type train | step 3660 | loss 0.0125 0.0855 0.3816 1.1599 | lr 9.5e-04 | norm 0.1317 | dt 0.026 | |
| type train | step 3670 | loss 0.0125 0.0847 0.3679 1.1264 | lr 9.5e-04 | norm 0.1257 | dt 0.025 | |
| type train | step 3680 | loss 0.0125 0.0852 0.3770 1.1546 | lr 9.5e-04 | norm 0.1284 | dt 0.026 | |
| type train | step 3690 | loss 0.0125 0.0818 0.3577 1.1044 | lr 9.5e-04 | norm 0.1161 | dt 0.025 | |
| type train | step 3700 | loss 0.0129 0.0862 0.3801 1.1586 | lr 9.5e-04 | norm 0.1439 | dt 0.025 | |
| type train | step 3710 | loss 0.0124 0.0836 0.3593 1.0961 | lr 9.5e-04 | norm 0.1149 | dt 0.025 | |
| type train | step 3720 | loss 0.0123 0.0834 0.3681 1.1270 | lr 9.5e-04 | norm 0.1441 | dt 0.025 | |
| type train | step 3730 | loss 0.0124 0.0832 0.3604 1.1007 | lr 9.5e-04 | norm 0.1052 | dt 0.026 | |
| type train | step 3740 | loss 0.0128 0.0875 0.3890 1.1825 | lr 9.5e-04 | norm 0.1172 | dt 0.026 | |
| type train | step 3750 | loss 0.0123 0.0833 0.3642 1.1165 | lr 9.5e-04 | norm 0.1232 | dt 0.025 | |
| type train | step 3760 | loss 0.0122 0.0804 0.3540 1.0987 | lr 9.5e-04 | norm 0.1138 | dt 0.025 | |
| type train | step 3770 | loss 0.0126 0.0842 0.3685 1.1139 | lr 9.5e-04 | norm 0.1158 | dt 0.025 | |
| type train | step 3780 | loss 0.0126 0.0843 0.3590 1.0993 | lr 9.5e-04 | norm 0.1196 | dt 0.025 | |
| type train | step 3790 | loss 0.0122 0.0824 0.3605 1.1065 | lr 9.5e-04 | norm 0.1019 | dt 0.025 | |
| type train | step 3800 | loss 0.0134 0.0850 0.3686 1.1269 | lr 9.5e-04 | norm 0.1502 | dt 0.025 | |
| type train | step 3810 | loss 0.0126 0.0857 0.3714 1.1430 | lr 9.5e-04 | norm 0.1384 | dt 0.025 | |
| type train | step 3820 | loss 0.0123 0.0833 0.3695 1.1356 | lr 9.4e-04 | norm 0.1350 | dt 0.025 | |
| type train | step 3830 | loss 0.0121 0.0843 0.3641 1.0986 | lr 9.4e-04 | norm 0.1679 | dt 0.025 | |
| type train | step 3840 | loss 0.0123 0.0838 0.3625 1.1136 | lr 9.4e-04 | norm 0.1151 | dt 0.025 | |
| type train | step 3850 | loss 0.0123 0.0827 0.3651 1.1222 | lr 9.4e-04 | norm 0.1233 | dt 0.026 | |
| type train | step 3860 | loss 0.0130 0.0866 0.3794 1.1585 | lr 9.4e-04 | norm 0.1128 | dt 0.025 | |
| type train | step 3870 | loss 0.0127 0.0830 0.3633 1.1175 | lr 9.4e-04 | norm 0.1408 | dt 0.026 | |
| type train | step 3880 | loss 0.0122 0.0834 0.3710 1.1273 | lr 9.4e-04 | norm 0.1345 | dt 0.026 | |
| type train | step 3890 | loss 0.0120 0.0834 0.3626 1.1058 | lr 9.4e-04 | norm 0.1205 | dt 0.025 | |
| type train | step 3900 | loss 0.0124 0.0868 0.3751 1.1286 | lr 9.4e-04 | norm 0.1181 | dt 0.025 | |
| type train | step 3910 | loss 0.0122 0.0816 0.3566 1.0993 | lr 9.4e-04 | norm 0.1104 | dt 0.025 | |
| type train | step 3920 | loss 0.0121 0.0822 0.3596 1.1111 | lr 9.4e-04 | norm 0.1198 | dt 0.026 | |
| type train | step 3930 | loss 0.0125 0.0819 0.3563 1.0972 | lr 9.4e-04 | norm 0.1082 | dt 0.025 | |
| type train | step 3940 | loss 0.0126 0.0840 0.3737 1.1520 | lr 9.4e-04 | norm 0.1514 | dt 0.025 | |
| type train | step 3950 | loss 0.0120 0.0821 0.3547 1.0974 | lr 9.4e-04 | norm 0.1373 | dt 0.026 | |
| type train | step 3960 | loss 0.0124 0.0850 0.3651 1.1117 | lr 9.4e-04 | norm 0.1242 | dt 0.026 | |
| type train | step 3970 | loss 0.0124 0.0827 0.3648 1.1205 | lr 9.4e-04 | norm 0.1213 | dt 0.025 | |
| type train | step 3980 | loss 0.0123 0.0815 0.3631 1.1170 | lr 9.4e-04 | norm 0.1314 | dt 0.025 | |
| type train | step 3990 | loss 0.0123 0.0843 0.3730 1.1362 | lr 9.4e-04 | norm 0.1173 | dt 0.025 | |
| type train | step 4000 | loss 0.0119 0.0833 0.3693 1.1338 | lr 9.4e-04 | norm 0.1129 | dt 0.026 | |
| type train | step 4010 | loss 0.0120 0.0839 0.3647 1.1182 | lr 9.4e-04 | norm 0.1521 | dt 0.025 | |
| type train | step 4020 | loss 0.0120 0.0801 0.3498 1.0791 | lr 9.4e-04 | norm 0.1285 | dt 0.025 | |
| type train | step 4030 | loss 0.0121 0.0832 0.3604 1.1105 | lr 9.4e-04 | norm 0.1259 | dt 0.025 | |
| type train | step 4040 | loss 0.0125 0.0833 0.3722 1.1459 | lr 9.4e-04 | norm 0.1307 | dt 0.025 | |
| type train | step 4050 | loss 0.0119 0.0827 0.3707 1.1284 | lr 9.4e-04 | norm 0.1142 | dt 0.026 | |
| type train | step 4060 | loss 0.0124 0.0822 0.3581 1.1035 | lr 9.4e-04 | norm 0.1095 | dt 0.026 | |
| type train | step 4070 | loss 0.0120 0.0834 0.3659 1.1165 | lr 9.4e-04 | norm 0.1352 | dt 0.026 | |
| type train | step 4080 | loss 0.0120 0.0831 0.3627 1.1165 | lr 9.4e-04 | norm 0.1410 | dt 0.026 | |
| type train | step 4090 | loss 0.0121 0.0845 0.3744 1.1343 | lr 9.3e-04 | norm 0.1188 | dt 0.026 | |
| type train | step 4100 | loss 0.0119 0.0809 0.3566 1.1029 | lr 9.3e-04 | norm 0.1195 | dt 0.025 | |
| type train | step 4110 | loss 0.0124 0.0841 0.3703 1.1248 | lr 9.3e-04 | norm 0.1133 | dt 0.026 | |
| type train | step 4120 | loss 0.0119 0.0799 0.3481 1.0818 | lr 9.3e-04 | norm 0.1116 | dt 0.026 | |
| type train | step 4130 | loss 0.0121 0.0818 0.3649 1.1203 | lr 9.3e-04 | norm 0.1274 | dt 0.025 | |
| type train | step 4140 | loss 0.0119 0.0837 0.3693 1.1222 | lr 9.3e-04 | norm 0.1281 | dt 0.025 | |
| type train | step 4150 | loss 0.0120 0.0844 0.3754 1.1377 | lr 9.3e-04 | norm 0.1259 | dt 0.026 | |
| type train | step 4160 | loss 0.0122 0.0829 0.3661 1.1259 | lr 9.3e-04 | norm 0.1206 | dt 0.025 | |
| type train | step 4170 | loss 0.0121 0.0847 0.3781 1.1514 | lr 9.3e-04 | norm 0.1281 | dt 0.025 | |
| type train | step 4180 | loss 0.0121 0.0838 0.3747 1.1429 | lr 9.3e-04 | norm 0.1323 | dt 0.026 | |
| type train | step 4190 | loss 0.0120 0.0835 0.3755 1.1541 | lr 9.3e-04 | norm 0.1153 | dt 0.025 | |
| type train | step 4200 | loss 0.0115 0.0813 0.3549 1.0953 | lr 9.3e-04 | norm 0.1287 | dt 0.025 | |
| type train | step 4210 | loss 0.0117 0.0839 0.3675 1.1237 | lr 9.3e-04 | norm 0.1434 | dt 0.026 | |
| type train | step 4220 | loss 0.0122 0.0826 0.3645 1.1138 | lr 9.3e-04 | norm 0.1472 | dt 0.026 | |
| type train | step 4230 | loss 0.0119 0.0843 0.3824 1.1652 | lr 9.3e-04 | norm 0.1251 | dt 0.025 | |
| type train | step 4240 | loss 0.0120 0.0822 0.3578 1.1011 | lr 9.3e-04 | norm 0.1334 | dt 0.026 | |
| type train | step 4250 | loss 0.0120 0.0827 0.3703 1.1366 | lr 9.3e-04 | norm 0.1321 | dt 0.026 | |
| type train | step 4260 | loss 0.0115 0.0814 0.3578 1.0989 | lr 9.3e-04 | norm 0.1137 | dt 0.026 | |
| type train | step 4270 | loss 0.0117 0.0838 0.3787 1.1530 | lr 9.3e-04 | norm 0.1272 | dt 0.027 | |
| type train | step 4280 | loss 0.0117 0.0831 0.3646 1.1186 | lr 9.3e-04 | norm 0.1171 | dt 0.025 | |
| type train | step 4290 | loss 0.0118 0.0838 0.3741 1.1459 | lr 9.3e-04 | norm 0.1313 | dt 0.026 | |
| type train | step 4300 | loss 0.0117 0.0803 0.3546 1.0983 | lr 9.3e-04 | norm 0.1187 | dt 0.025 | |
| type train | step 4310 | loss 0.0121 0.0845 0.3768 1.1531 | lr 9.3e-04 | norm 0.1408 | dt 0.025 | |
| type train | step 4320 | loss 0.0116 0.0821 0.3564 1.0901 | lr 9.3e-04 | norm 0.1219 | dt 0.026 | |
| type train | step 4330 | loss 0.0115 0.0819 0.3652 1.1198 | lr 9.3e-04 | norm 0.1369 | dt 0.025 | |
| type train | step 4340 | loss 0.0116 0.0818 0.3575 1.0947 | lr 9.2e-04 | norm 0.0985 | dt 0.025 | |
| type train | step 4350 | loss 0.0120 0.0860 0.3854 1.1757 | lr 9.2e-04 | norm 0.1137 | dt 0.025 | |
| type train | step 4360 | loss 0.0115 0.0819 0.3619 1.1087 | lr 9.2e-04 | norm 0.1217 | dt 0.026 | |
| type train | step 4370 | loss 0.0115 0.0788 0.3510 1.0923 | lr 9.2e-04 | norm 0.1130 | dt 0.026 | |
| type train | step 4380 | loss 0.0118 0.0827 0.3648 1.1067 | lr 9.2e-04 | norm 0.1193 | dt 0.025 | |
| type train | step 4390 | loss 0.0119 0.0828 0.3563 1.0916 | lr 9.2e-04 | norm 0.1173 | dt 0.026 | |
| type train | step 4400 | loss 0.0115 0.0809 0.3580 1.1001 | lr 9.2e-04 | norm 0.0997 | dt 0.026 | |
| type train | step 4410 | loss 0.0126 0.0835 0.3636 1.1212 | lr 9.2e-04 | norm 0.1491 | dt 0.025 | |
| type train | step 4420 | loss 0.0118 0.0842 0.3685 1.1355 | lr 9.2e-04 | norm 0.1362 | dt 0.025 | |
| type train | step 4430 | loss 0.0116 0.0818 0.3661 1.1279 | lr 9.2e-04 | norm 0.1340 | dt 0.025 | |
| type train | step 4440 | loss 0.0114 0.0828 0.3613 1.0935 | lr 9.2e-04 | norm 0.1292 | dt 0.025 | |
| type train | step 4450 | loss 0.0116 0.0826 0.3596 1.1064 | lr 9.2e-04 | norm 0.1143 | dt 0.025 | |
| type train | step 4460 | loss 0.0115 0.0812 0.3624 1.1151 | lr 9.2e-04 | norm 0.1226 | dt 0.025 | |
| type train | step 4470 | loss 0.0122 0.0851 0.3757 1.1521 | lr 9.2e-04 | norm 0.1247 | dt 0.026 | |
| type train | step 4480 | loss 0.0119 0.0814 0.3597 1.1116 | lr 9.2e-04 | norm 0.1313 | dt 0.026 | |
| type train | step 4490 | loss 0.0115 0.0820 0.3682 1.1198 | lr 9.2e-04 | norm 0.1378 | dt 0.026 | |
| type train | step 4500 | loss 0.0113 0.0821 0.3601 1.0981 | lr 9.2e-04 | norm 0.1199 | dt 0.026 | |
| type train | step 4510 | loss 0.0117 0.0855 0.3718 1.1228 | lr 9.2e-04 | norm 0.1138 | dt 0.025 | |
| type train | step 4520 | loss 0.0115 0.0803 0.3542 1.0926 | lr 9.2e-04 | norm 0.1012 | dt 0.025 | |
| type train | step 4530 | loss 0.0114 0.0809 0.3569 1.1041 | lr 9.2e-04 | norm 0.1178 | dt 0.025 | |
| type train | step 4540 | loss 0.0118 0.0807 0.3531 1.0909 | lr 9.2e-04 | norm 0.1125 | dt 0.025 | |
| type train | step 4550 | loss 0.0119 0.0826 0.3703 1.1457 | lr 9.2e-04 | norm 0.1504 | dt 0.025 | |
| type train | step 4560 | loss 0.0114 0.0807 0.3525 1.0894 | lr 9.2e-04 | norm 0.1309 | dt 0.026 | |
| type train | step 4570 | loss 0.0116 0.0837 0.3620 1.1043 | lr 9.2e-04 | norm 0.1182 | dt 0.026 | |
| type train | step 4580 | loss 0.0117 0.0814 0.3622 1.1141 | lr 9.1e-04 | norm 0.1216 | dt 0.026 | |
| type train | step 4590 | loss 0.0116 0.0802 0.3600 1.1099 | lr 9.1e-04 | norm 0.1205 | dt 0.025 | |
| type train | step 4600 | loss 0.0116 0.0829 0.3702 1.1305 | lr 9.1e-04 | norm 0.1156 | dt 0.025 | |
| type train | step 4610 | loss 0.0113 0.0820 0.3669 1.1279 | lr 9.1e-04 | norm 0.1160 | dt 0.026 | |
| type train | step 4620 | loss 0.0113 0.0826 0.3623 1.1108 | lr 9.1e-04 | norm 0.1470 | dt 0.025 | |
| type train | step 4630 | loss 0.0113 0.0791 0.3474 1.0724 | lr 9.1e-04 | norm 0.1292 | dt 0.025 | |
| type train | step 4640 | loss 0.0114 0.0819 0.3578 1.1044 | lr 9.1e-04 | norm 0.1247 | dt 0.025 | |
| type train | step 4650 | loss 0.0118 0.0820 0.3690 1.1394 | lr 9.1e-04 | norm 0.1201 | dt 0.026 | |
| type train | step 4660 | loss 0.0112 0.0817 0.3678 1.1227 | lr 9.1e-04 | norm 0.1255 | dt 0.025 | |
| type train | step 4670 | loss 0.0117 0.0811 0.3554 1.0972 | lr 9.1e-04 | norm 0.1145 | dt 0.026 | |
| type train | step 4680 | loss 0.0113 0.0823 0.3635 1.1104 | lr 9.1e-04 | norm 0.1314 | dt 0.026 | |
| type train | step 4690 | loss 0.0113 0.0819 0.3600 1.1095 | lr 9.1e-04 | norm 0.1369 | dt 0.026 | |
| type train | step 4700 | loss 0.0114 0.0833 0.3716 1.1284 | lr 9.1e-04 | norm 0.1195 | dt 0.026 | |
| type train | step 4710 | loss 0.0113 0.0799 0.3536 1.0983 | lr 9.1e-04 | norm 0.1207 | dt 0.026 | |
| type train | step 4720 | loss 0.0117 0.0828 0.3678 1.1191 | lr 9.1e-04 | norm 0.1099 | dt 0.026 | |
| type train | step 4730 | loss 0.0112 0.0789 0.3456 1.0757 | lr 9.1e-04 | norm 0.1115 | dt 0.025 | |
| type train | step 4740 | loss 0.0115 0.0808 0.3623 1.1150 | lr 9.1e-04 | norm 0.1235 | dt 0.026 | |
| type train | step 4750 | loss 0.0113 0.0826 0.3665 1.1172 | lr 9.1e-04 | norm 0.1232 | dt 0.026 | |
| type train | step 4760 | loss 0.0113 0.0833 0.3732 1.1320 | lr 9.1e-04 | norm 0.1249 | dt 0.025 | |
| type train | step 4770 | loss 0.0115 0.0817 0.3634 1.1202 | lr 9.1e-04 | norm 0.1220 | dt 0.025 | |
| type train | step 4780 | loss 0.0114 0.0834 0.3756 1.1455 | lr 9.1e-04 | norm 0.1240 | dt 0.025 | |
| type train | step 4790 | loss 0.0114 0.0828 0.3725 1.1361 | lr 9.1e-04 | norm 0.1323 | dt 0.026 | |
| type train | step 4800 | loss 0.0114 0.0824 0.3723 1.1482 | lr 9.1e-04 | norm 0.1075 | dt 0.026 | |
| type train | step 4810 | loss 0.0109 0.0802 0.3525 1.0903 | lr 9.0e-04 | norm 0.1210 | dt 0.026 | |
| type train | step 4820 | loss 0.0111 0.0828 0.3652 1.1186 | lr 9.0e-04 | norm 0.1405 | dt 0.026 | |
| type train | step 4830 | loss 0.0116 0.0815 0.3617 1.1078 | lr 9.0e-04 | norm 0.1446 | dt 0.027 | |
| type train | step 4840 | loss 0.0113 0.0833 0.3795 1.1611 | lr 9.0e-04 | norm 0.1309 | dt 0.027 | |
| type train | step 4850 | loss 0.0113 0.0812 0.3562 1.0960 | lr 9.0e-04 | norm 0.1358 | dt 0.026 | |
| type train | step 4860 | loss 0.0114 0.0817 0.3680 1.1304 | lr 9.0e-04 | norm 0.1228 | dt 0.026 | |
| type train | step 4870 | loss 0.0109 0.0803 0.3557 1.0929 | lr 9.0e-04 | norm 0.1113 | dt 0.026 | |
| type train | step 4880 | loss 0.0111 0.0828 0.3767 1.1499 | lr 9.0e-04 | norm 0.1287 | dt 0.028 | |
| type train | step 4890 | loss 0.0111 0.0820 0.3621 1.1132 | lr 9.0e-04 | norm 0.1194 | dt 0.026 | |
| type train | step 4900 | loss 0.0112 0.0828 0.3713 1.1394 | lr 9.0e-04 | norm 0.1337 | dt 0.027 | |
| type train | step 4910 | loss 0.0111 0.0793 0.3520 1.0932 | lr 9.0e-04 | norm 0.1125 | dt 0.026 | |
| type train | step 4920 | loss 0.0115 0.0834 0.3740 1.1486 | lr 9.0e-04 | norm 0.1429 | dt 0.026 | |
| type train | step 4930 | loss 0.0110 0.0810 0.3543 1.0843 | lr 9.0e-04 | norm 0.1091 | dt 0.026 | |
| type train | step 4940 | loss 0.0109 0.0809 0.3633 1.1150 | lr 9.0e-04 | norm 0.1345 | dt 0.026 | |
| type train | step 4950 | loss 0.0110 0.0807 0.3551 1.0892 | lr 9.0e-04 | norm 0.1012 | dt 0.026 | |
| type train | step 4960 | loss 0.0115 0.0848 0.3826 1.1694 | lr 9.0e-04 | norm 0.1110 | dt 0.026 | |
| type train | step 4970 | loss 0.0109 0.0809 0.3597 1.1039 | lr 9.0e-04 | norm 0.1232 | dt 0.026 | |
| type train | step 4980 | loss 0.0109 0.0779 0.3488 1.0878 | lr 9.0e-04 | norm 0.1158 | dt 0.025 | |
| type train | step 4990 | loss 0.0112 0.0817 0.3625 1.1017 | lr 9.0e-04 | norm 0.1167 | dt 0.026 | |
| type train | step 5000 | loss 0.0113 0.0817 0.3539 1.0860 | lr 9.0e-04 | norm 0.1111 | dt 0.026 | |
| type train | step 5010 | loss 0.0110 0.0798 0.3558 1.0959 | lr 9.0e-04 | norm 0.1018 | dt 0.026 | |
| type train | step 5020 | loss 0.0119 0.0822 0.3605 1.1148 | lr 9.0e-04 | norm 0.1483 | dt 0.025 | |
| type train | step 5030 | loss 0.0112 0.0831 0.3660 1.1305 | lr 8.9e-04 | norm 0.1362 | dt 0.026 | |
| type train | step 5040 | loss 0.0111 0.0806 0.3639 1.1232 | lr 8.9e-04 | norm 0.1297 | dt 0.026 | |
| type train | step 5050 | loss 0.0108 0.0817 0.3587 1.0886 | lr 8.9e-04 | norm 0.1312 | dt 0.026 | |
| type train | step 5060 | loss 0.0111 0.0816 0.3575 1.1016 | lr 8.9e-04 | norm 0.1126 | dt 0.026 | |
| type train | step 5070 | loss 0.0110 0.0802 0.3600 1.1102 | lr 8.9e-04 | norm 0.1234 | dt 0.026 | |
| type train | step 5080 | loss 0.0117 0.0840 0.3728 1.1460 | lr 8.9e-04 | norm 0.1125 | dt 0.025 | |
| type train | step 5090 | loss 0.0114 0.0805 0.3571 1.1068 | lr 8.9e-04 | norm 0.1258 | dt 0.026 | |
| type train | step 5100 | loss 0.0109 0.0808 0.3657 1.1136 | lr 8.9e-04 | norm 0.1314 | dt 0.026 | |
| type train | step 5110 | loss 0.0108 0.0809 0.3575 1.0931 | lr 8.9e-04 | norm 0.1180 | dt 0.025 | |
| type train | step 5120 | loss 0.0112 0.0845 0.3696 1.1194 | lr 8.9e-04 | norm 0.1177 | dt 0.027 | |
| type train | step 5130 | loss 0.0110 0.0792 0.3518 1.0882 | lr 8.9e-04 | norm 0.1033 | dt 0.026 | |
| type train | step 5140 | loss 0.0109 0.0799 0.3548 1.1008 | lr 8.9e-04 | norm 0.1077 | dt 0.025 | |
| type train | step 5150 | loss 0.0113 0.0797 0.3508 1.0863 | lr 8.9e-04 | norm 0.1078 | dt 0.026 | |
| type train | step 5160 | loss 0.0114 0.0815 0.3679 1.1399 | lr 8.9e-04 | norm 0.1511 | dt 0.025 | |
| type train | step 5170 | loss 0.0109 0.0795 0.3501 1.0845 | lr 8.9e-04 | norm 0.1274 | dt 0.025 | |
| type train | step 5180 | loss 0.0112 0.0825 0.3600 1.1000 | lr 8.9e-04 | norm 0.1193 | dt 0.025 | |
| type train | step 5190 | loss 0.0112 0.0803 0.3600 1.1110 | lr 8.9e-04 | norm 0.1160 | dt 0.026 | |
| type train | step 5200 | loss 0.0111 0.0791 0.3572 1.1059 | lr 8.9e-04 | norm 0.1228 | dt 0.025 | |
| type train | step 5210 | loss 0.0112 0.0819 0.3678 1.1265 | lr 8.9e-04 | norm 0.1071 | dt 0.025 | |
| type train | step 5220 | loss 0.0108 0.0811 0.3647 1.1234 | lr 8.9e-04 | norm 0.1146 | dt 0.026 | |
| type train | step 5230 | loss 0.0109 0.0814 0.3598 1.1074 | lr 8.8e-04 | norm 0.1473 | dt 0.026 | |
| type train | step 5240 | loss 0.0108 0.0781 0.3450 1.0677 | lr 8.8e-04 | norm 0.1276 | dt 0.025 | |
| type train | step 5250 | loss 0.0109 0.0808 0.3554 1.0997 | lr 8.8e-04 | norm 0.1304 | dt 0.026 | |
| type train | step 5260 | loss 0.0113 0.0810 0.3664 1.1352 | lr 8.8e-04 | norm 0.1492 | dt 0.026 | |
| type train | step 5270 | loss 0.0108 0.0807 0.3657 1.1179 | lr 8.8e-04 | norm 0.1165 | dt 0.025 | |
| type train | step 5280 | loss 0.0112 0.0802 0.3530 1.0933 | lr 8.8e-04 | norm 0.1385 | dt 0.025 | |
| type train | step 5290 | loss 0.0109 0.0813 0.3610 1.1064 | lr 8.8e-04 | norm 0.1412 | dt 0.025 | |
| type train | step 5300 | loss 0.0108 0.0808 0.3577 1.1040 | lr 8.8e-04 | norm 0.1402 | dt 0.026 | |
| type train | step 5310 | loss 0.0110 0.0822 0.3689 1.1249 | lr 8.8e-04 | norm 0.1190 | dt 0.025 | |
| type train | step 5320 | loss 0.0108 0.0789 0.3512 1.0940 | lr 8.8e-04 | norm 0.1203 | dt 0.025 | |
| type train | step 5330 | loss 0.0112 0.0817 0.3656 1.1157 | lr 8.8e-04 | norm 0.1165 | dt 0.026 | |
| type train | step 5340 | loss 0.0108 0.0778 0.3433 1.0715 | lr 8.8e-04 | norm 0.1105 | dt 0.025 | |
| type train | step 5350 | loss 0.0110 0.0798 0.3603 1.1107 | lr 8.8e-04 | norm 0.1236 | dt 0.026 | |
| type train | step 5360 | loss 0.0108 0.0816 0.3641 1.1130 | lr 8.8e-04 | norm 0.1191 | dt 0.026 | |
| type train | step 5370 | loss 0.0109 0.0823 0.3706 1.1285 | lr 8.8e-04 | norm 0.1261 | dt 0.026 | |
| type train | step 5380 | loss 0.0111 0.0807 0.3611 1.1158 | lr 8.8e-04 | norm 0.1215 | dt 0.025 | |
| type train | step 5390 | loss 0.0110 0.0824 0.3739 1.1406 | lr 8.8e-04 | norm 0.1285 | dt 0.026 | |
| type train | step 5400 | loss 0.0110 0.0818 0.3705 1.1320 | lr 8.8e-04 | norm 0.1345 | dt 0.026 | |
| type train | step 5410 | loss 0.0110 0.0814 0.3695 1.1444 | lr 8.8e-04 | norm 0.1162 | dt 0.025 | |
| type train | step 5420 | loss 0.0106 0.0793 0.3505 1.0866 | lr 8.8e-04 | norm 0.1316 | dt 0.026 | |
| type train | step 5430 | loss 0.0107 0.0817 0.3633 1.1134 | lr 8.8e-04 | norm 0.1420 | dt 0.025 | |
| type train | step 5440 | loss 0.0112 0.0804 0.3593 1.1037 | lr 8.7e-04 | norm 0.1463 | dt 0.025 | |
| type train | step 5450 | loss 0.0108 0.0822 0.3773 1.1564 | lr 8.7e-04 | norm 0.1299 | dt 0.026 | |
| type train | step 5460 | loss 0.0109 0.0803 0.3541 1.0933 | lr 8.7e-04 | norm 0.1303 | dt 0.025 | |
| type train | step 5470 | loss 0.0110 0.0807 0.3658 1.1263 | lr 8.7e-04 | norm 0.1190 | dt 0.025 | |
| type train | step 5480 | loss 0.0105 0.0794 0.3540 1.0892 | lr 8.7e-04 | norm 0.1115 | dt 0.026 | |
| type train | step 5490 | loss 0.0107 0.0819 0.3749 1.1474 | lr 8.7e-04 | norm 0.1262 | dt 0.030 | |
| type train | step 5500 | loss 0.0107 0.0811 0.3598 1.1083 | lr 8.7e-04 | norm 0.1166 | dt 0.026 | |
| type train | step 5510 | loss 0.0108 0.0821 0.3694 1.1350 | lr 8.7e-04 | norm 0.1315 | dt 0.025 | |
| type train | step 5520 | loss 0.0108 0.0783 0.3499 1.0898 | lr 8.7e-04 | norm 0.1138 | dt 0.025 | |
| type train | step 5530 | loss 0.0111 0.0825 0.3724 1.1452 | lr 8.7e-04 | norm 0.1343 | dt 0.025 | |
| type train | step 5540 | loss 0.0106 0.0800 0.3526 1.0797 | lr 8.7e-04 | norm 0.1186 | dt 0.025 | |
| type train | step 5550 | loss 0.0105 0.0799 0.3614 1.1115 | lr 8.7e-04 | norm 0.1309 | dt 0.025 | |
| type train | step 5560 | loss 0.0107 0.0798 0.3533 1.0856 | lr 8.7e-04 | norm 0.0979 | dt 0.027 | |
| type train | step 5570 | loss 0.0111 0.0838 0.3804 1.1654 | lr 8.7e-04 | norm 0.1184 | dt 0.026 | |
| type train | step 5580 | loss 0.0106 0.0799 0.3580 1.0988 | lr 8.7e-04 | norm 0.1228 | dt 0.026 | |
| type train | step 5590 | loss 0.0105 0.0771 0.3475 1.0852 | lr 8.7e-04 | norm 0.1150 | dt 0.025 | |
| type train | step 5600 | loss 0.0108 0.0807 0.3610 1.0984 | lr 8.7e-04 | norm 0.1155 | dt 0.025 | |
| type train | step 5610 | loss 0.0109 0.0808 0.3523 1.0827 | lr 8.7e-04 | norm 0.1120 | dt 0.026 | |
| type train | step 5620 | loss 0.0106 0.0790 0.3541 1.0923 | lr 8.7e-04 | norm 0.1034 | dt 0.025 | |
| type train | step 5630 | loss 0.0115 0.0810 0.3586 1.1112 | lr 8.6e-04 | norm 0.1495 | dt 0.026 | |
| type train | step 5640 | loss 0.0109 0.0820 0.3645 1.1265 | lr 8.6e-04 | norm 0.1368 | dt 0.026 | |
| type train | step 5650 | loss 0.0107 0.0799 0.3623 1.1203 | lr 8.6e-04 | norm 0.1312 | dt 0.025 | |
| type train | step 5660 | loss 0.0105 0.0808 0.3573 1.0856 | lr 8.6e-04 | norm 0.1323 | dt 0.025 | |
| type train | step 5670 | loss 0.0107 0.0808 0.3560 1.0973 | lr 8.6e-04 | norm 0.1147 | dt 0.026 | |
| type train | step 5680 | loss 0.0106 0.0793 0.3585 1.1068 | lr 8.6e-04 | norm 0.1292 | dt 0.026 | |
| type train | step 5690 | loss 0.0113 0.0831 0.3708 1.1424 | lr 8.6e-04 | norm 0.1125 | dt 0.026 | |
| type train | step 5700 | loss 0.0109 0.0797 0.3557 1.1041 | lr 8.6e-04 | norm 0.1274 | dt 0.026 | |
| type train | step 5710 | loss 0.0106 0.0801 0.3638 1.1090 | lr 8.6e-04 | norm 0.1366 | dt 0.025 | |
| type train | step 5720 | loss 0.0104 0.0800 0.3559 1.0892 | lr 8.6e-04 | norm 0.1291 | dt 0.026 | |
| type train | step 5730 | loss 0.0109 0.0837 0.3676 1.1157 | lr 8.6e-04 | norm 0.1314 | dt 0.027 | |
| type train | step 5740 | loss 0.0106 0.0785 0.3504 1.0858 | lr 8.6e-04 | norm 0.1003 | dt 0.030 | |
| type train | step 5750 | loss 0.0105 0.0791 0.3529 1.0981 | lr 8.6e-04 | norm 0.1063 | dt 0.035 | |
| type train | step 5760 | loss 0.0109 0.0789 0.3493 1.0823 | lr 8.6e-04 | norm 0.1207 | dt 0.029 | |
| type train | step 5770 | loss 0.0110 0.0806 0.3659 1.1363 | lr 8.6e-04 | norm 0.1623 | dt 0.028 | |
| type train | step 5780 | loss 0.0105 0.0787 0.3484 1.0821 | lr 8.6e-04 | norm 0.1320 | dt 0.026 | |
| type train | step 5790 | loss 0.0108 0.0818 0.3582 1.0965 | lr 8.6e-04 | norm 0.1185 | dt 0.026 | |
| type train | step 5800 | loss 0.0108 0.0797 0.3588 1.1068 | lr 8.6e-04 | norm 0.1196 | dt 0.025 | |
| type train | step 5810 | loss 0.0107 0.0784 0.3551 1.1040 | lr 8.6e-04 | norm 0.1217 | dt 0.025 | |
| type train | step 5820 | loss 0.0108 0.0811 0.3663 1.1238 | lr 8.5e-04 | norm 0.1081 | dt 0.025 | |
| type train | step 5830 | loss 0.0105 0.0803 0.3631 1.1195 | lr 8.5e-04 | norm 0.1120 | dt 0.026 | |
| type train | step 5840 | loss 0.0105 0.0804 0.3581 1.1028 | lr 8.5e-04 | norm 0.1451 | dt 0.025 | |
| type train | step 5850 | loss 0.0105 0.0775 0.3434 1.0640 | lr 8.5e-04 | norm 0.1317 | dt 0.025 | |
| type train | step 5860 | loss 0.0106 0.0802 0.3534 1.0970 | lr 8.5e-04 | norm 0.1252 | dt 0.026 | |
| type train | step 5870 | loss 0.0110 0.0802 0.3644 1.1330 | lr 8.5e-04 | norm 0.1247 | dt 0.025 | |
| type train | step 5880 | loss 0.0105 0.0801 0.3643 1.1150 | lr 8.5e-04 | norm 0.1153 | dt 0.025 | |
| type train | step 5890 | loss 0.0109 0.0794 0.3519 1.0897 | lr 8.5e-04 | norm 0.1113 | dt 0.025 | |
| type train | step 5900 | loss 0.0105 0.0804 0.3595 1.1028 | lr 8.5e-04 | norm 0.1349 | dt 0.025 | |
| type train | step 5910 | loss 0.0105 0.0803 0.3557 1.1002 | lr 8.5e-04 | norm 0.1398 | dt 0.025 | |
| type train | step 5920 | loss 0.0106 0.0815 0.3672 1.1214 | lr 8.5e-04 | norm 0.1232 | dt 0.025 | |
| type train | step 5930 | loss 0.0105 0.0782 0.3492 1.0905 | lr 8.5e-04 | norm 0.1193 | dt 0.026 | |
| type train | step 5940 | loss 0.0109 0.0811 0.3636 1.1119 | lr 8.5e-04 | norm 0.1081 | dt 0.025 | |
| type train | step 5950 | loss 0.0104 0.0772 0.3419 1.0681 | lr 8.5e-04 | norm 0.1097 | dt 0.025 | |
| type train | step 5960 | loss 0.0107 0.0791 0.3588 1.1078 | lr 8.5e-04 | norm 0.1202 | dt 0.025 | |
| type train | step 5970 | loss 0.0105 0.0810 0.3623 1.1100 | lr 8.5e-04 | norm 0.1230 | dt 0.025 | |
| type train | step 5980 | loss 0.0105 0.0816 0.3690 1.1249 | lr 8.5e-04 | norm 0.1256 | dt 0.026 | |
| type train | step 5990 | loss 0.0108 0.0800 0.3592 1.1128 | lr 8.5e-04 | norm 0.1224 | dt 0.026 | |
| type train | step 6000 | loss 0.0106 0.0816 0.3723 1.1376 | lr 8.4e-04 | norm 0.1223 | dt 0.025 | |
| type train | step 6010 | loss 0.0106 0.0812 0.3693 1.1285 | lr 8.4e-04 | norm 0.1310 | dt 0.025 | |
| type train | step 6020 | loss 0.0106 0.0807 0.3681 1.1417 | lr 8.4e-04 | norm 0.1259 | dt 0.025 | |
| type train | step 6030 | loss 0.0102 0.0786 0.3490 1.0837 | lr 8.4e-04 | norm 0.1225 | dt 0.025 | |
| type train | step 6040 | loss 0.0103 0.0812 0.3616 1.1101 | lr 8.4e-04 | norm 0.1362 | dt 0.026 | |
| type train | step 6050 | loss 0.0108 0.0798 0.3575 1.1022 | lr 8.4e-04 | norm 0.1477 | dt 0.025 | |
| type train | step 6060 | loss 0.0105 0.0815 0.3756 1.1526 | lr 8.4e-04 | norm 0.1267 | dt 0.025 | |
| type train | step 6070 | loss 0.0105 0.0795 0.3526 1.0911 | lr 8.4e-04 | norm 0.1368 | dt 0.025 | |
| type train | step 6080 | loss 0.0106 0.0799 0.3646 1.1239 | lr 8.4e-04 | norm 0.1213 | dt 0.025 | |
| type train | step 6090 | loss 0.0102 0.0787 0.3530 1.0858 | lr 8.4e-04 | norm 0.1129 | dt 0.025 | |
| type train | step 6100 | loss 0.0104 0.0813 0.3736 1.1443 | lr 8.4e-04 | norm 0.1275 | dt 0.027 | |
| type train | step 6110 | loss 0.0103 0.0806 0.3582 1.1052 | lr 8.4e-04 | norm 0.1167 | dt 0.025 | |
| type train | step 6120 | loss 0.0104 0.0815 0.3681 1.1319 | lr 8.4e-04 | norm 0.1252 | dt 0.025 | |
| type train | step 6130 | loss 0.0104 0.0777 0.3486 1.0863 | lr 8.4e-04 | norm 0.1122 | dt 0.025 | |
| type train | step 6140 | loss 0.0107 0.0818 0.3711 1.1430 | lr 8.4e-04 | norm 0.1369 | dt 0.025 | |
| type train | step 6150 | loss 0.0103 0.0794 0.3515 1.0767 | lr 8.4e-04 | norm 0.1235 | dt 0.026 | |
| type train | step 6160 | loss 0.0102 0.0794 0.3604 1.1083 | lr 8.4e-04 | norm 0.1341 | dt 0.025 | |
| type train | step 6170 | loss 0.0103 0.0792 0.3521 1.0834 | lr 8.4e-04 | norm 0.1044 | dt 0.025 | |
| type train | step 6180 | loss 0.0107 0.0831 0.3786 1.1624 | lr 8.3e-04 | norm 0.1113 | dt 0.025 | |
| type train | step 6190 | loss 0.0102 0.0790 0.3564 1.0953 | lr 8.3e-04 | norm 0.1145 | dt 0.025 | |
| type train | step 6200 | loss 0.0102 0.0766 0.3464 1.0823 | lr 8.3e-04 | norm 0.1187 | dt 0.025 | |
| type train | step 6210 | loss 0.0105 0.0801 0.3601 1.0953 | lr 8.3e-04 | norm 0.1160 | dt 0.025 | |
| type train | step 6220 | loss 0.0105 0.0803 0.3509 1.0810 | lr 8.3e-04 | norm 0.1093 | dt 0.026 | |
| type train | step 6230 | loss 0.0102 0.0785 0.3526 1.0891 | lr 8.3e-04 | norm 0.0972 | dt 0.025 | |
| type train | step 6240 | loss 0.0112 0.0804 0.3571 1.1088 | lr 8.3e-04 | norm 0.1447 | dt 0.025 | |
| type train | step 6250 | loss 0.0105 0.0813 0.3627 1.1225 | lr 8.3e-04 | norm 0.1386 | dt 0.026 | |
| type train | step 6260 | loss 0.0104 0.0794 0.3616 1.1172 | lr 8.3e-04 | norm 0.1314 | dt 0.035 | |
| type train | step 6270 | loss 0.0101 0.0801 0.3559 1.0827 | lr 8.3e-04 | norm 0.1262 | dt 0.035 | |
| type train | step 6280 | loss 0.0104 0.0802 0.3546 1.0943 | lr 8.3e-04 | norm 0.1130 | dt 0.035 | |
| type train | step 6290 | loss 0.0103 0.0789 0.3576 1.1037 | lr 8.3e-04 | norm 0.1245 | dt 0.035 | |
| type train | step 6300 | loss 0.0109 0.0825 0.3693 1.1403 | lr 8.3e-04 | norm 0.1142 | dt 0.035 | |
| type train | step 6310 | loss 0.0106 0.0792 0.3546 1.1010 | lr 8.3e-04 | norm 0.1252 | dt 0.034 | |
| type train | step 6320 | loss 0.0102 0.0795 0.3628 1.1059 | lr 8.3e-04 | norm 0.1331 | dt 0.034 | |
| type train | step 6330 | loss 0.0101 0.0794 0.3548 1.0856 | lr 8.3e-04 | norm 0.1248 | dt 0.034 | |
| type train | step 6340 | loss 0.0105 0.0831 0.3658 1.1126 | lr 8.3e-04 | norm 0.1135 | dt 0.033 | |
| type train | step 6350 | loss 0.0103 0.0781 0.3493 1.0830 | lr 8.2e-04 | norm 0.1040 | dt 0.033 | |
| type train | step 6360 | loss 0.0102 0.0787 0.3515 1.0958 | lr 8.2e-04 | norm 0.1109 | dt 0.032 | |
| type train | step 6370 | loss 0.0105 0.0784 0.3484 1.0795 | lr 8.2e-04 | norm 0.1098 | dt 0.032 | |
| type train | step 6380 | loss 0.0107 0.0801 0.3646 1.1332 | lr 8.2e-04 | norm 0.1532 | dt 0.032 | |
| type train | step 6390 | loss 0.0102 0.0781 0.3469 1.0788 | lr 8.2e-04 | norm 0.1326 | dt 0.032 | |
| type train | step 6400 | loss 0.0105 0.0815 0.3571 1.0937 | lr 8.2e-04 | norm 0.1179 | dt 0.032 | |
| type train | step 6410 | loss 0.0105 0.0792 0.3576 1.1042 | lr 8.2e-04 | norm 0.1232 | dt 0.032 | |
| type train | step 6420 | loss 0.0104 0.0779 0.3538 1.1023 | lr 8.2e-04 | norm 0.1228 | dt 0.032 | |
| type train | step 6430 | loss 0.0105 0.0805 0.3651 1.1217 | lr 8.2e-04 | norm 0.1142 | dt 0.032 | |
| type train | step 6440 | loss 0.0101 0.0797 0.3617 1.1174 | lr 8.2e-04 | norm 0.1134 | dt 0.031 | |
| type train | step 6450 | loss 0.0102 0.0799 0.3569 1.0999 | lr 8.2e-04 | norm 0.1489 | dt 0.031 | |
| type train | step 6460 | loss 0.0102 0.0771 0.3421 1.0622 | lr 8.2e-04 | norm 0.1354 | dt 0.031 | |
| type train | step 6470 | loss 0.0103 0.0799 0.3521 1.0947 | lr 8.2e-04 | norm 0.1281 | dt 0.031 | |
| type train | step 6480 | loss 0.0107 0.0796 0.3629 1.1322 | lr 8.2e-04 | norm 0.1279 | dt 0.031 | |
| type train | step 6490 | loss 0.0102 0.0798 0.3631 1.1131 | lr 8.2e-04 | norm 0.1212 | dt 0.031 | |
| type train | step 6500 | loss 0.0106 0.0789 0.3503 1.0870 | lr 8.2e-04 | norm 0.1103 | dt 0.030 | |
| type train | step 6510 | loss 0.0102 0.0799 0.3585 1.0989 | lr 8.2e-04 | norm 0.1323 | dt 0.026 | |
| type train | step 6520 | loss 0.0102 0.0798 0.3546 1.0983 | lr 8.1e-04 | norm 0.1349 | dt 0.026 | |
| type train | step 6530 | loss 0.0103 0.0811 0.3657 1.1193 | lr 8.1e-04 | norm 0.1262 | dt 0.025 | |
| type train | step 6540 | loss 0.0102 0.0777 0.3475 1.0880 | lr 8.1e-04 | norm 0.1232 | dt 0.026 | |
| type train | step 6550 | loss 0.0106 0.0807 0.3624 1.1101 | lr 8.1e-04 | norm 0.1100 | dt 0.026 | |
| type train | step 6560 | loss 0.0101 0.0767 0.3410 1.0660 | lr 8.1e-04 | norm 0.1148 | dt 0.025 | |
| type train | step 6570 | loss 0.0104 0.0786 0.3579 1.1047 | lr 8.1e-04 | norm 0.1149 | dt 0.025 | |
| type train | step 6580 | loss 0.0102 0.0806 0.3612 1.1073 | lr 8.1e-04 | norm 0.1167 | dt 0.025 | |
| type train | step 6590 | loss 0.0102 0.0812 0.3677 1.1227 | lr 8.1e-04 | norm 0.1195 | dt 0.026 | |
| type train | step 6600 | loss 0.0105 0.0794 0.3579 1.1101 | lr 8.1e-04 | norm 0.1248 | dt 0.025 | |
| type train | step 6610 | loss 0.0103 0.0811 0.3711 1.1351 | lr 8.1e-04 | norm 0.1176 | dt 0.025 | |
| type train | step 6620 | loss 0.0103 0.0808 0.3683 1.1270 | lr 8.1e-04 | norm 0.1289 | dt 0.026 | |
| type train | step 6630 | loss 0.0103 0.0802 0.3670 1.1392 | lr 8.1e-04 | norm 0.1149 | dt 0.026 | |
| type train | step 6640 | loss 0.0099 0.0783 0.3479 1.0815 | lr 8.1e-04 | norm 0.1214 | dt 0.026 | |
| type train | step 6650 | loss 0.0101 0.0807 0.3605 1.1078 | lr 8.1e-04 | norm 0.1371 | dt 0.026 | |
| type train | step 6660 | loss 0.0105 0.0792 0.3560 1.1008 | lr 8.1e-04 | norm 0.1424 | dt 0.026 | |
| type train | step 6670 | loss 0.0102 0.0811 0.3742 1.1496 | lr 8.1e-04 | norm 0.1265 | dt 0.025 | |
| type train | step 6680 | loss 0.0102 0.0790 0.3511 1.0886 | lr 8.1e-04 | norm 0.1292 | dt 0.026 | |
| type train | step 6690 | loss 0.0103 0.0794 0.3634 1.1224 | lr 8.0e-04 | norm 0.1172 | dt 0.026 | |
| type train | step 6700 | loss 0.0099 0.0783 0.3517 1.0835 | lr 8.0e-04 | norm 0.1096 | dt 0.025 | |
| type train | step 6710 | loss 0.0101 0.0809 0.3722 1.1418 | lr 8.0e-04 | norm 0.1249 | dt 0.028 | |
| type train | step 6720 | loss 0.0101 0.0800 0.3568 1.1019 | lr 8.0e-04 | norm 0.1196 | dt 0.026 | |
| type train | step 6730 | loss 0.0102 0.0812 0.3668 1.1295 | lr 8.0e-04 | norm 0.1248 | dt 0.025 | |
| type train | step 6740 | loss 0.0101 0.0774 0.3476 1.0846 | lr 8.0e-04 | norm 0.1160 | dt 0.025 | |
| type train | step 6750 | loss 0.0104 0.0813 0.3699 1.1399 | lr 8.0e-04 | norm 0.1357 | dt 0.026 | |
| type train | step 6760 | loss 0.0100 0.0789 0.3507 1.0746 | lr 8.0e-04 | norm 0.1075 | dt 0.026 | |
| type train | step 6770 | loss 0.0099 0.0790 0.3591 1.1058 | lr 8.0e-04 | norm 0.1304 | dt 0.025 | |
| type train | step 6780 | loss 0.0100 0.0788 0.3512 1.0802 | lr 8.0e-04 | norm 0.0997 | dt 0.025 | |
| type train | step 6790 | loss 0.0105 0.0827 0.3775 1.1606 | lr 8.0e-04 | norm 0.1184 | dt 0.026 | |
| type train | step 6800 | loss 0.0099 0.0786 0.3555 1.0923 | lr 8.0e-04 | norm 0.1194 | dt 0.025 | |
| type train | step 6810 | loss 0.0099 0.0762 0.3455 1.0789 | lr 8.0e-04 | norm 0.1210 | dt 0.025 | |
| type train | step 6820 | loss 0.0102 0.0797 0.3591 1.0926 | lr 8.0e-04 | norm 0.1152 | dt 0.026 | |
| type train | step 6830 | loss 0.0103 0.0799 0.3496 1.0794 | lr 8.0e-04 | norm 0.1088 | dt 0.026 | |
| type train | step 6840 | loss 0.0100 0.0781 0.3518 1.0872 | lr 8.0e-04 | norm 0.1032 | dt 0.025 | |
| type train | step 6850 | loss 0.0109 0.0799 0.3553 1.1076 | lr 7.9e-04 | norm 0.1477 | dt 0.025 | |
| type train | step 6860 | loss 0.0102 0.0809 0.3615 1.1195 | lr 7.9e-04 | norm 0.1375 | dt 0.026 | |
| type train | step 6870 | loss 0.0101 0.0791 0.3605 1.1152 | lr 7.9e-04 | norm 0.1339 | dt 0.026 | |
| type train | step 6880 | loss 0.0098 0.0797 0.3548 1.0811 | lr 7.9e-04 | norm 0.1284 | dt 0.025 | |
| type train | step 6890 | loss 0.0101 0.0798 0.3537 1.0910 | lr 7.9e-04 | norm 0.1138 | dt 0.026 | |
| type train | step 6900 | loss 0.0100 0.0785 0.3570 1.1012 | lr 7.9e-04 | norm 0.1175 | dt 0.026 | |
| type train | step 6910 | loss 0.0106 0.0820 0.3680 1.1380 | lr 7.9e-04 | norm 0.1150 | dt 0.026 | |
| type train | step 6920 | loss 0.0103 0.0788 0.3537 1.0984 | lr 7.9e-04 | norm 0.1213 | dt 0.025 | |
| type train | step 6930 | loss 0.0099 0.0789 0.3617 1.1049 | lr 7.9e-04 | norm 0.1289 | dt 0.026 | |
| type train | step 6940 | loss 0.0098 0.0790 0.3537 1.0827 | lr 7.9e-04 | norm 0.1264 | dt 0.026 | |
| type train | step 6950 | loss 0.0102 0.0826 0.3644 1.1101 | lr 7.9e-04 | norm 0.1130 | dt 0.025 | |
| type train | step 6960 | loss 0.0101 0.0777 0.3482 1.0804 | lr 7.9e-04 | norm 0.1012 | dt 0.026 | |
| type train | step 6970 | loss 0.0100 0.0782 0.3505 1.0940 | lr 7.9e-04 | norm 0.1154 | dt 0.026 | |
| type train | step 6980 | loss 0.0102 0.0780 0.3476 1.0768 | lr 7.9e-04 | norm 0.1071 | dt 0.025 | |
| type train | step 6990 | loss 0.0104 0.0795 0.3635 1.1307 | lr 7.9e-04 | norm 0.1537 | dt 0.026 | |
| type train | step 7000 | loss 0.0099 0.0777 0.3458 1.0766 | lr 7.9e-04 | norm 0.1360 | dt 0.026 | |
| type train | step 7010 | loss 0.0102 0.0811 0.3563 1.0913 | lr 7.8e-04 | norm 0.1250 | dt 0.033 | |
| type train | step 7020 | loss 0.0102 0.0788 0.3565 1.1011 | lr 7.8e-04 | norm 0.1185 | dt 0.032 | |
| type train | step 7030 | loss 0.0101 0.0774 0.3528 1.0996 | lr 7.8e-04 | norm 0.1213 | dt 0.032 | |
| type train | step 7040 | loss 0.0101 0.0802 0.3638 1.1191 | lr 7.8e-04 | norm 0.1037 | dt 0.032 | |
| type train | step 7050 | loss 0.0099 0.0793 0.3606 1.1144 | lr 7.8e-04 | norm 0.1164 | dt 0.033 | |
| type train | step 7060 | loss 0.0099 0.0796 0.3559 1.0973 | lr 7.8e-04 | norm 0.1531 | dt 0.031 | |
| type train | step 7070 | loss 0.0099 0.0767 0.3413 1.0603 | lr 7.8e-04 | norm 0.1250 | dt 0.032 | |
| type train | step 7080 | loss 0.0100 0.0794 0.3509 1.0928 | lr 7.8e-04 | norm 0.1305 | dt 0.032 | |
| type train | step 7090 | loss 0.0104 0.0790 0.3618 1.1297 | lr 7.8e-04 | norm 0.1231 | dt 0.031 | |
| type train | step 7100 | loss 0.0099 0.0793 0.3621 1.1112 | lr 7.8e-04 | norm 0.1210 | dt 0.031 | |
| type train | step 7110 | loss 0.0103 0.0786 0.3493 1.0853 | lr 7.8e-04 | norm 0.1084 | dt 0.030 | |
| type train | step 7120 | loss 0.0099 0.0795 0.3574 1.0963 | lr 7.8e-04 | norm 0.1386 | dt 0.028 | |
| type train | step 7130 | loss 0.0099 0.0795 0.3530 1.0964 | lr 7.8e-04 | norm 0.1387 | dt 0.026 | |
| type train | step 7140 | loss 0.0100 0.0808 0.3646 1.1165 | lr 7.8e-04 | norm 0.1218 | dt 0.026 | |
| type train | step 7150 | loss 0.0099 0.0773 0.3466 1.0859 | lr 7.8e-04 | norm 0.1273 | dt 0.025 | |
| type train | step 7160 | loss 0.0102 0.0802 0.3614 1.1080 | lr 7.8e-04 | norm 0.1097 | dt 0.025 | |
| type train | step 7170 | loss 0.0098 0.0763 0.3404 1.0633 | lr 7.7e-04 | norm 0.1117 | dt 0.025 | |
| type train | step 7180 | loss 0.0101 0.0781 0.3569 1.1022 | lr 7.7e-04 | norm 0.1235 | dt 0.025 | |
| type train | step 7190 | loss 0.0099 0.0802 0.3602 1.1052 | lr 7.7e-04 | norm 0.1155 | dt 0.025 | |
| type train | step 7200 | loss 0.0099 0.0809 0.3666 1.1205 | lr 7.7e-04 | norm 0.1193 | dt 0.025 | |
| type train | step 7210 | loss 0.0102 0.0789 0.3569 1.1078 | lr 7.7e-04 | norm 0.1392 | dt 0.025 | |
| type train | step 7220 | loss 0.0100 0.0807 0.3702 1.1331 | lr 7.7e-04 | norm 0.1305 | dt 0.026 | |
| type train | step 7230 | loss 0.0100 0.0801 0.3675 1.1240 | lr 7.7e-04 | norm 0.1305 | dt 0.025 | |
| type train | step 7240 | loss 0.0100 0.0798 0.3662 1.1371 | lr 7.7e-04 | norm 0.1097 | dt 0.026 | |
| type train | step 7250 | loss 0.0096 0.0780 0.3470 1.0796 | lr 7.7e-04 | norm 0.1143 | dt 0.025 | |
| type train | step 7260 | loss 0.0098 0.0802 0.3596 1.1055 | lr 7.7e-04 | norm 0.1316 | dt 0.025 | |
| type train | step 7270 | loss 0.0102 0.0787 0.3550 1.0997 | lr 7.7e-04 | norm 0.1467 | dt 0.026 | |
| type train | step 7280 | loss 0.0099 0.0807 0.3733 1.1478 | lr 7.7e-04 | norm 0.1210 | dt 0.025 | |
| type train | step 7290 | loss 0.0099 0.0786 0.3503 1.0849 | lr 7.7e-04 | norm 0.1372 | dt 0.025 | |
| type train | step 7300 | loss 0.0100 0.0790 0.3622 1.1213 | lr 7.7e-04 | norm 0.1216 | dt 0.025 | |
| type train | step 7310 | loss 0.0096 0.0780 0.3507 1.0813 | lr 7.7e-04 | norm 0.1127 | dt 0.025 | |
| type train | step 7320 | loss 0.0098 0.0804 0.3711 1.1404 | lr 7.7e-04 | norm 0.1250 | dt 0.027 | |
| type train | step 7330 | loss 0.0098 0.0796 0.3560 1.1009 | lr 7.6e-04 | norm 0.1160 | dt 0.025 | |
| type train | step 7340 | loss 0.0099 0.0807 0.3656 1.1272 | lr 7.6e-04 | norm 0.1206 | dt 0.025 | |
| type train | step 7350 | loss 0.0098 0.0770 0.3472 1.0822 | lr 7.6e-04 | norm 0.1115 | dt 0.025 | |
| type train | step 7360 | loss 0.0101 0.0809 0.3688 1.1384 | lr 7.6e-04 | norm 0.1367 | dt 0.026 | |
| type train | step 7370 | loss 0.0097 0.0786 0.3498 1.0731 | lr 7.6e-04 | norm 0.1257 | dt 0.026 | |
| type train | step 7380 | loss 0.0096 0.0786 0.3579 1.1045 | lr 7.6e-04 | norm 0.1319 | dt 0.025 | |
| type train | step 7390 | loss 0.0097 0.0784 0.3506 1.0793 | lr 7.6e-04 | norm 0.1095 | dt 0.025 | |
| type train | step 7400 | loss 0.0102 0.0823 0.3765 1.1588 | lr 7.6e-04 | norm 0.1081 | dt 0.026 | |
| type train | step 7410 | loss 0.0096 0.0782 0.3546 1.0905 | lr 7.6e-04 | norm 0.1213 | dt 0.026 | |
| type train | step 7420 | loss 0.0096 0.0759 0.3445 1.0764 | lr 7.6e-04 | norm 0.1136 | dt 0.025 | |
| type train | step 7430 | loss 0.0099 0.0792 0.3582 1.0902 | lr 7.6e-04 | norm 0.1191 | dt 0.025 | |
| type train | step 7440 | loss 0.0100 0.0795 0.3492 1.0779 | lr 7.6e-04 | norm 0.1146 | dt 0.026 | |
| type train | step 7450 | loss 0.0097 0.0777 0.3508 1.0860 | lr 7.6e-04 | norm 0.1075 | dt 0.025 | |
| type train | step 7460 | loss 0.0105 0.0795 0.3543 1.1053 | lr 7.6e-04 | norm 0.1461 | dt 0.026 | |
| type train | step 7470 | loss 0.0099 0.0804 0.3604 1.1168 | lr 7.6e-04 | norm 0.1304 | dt 0.026 | |
| type train | step 7480 | loss 0.0098 0.0787 0.3598 1.1131 | lr 7.5e-04 | norm 0.1336 | dt 0.025 | |
| type train | step 7490 | loss 0.0095 0.0793 0.3541 1.0785 | lr 7.5e-04 | norm 0.1280 | dt 0.026 | |
| type train | step 7500 | loss 0.0098 0.0794 0.3532 1.0887 | lr 7.5e-04 | norm 0.1157 | dt 0.026 | |
| type train | step 7510 | loss 0.0097 0.0780 0.3559 1.1002 | lr 7.5e-04 | norm 0.1159 | dt 0.026 | |
| type train | step 7520 | loss 0.0103 0.0817 0.3672 1.1360 | lr 7.5e-04 | norm 0.1131 | dt 0.026 | |
| type train | step 7530 | loss 0.0101 0.0785 0.3525 1.0966 | lr 7.5e-04 | norm 0.1214 | dt 0.027 | |
| type train | step 7540 | loss 0.0097 0.0785 0.3609 1.1033 | lr 7.5e-04 | norm 0.1248 | dt 0.027 | |
| type train | step 7550 | loss 0.0095 0.0786 0.3527 1.0810 | lr 7.5e-04 | norm 0.1260 | dt 0.026 | |
| type train | step 7560 | loss 0.0099 0.0823 0.3639 1.1079 | lr 7.5e-04 | norm 0.1180 | dt 0.027 | |
| type train | step 7570 | loss 0.0098 0.0773 0.3472 1.0790 | lr 7.5e-04 | norm 0.1246 | dt 0.030 | |
| type train | step 7580 | loss 0.0097 0.0778 0.3496 1.0936 | lr 7.5e-04 | norm 0.1096 | dt 0.035 | |
| type train | step 7590 | loss 0.0100 0.0777 0.3467 1.0753 | lr 7.5e-04 | norm 0.1173 | dt 0.035 | |
| type train | step 7600 | loss 0.0101 0.0790 0.3629 1.1281 | lr 7.5e-04 | norm 0.1616 | dt 0.035 | |
| type train | step 7610 | loss 0.0096 0.0774 0.3448 1.0742 | lr 7.5e-04 | norm 0.1383 | dt 0.035 | |
| type train | step 7620 | loss 0.0099 0.0807 0.3558 1.0889 | lr 7.5e-04 | norm 0.1260 | dt 0.035 | |
| type train | step 7630 | loss 0.0100 0.0784 0.3555 1.0995 | lr 7.4e-04 | norm 0.1295 | dt 0.035 | |
| type train | step 7640 | loss 0.0099 0.0770 0.3519 1.0981 | lr 7.4e-04 | norm 0.1200 | dt 0.035 | |
| type train | step 7650 | loss 0.0099 0.0797 0.3629 1.1172 | lr 7.4e-04 | norm 0.1191 | dt 0.035 | |
| type train | step 7660 | loss 0.0096 0.0790 0.3596 1.1124 | lr 7.4e-04 | norm 0.1151 | dt 0.035 | |
| type train | step 7670 | loss 0.0096 0.0793 0.3551 1.0953 | lr 7.4e-04 | norm 0.1493 | dt 0.034 | |
| type train | step 7680 | loss 0.0097 0.0764 0.3406 1.0587 | lr 7.4e-04 | norm 0.1258 | dt 0.034 | |
| type train | step 7690 | loss 0.0097 0.0790 0.3501 1.0916 | lr 7.4e-04 | norm 0.1275 | dt 0.033 | |
| type train | step 7700 | loss 0.0102 0.0786 0.3611 1.1274 | lr 7.4e-04 | norm 0.1252 | dt 0.033 | |
| type train | step 7710 | loss 0.0096 0.0789 0.3611 1.1096 | lr 7.4e-04 | norm 0.1180 | dt 0.033 | |
| type train | step 7720 | loss 0.0100 0.0782 0.3488 1.0839 | lr 7.4e-04 | norm 0.1081 | dt 0.033 | |
| type train | step 7730 | loss 0.0097 0.0793 0.3568 1.0948 | lr 7.4e-04 | norm 0.1281 | dt 0.033 | |
| type train | step 7740 | loss 0.0096 0.0792 0.3519 1.0954 | lr 7.4e-04 | norm 0.1361 | dt 0.032 | |
| type train | step 7750 | loss 0.0097 0.0804 0.3638 1.1146 | lr 7.4e-04 | norm 0.1196 | dt 0.032 | |
| type train | step 7760 | loss 0.0097 0.0769 0.3455 1.0833 | lr 7.4e-04 | norm 0.1259 | dt 0.025 | |
| type train | step 7770 | loss 0.0100 0.0798 0.3607 1.1058 | lr 7.4e-04 | norm 0.1119 | dt 0.025 | |
| type train | step 7780 | loss 0.0096 0.0759 0.3395 1.0618 | lr 7.3e-04 | norm 0.1165 | dt 0.025 | |
| type train | step 7790 | loss 0.0098 0.0776 0.3565 1.0998 | lr 7.3e-04 | norm 0.1192 | dt 0.025 | |
| type train | step 7800 | loss 0.0097 0.0798 0.3595 1.1028 | lr 7.3e-04 | norm 0.1171 | dt 0.025 | |
| type train | step 7810 | loss 0.0097 0.0805 0.3656 1.1195 | lr 7.3e-04 | norm 0.1174 | dt 0.025 | |
| type train | step 7820 | loss 0.0099 0.0785 0.3559 1.1062 | lr 7.3e-04 | norm 0.1218 | dt 0.025 | |
| type train | step 7830 | loss 0.0098 0.0803 0.3691 1.1314 | lr 7.3e-04 | norm 0.1197 | dt 0.026 | |
| type train | step 7840 | loss 0.0098 0.0797 0.3666 1.1214 | lr 7.3e-04 | norm 0.1328 | dt 0.025 | |
| type train | step 7850 | loss 0.0098 0.0793 0.3653 1.1347 | lr 7.3e-04 | norm 0.1134 | dt 0.025 | |
| type train | step 7860 | loss 0.0094 0.0775 0.3463 1.0779 | lr 7.3e-04 | norm 0.1169 | dt 0.025 | |
| type train | step 7870 | loss 0.0095 0.0797 0.3587 1.1043 | lr 7.3e-04 | norm 0.1296 | dt 0.025 | |
| type train | step 7880 | loss 0.0100 0.0784 0.3546 1.0979 | lr 7.3e-04 | norm 0.1423 | dt 0.026 | |
| type train | step 7890 | loss 0.0097 0.0803 0.3726 1.1464 | lr 7.3e-04 | norm 0.1190 | dt 0.025 | |
| type train | step 7900 | loss 0.0097 0.0782 0.3498 1.0834 | lr 7.3e-04 | norm 0.1305 | dt 0.025 | |
| type train | step 7910 | loss 0.0098 0.0787 0.3616 1.1195 | lr 7.3e-04 | norm 0.1245 | dt 0.025 | |
| type train | step 7920 | loss 0.0094 0.0775 0.3497 1.0797 | lr 7.3e-04 | norm 0.1094 | dt 0.026 | |
| type train | step 7930 | loss 0.0096 0.0800 0.3702 1.1396 | lr 7.2e-04 | norm 0.1196 | dt 0.027 | |
| type train | step 7940 | loss 0.0095 0.0792 0.3560 1.0994 | lr 7.2e-04 | norm 0.1134 | dt 0.025 | |
| type train | step 7950 | loss 0.0096 0.0803 0.3648 1.1250 | lr 7.2e-04 | norm 0.1171 | dt 0.025 | |
| type train | step 7960 | loss 0.0096 0.0766 0.3466 1.0803 | lr 7.2e-04 | norm 0.1115 | dt 0.026 | |
| type train | step 7970 | loss 0.0099 0.0804 0.3680 1.1373 | lr 7.2e-04 | norm 0.1375 | dt 0.026 | |
| type train | step 7980 | loss 0.0095 0.0782 0.3491 1.0723 | lr 7.2e-04 | norm 0.1223 | dt 0.026 | |
| type train | step 7990 | loss 0.0094 0.0782 0.3572 1.1036 | lr 7.2e-04 | norm 0.1342 | dt 0.025 | |
| type train | step 8000 | loss 0.0095 0.0781 0.3501 1.0785 | lr 7.2e-04 | norm 0.1032 | dt 0.026 | |
| type train | step 8010 | loss 0.0100 0.0818 0.3755 1.1566 | lr 7.2e-04 | norm 0.1086 | dt 0.025 | |
| type train | step 8020 | loss 0.0094 0.0779 0.3540 1.0882 | lr 7.2e-04 | norm 0.1195 | dt 0.025 | |
| type train | step 8030 | loss 0.0094 0.0755 0.3437 1.0745 | lr 7.2e-04 | norm 0.1117 | dt 0.026 | |
| type train | step 8040 | loss 0.0097 0.0790 0.3575 1.0894 | lr 7.2e-04 | norm 0.1160 | dt 0.025 | |
| type train | step 8050 | loss 0.0098 0.0790 0.3486 1.0760 | lr 7.2e-04 | norm 0.1113 | dt 0.026 | |
| type train | step 8060 | loss 0.0095 0.0774 0.3502 1.0843 | lr 7.2e-04 | norm 0.1027 | dt 0.026 | |
| type train | step 8070 | loss 0.0103 0.0790 0.3535 1.1032 | lr 7.2e-04 | norm 0.1438 | dt 0.026 | |
| type train | step 8080 | loss 0.0097 0.0801 0.3597 1.1152 | lr 7.1e-04 | norm 0.1324 | dt 0.025 | |
| type train | step 8090 | loss 0.0096 0.0783 0.3592 1.1111 | lr 7.1e-04 | norm 0.1254 | dt 0.026 | |
| type train | step 8100 | loss 0.0094 0.0789 0.3537 1.0773 | lr 7.1e-04 | norm 0.1272 | dt 0.026 | |
| type train | step 8110 | loss 0.0096 0.0791 0.3526 1.0870 | lr 7.1e-04 | norm 0.1142 | dt 0.026 | |
| type train | step 8120 | loss 0.0095 0.0777 0.3553 1.0991 | lr 7.1e-04 | norm 0.1207 | dt 0.026 | |
| type train | step 8130 | loss 0.0101 0.0813 0.3664 1.1338 | lr 7.1e-04 | norm 0.1149 | dt 0.026 | |
| type train | step 8140 | loss 0.0099 0.0781 0.3522 1.0950 | lr 7.1e-04 | norm 0.1242 | dt 0.025 | |
| type train | step 8150 | loss 0.0095 0.0781 0.3600 1.1012 | lr 7.1e-04 | norm 0.1236 | dt 0.025 | |
| type train | step 8160 | loss 0.0093 0.0782 0.3517 1.0791 | lr 7.1e-04 | norm 0.1214 | dt 0.026 | |
| type train | step 8170 | loss 0.0097 0.0818 0.3634 1.1061 | lr 7.1e-04 | norm 0.1132 | dt 0.026 | |
| type train | step 8180 | loss 0.0096 0.0770 0.3464 1.0769 | lr 7.1e-04 | norm 0.1002 | dt 0.025 | |
| type train | step 8190 | loss 0.0095 0.0775 0.3491 1.0925 | lr 7.1e-04 | norm 0.1070 | dt 0.025 | |
| type train | step 8200 | loss 0.0098 0.0774 0.3463 1.0732 | lr 7.1e-04 | norm 0.1067 | dt 0.026 | |
| type train | step 8210 | loss 0.0099 0.0787 0.3623 1.1255 | lr 7.1e-04 | norm 0.1468 | dt 0.025 | |
| type train | step 8220 | loss 0.0094 0.0771 0.3438 1.0725 | lr 7.1e-04 | norm 0.1339 | dt 0.025 | |
| type train | step 8230 | loss 0.0097 0.0803 0.3553 1.0880 | lr 7.0e-04 | norm 0.1191 | dt 0.026 | |
| type train | step 8240 | loss 0.0098 0.0780 0.3549 1.0983 | lr 7.0e-04 | norm 0.1236 | dt 0.025 | |
| type train | step 8250 | loss 0.0097 0.0768 0.3513 1.0968 | lr 7.0e-04 | norm 0.1248 | dt 0.025 | |
| type train | step 8260 | loss 0.0097 0.0793 0.3624 1.1154 | lr 7.0e-04 | norm 0.1134 | dt 0.034 | |
| type train | step 8270 | loss 0.0094 0.0786 0.3592 1.1109 | lr 7.0e-04 | norm 0.1149 | dt 0.033 | |
| type train | step 8280 | loss 0.0095 0.0790 0.3541 1.0939 | lr 7.0e-04 | norm 0.1477 | dt 0.033 | |
| type train | step 8290 | loss 0.0095 0.0760 0.3399 1.0575 | lr 7.0e-04 | norm 0.1241 | dt 0.033 | |
| type train | step 8300 | loss 0.0095 0.0786 0.3495 1.0899 | lr 7.0e-04 | norm 0.1348 | dt 0.032 | |
| type train | step 8310 | loss 0.0099 0.0783 0.3607 1.1259 | lr 7.0e-04 | norm 0.1257 | dt 0.032 | |
| type train | step 8320 | loss 0.0095 0.0786 0.3606 1.1079 | lr 7.0e-04 | norm 0.1166 | dt 0.032 | |
| type train | step 8330 | loss 0.0098 0.0780 0.3481 1.0819 | lr 7.0e-04 | norm 0.1155 | dt 0.033 | |
| type train | step 8340 | loss 0.0095 0.0788 0.3561 1.0932 | lr 7.0e-04 | norm 0.1391 | dt 0.032 | |
| type train | step 8350 | loss 0.0094 0.0788 0.3511 1.0945 | lr 7.0e-04 | norm 0.1419 | dt 0.032 | |
| type train | step 8360 | loss 0.0095 0.0801 0.3630 1.1132 | lr 7.0e-04 | norm 0.1192 | dt 0.032 | |
| type train | step 8370 | loss 0.0095 0.0765 0.3447 1.0809 | lr 6.9e-04 | norm 0.1193 | dt 0.032 | |
| type train | step 8380 | loss 0.0098 0.0795 0.3599 1.1049 | lr 6.9e-04 | norm 0.1056 | dt 0.030 | |
| type train | step 8390 | loss 0.0094 0.0757 0.3392 1.0600 | lr 6.9e-04 | norm 0.1239 | dt 0.031 | |
| type train | step 8400 | loss 0.0097 0.0772 0.3558 1.0988 | lr 6.9e-04 | norm 0.1167 | dt 0.031 | |
| type train | step 8410 | loss 0.0095 0.0794 0.3587 1.1014 | lr 6.9e-04 | norm 0.1107 | dt 0.031 | |
| type train | step 8420 | loss 0.0095 0.0802 0.3649 1.1189 | lr 6.9e-04 | norm 0.1194 | dt 0.032 | |
| type train | step 8430 | loss 0.0097 0.0781 0.3552 1.1054 | lr 6.9e-04 | norm 0.1180 | dt 0.031 | |
| type train | step 8440 | loss 0.0096 0.0800 0.3684 1.1301 | lr 6.9e-04 | norm 0.1235 | dt 0.031 | |
| type train | step 8450 | loss 0.0096 0.0794 0.3658 1.1199 | lr 6.9e-04 | norm 0.1277 | dt 0.030 | |
| type train | step 8460 | loss 0.0096 0.0790 0.3647 1.1344 | lr 6.9e-04 | norm 0.1079 | dt 0.027 | |
| type train | step 8470 | loss 0.0092 0.0772 0.3458 1.0760 | lr 6.9e-04 | norm 0.1086 | dt 0.025 | |
| type train | step 8480 | loss 0.0094 0.0793 0.3579 1.1025 | lr 6.9e-04 | norm 0.1300 | dt 0.026 | |
| type train | step 8490 | loss 0.0098 0.0779 0.3541 1.0961 | lr 6.9e-04 | norm 0.1411 | dt 0.025 | |
| type train | step 8500 | loss 0.0095 0.0800 0.3717 1.1453 | lr 6.9e-04 | norm 0.1175 | dt 0.025 | |
| type train | step 8510 | loss 0.0095 0.0779 0.3492 1.0826 | lr 6.8e-04 | norm 0.1345 | dt 0.025 | |
| type train | step 8520 | loss 0.0096 0.0784 0.3606 1.1184 | lr 6.8e-04 | norm 0.1180 | dt 0.025 | |
| type train | step 8530 | loss 0.0093 0.0772 0.3490 1.0787 | lr 6.8e-04 | norm 0.1021 | dt 0.025 | |
| type train | step 8540 | loss 0.0094 0.0797 0.3700 1.1374 | lr 6.8e-04 | norm 0.1218 | dt 0.027 | |
| type train | step 8550 | loss 0.0094 0.0788 0.3554 1.0984 | lr 6.8e-04 | norm 0.1185 | dt 0.026 | |
| type train | step 8560 | loss 0.0095 0.0800 0.3640 1.1243 | lr 6.8e-04 | norm 0.1181 | dt 0.026 | |
| type train | step 8570 | loss 0.0094 0.0763 0.3460 1.0787 | lr 6.8e-04 | norm 0.1113 | dt 0.026 | |
| type train | step 8580 | loss 0.0097 0.0800 0.3673 1.1357 | lr 6.8e-04 | norm 0.1337 | dt 0.026 | |
| type train | step 8590 | loss 0.0093 0.0780 0.3485 1.0712 | lr 6.8e-04 | norm 0.1161 | dt 0.026 | |
| type train | step 8600 | loss 0.0092 0.0778 0.3564 1.1025 | lr 6.8e-04 | norm 0.1307 | dt 0.025 | |
| type train | step 8610 | loss 0.0093 0.0778 0.3494 1.0778 | lr 6.8e-04 | norm 0.0940 | dt 0.026 | |
| type train | step 8620 | loss 0.0098 0.0814 0.3746 1.1559 | lr 6.8e-04 | norm 0.1117 | dt 0.026 | |
| type train | step 8630 | loss 0.0093 0.0777 0.3534 1.0870 | lr 6.8e-04 | norm 0.1190 | dt 0.026 | |
| type train | step 8640 | loss 0.0092 0.0753 0.3429 1.0730 | lr 6.8e-04 | norm 0.1107 | dt 0.025 | |
| type train | step 8650 | loss 0.0095 0.0786 0.3569 1.0882 | lr 6.8e-04 | norm 0.1134 | dt 0.026 | |
| type train | step 8660 | loss 0.0096 0.0787 0.3479 1.0744 | lr 6.7e-04 | norm 0.1089 | dt 0.025 | |
| type train | step 8670 | loss 0.0093 0.0770 0.3496 1.0829 | lr 6.7e-04 | norm 0.0976 | dt 0.026 | |
| type train | step 8680 | loss 0.0101 0.0787 0.3529 1.1016 | lr 6.7e-04 | norm 0.1499 | dt 0.026 | |
| type train | step 8690 | loss 0.0096 0.0798 0.3592 1.1132 | lr 6.7e-04 | norm 0.1431 | dt 0.025 | |
| type train | step 8700 | loss 0.0094 0.0779 0.3588 1.1101 | lr 6.7e-04 | norm 0.1422 | dt 0.027 | |
| type train | step 8710 | loss 0.0092 0.0787 0.3532 1.0756 | lr 6.7e-04 | norm 0.1321 | dt 0.026 | |
| type train | step 8720 | loss 0.0094 0.0787 0.3519 1.0857 | lr 6.7e-04 | norm 0.1100 | dt 0.026 | |
| type train | step 8730 | loss 0.0093 0.0774 0.3545 1.0992 | lr 6.7e-04 | norm 0.1218 | dt 0.026 | |
| type train | step 8740 | loss 0.0099 0.0809 0.3656 1.1321 | lr 6.7e-04 | norm 0.1109 | dt 0.025 | |
| type train | step 8750 | loss 0.0097 0.0778 0.3514 1.0938 | lr 6.7e-04 | norm 0.1160 | dt 0.025 | |
| type train | step 8760 | loss 0.0093 0.0778 0.3593 1.0999 | lr 6.7e-04 | norm 0.1253 | dt 0.035 | |
| type train | step 8770 | loss 0.0092 0.0779 0.3510 1.0782 | lr 6.7e-04 | norm 0.1272 | dt 0.035 | |
| type train | step 8780 | loss 0.0095 0.0814 0.3630 1.1048 | lr 6.7e-04 | norm 0.1177 | dt 0.035 | |
| type train | step 8790 | loss 0.0094 0.0767 0.3458 1.0754 | lr 6.7e-04 | norm 0.0966 | dt 0.035 | |
| type train | step 8800 | loss 0.0093 0.0771 0.3484 1.0911 | lr 6.6e-04 | norm 0.1095 | dt 0.035 | |
| type train | step 8810 | loss 0.0096 0.0771 0.3456 1.0719 | lr 6.6e-04 | norm 0.1038 | dt 0.035 | |
| type train | step 8820 | loss 0.0097 0.0784 0.3615 1.1238 | lr 6.6e-04 | norm 0.1435 | dt 0.034 | |
| type train | step 8830 | loss 0.0093 0.0767 0.3432 1.0709 | lr 6.6e-04 | norm 0.1357 | dt 0.034 | |
| type train | step 8840 | loss 0.0095 0.0800 0.3545 1.0874 | lr 6.6e-04 | norm 0.1178 | dt 0.033 | |
| type train | step 8850 | loss 0.0096 0.0777 0.3542 1.0969 | lr 6.6e-04 | norm 0.1184 | dt 0.033 | |
| type train | step 8860 | loss 0.0096 0.0764 0.3505 1.0960 | lr 6.6e-04 | norm 0.1197 | dt 0.033 | |
| type train | step 8870 | loss 0.0095 0.0790 0.3619 1.1145 | lr 6.6e-04 | norm 0.1108 | dt 0.033 | |
| type train | step 8880 | loss 0.0092 0.0783 0.3587 1.1088 | lr 6.6e-04 | norm 0.1109 | dt 0.033 | |
| type train | step 8890 | loss 0.0093 0.0787 0.3534 1.0930 | lr 6.6e-04 | norm 0.1431 | dt 0.033 | |
| type train | step 8900 | loss 0.0093 0.0757 0.3395 1.0561 | lr 6.6e-04 | norm 0.1239 | dt 0.032 | |
| type train | step 8910 | loss 0.0094 0.0783 0.3487 1.0889 | lr 6.6e-04 | norm 0.1264 | dt 0.033 | |
| type train | step 8920 | loss 0.0097 0.0779 0.3599 1.1247 | lr 6.6e-04 | norm 0.1236 | dt 0.032 | |
| type train | step 8930 | loss 0.0093 0.0781 0.3599 1.1067 | lr 6.6e-04 | norm 0.1109 | dt 0.032 | |
| type train | step 8940 | loss 0.0097 0.0778 0.3475 1.0796 | lr 6.5e-04 | norm 0.1104 | dt 0.032 | |
| type train | step 8950 | loss 0.0093 0.0785 0.3554 1.0919 | lr 6.5e-04 | norm 0.1231 | dt 0.032 | |
| type train | step 8960 | loss 0.0093 0.0786 0.3504 1.0940 | lr 6.5e-04 | norm 0.1338 | dt 0.029 | |
| type train | step 8970 | loss 0.0094 0.0798 0.3623 1.1121 | lr 6.5e-04 | norm 0.1187 | dt 0.026 | |
| type train | step 8980 | loss 0.0093 0.0762 0.3441 1.0791 | lr 6.5e-04 | norm 0.1170 | dt 0.026 | |
| type train | step 8990 | loss 0.0096 0.0790 0.3593 1.1044 | lr 6.5e-04 | norm 0.1060 | dt 0.025 | |
| type train | step 9000 | loss 0.0092 0.0754 0.3389 1.0583 | lr 6.5e-04 | norm 0.1181 | dt 0.025 | |
| type train | step 9010 | loss 0.0095 0.0769 0.3552 1.0976 | lr 6.5e-04 | norm 0.1122 | dt 0.025 | |
| type train | step 9020 | loss 0.0093 0.0791 0.3583 1.1005 | lr 6.5e-04 | norm 0.1069 | dt 0.025 | |
| type train | step 9030 | loss 0.0093 0.0798 0.3641 1.1180 | lr 6.5e-04 | norm 0.1168 | dt 0.025 | |
| type train | step 9040 | loss 0.0096 0.0778 0.3546 1.1036 | lr 6.5e-04 | norm 0.1217 | dt 0.025 | |
| type train | step 9050 | loss 0.0094 0.0797 0.3676 1.1294 | lr 6.5e-04 | norm 0.1156 | dt 0.026 | |
| type train | step 9060 | loss 0.0094 0.0792 0.3651 1.1188 | lr 6.5e-04 | norm 0.1288 | dt 0.026 | |
| type train | step 9070 | loss 0.0094 0.0786 0.3639 1.1334 | lr 6.5e-04 | norm 0.1198 | dt 0.025 | |
| type train | step 9080 | loss 0.0091 0.0770 0.3452 1.0747 | lr 6.4e-04 | norm 0.1064 | dt 0.026 | |
| type train | step 9090 | loss 0.0092 0.0790 0.3572 1.1014 | lr 6.4e-04 | norm 0.1301 | dt 0.026 | |
| type train | step 9100 | loss 0.0096 0.0776 0.3534 1.0958 | lr 6.4e-04 | norm 0.1396 | dt 0.025 | |
| type train | step 9110 | loss 0.0093 0.0797 0.3709 1.1442 | lr 6.4e-04 | norm 0.1179 | dt 0.025 | |
| type train | step 9120 | loss 0.0093 0.0777 0.3489 1.0803 | lr 6.4e-04 | norm 0.1365 | dt 0.026 | |
| type train | step 9130 | loss 0.0095 0.0781 0.3600 1.1171 | lr 6.4e-04 | norm 0.1205 | dt 0.025 | |
| type train | step 9140 | loss 0.0091 0.0770 0.3483 1.0778 | lr 6.4e-04 | norm 0.1011 | dt 0.025 | |
| type train | step 9150 | loss 0.0093 0.0794 0.3694 1.1366 | lr 6.4e-04 | norm 0.1220 | dt 0.027 | |
| type train | step 9160 | loss 0.0092 0.0785 0.3550 1.0976 | lr 6.4e-04 | norm 0.1127 | dt 0.025 | |
| type train | step 9170 | loss 0.0093 0.0797 0.3633 1.1233 | lr 6.4e-04 | norm 0.1190 | dt 0.026 | |
| type train | step 9180 | loss 0.0093 0.0760 0.3454 1.0771 | lr 6.4e-04 | norm 0.1199 | dt 0.026 | |
| type train | step 9190 | loss 0.0095 0.0797 0.3666 1.1354 | lr 6.4e-04 | norm 0.1321 | dt 0.026 | |
| type train | step 9200 | loss 0.0092 0.0777 0.3481 1.0698 | lr 6.4e-04 | norm 0.1137 | dt 0.025 | |
| type train | step 9210 | loss 0.0091 0.0776 0.3556 1.1016 | lr 6.4e-04 | norm 0.1269 | dt 0.026 | |
| type train | step 9220 | loss 0.0092 0.0774 0.3489 1.0774 | lr 6.3e-04 | norm 0.0954 | dt 0.026 | |
| type train | step 9230 | loss 0.0096 0.0812 0.3740 1.1553 | lr 6.3e-04 | norm 0.1030 | dt 0.025 | |
| type train | step 9240 | loss 0.0091 0.0774 0.3530 1.0859 | lr 6.3e-04 | norm 0.1179 | dt 0.025 | |
| type train | step 9250 | loss 0.0091 0.0750 0.3424 1.0720 | lr 6.3e-04 | norm 0.1110 | dt 0.026 | |
| type train | step 9260 | loss 0.0093 0.0784 0.3562 1.0868 | lr 6.3e-04 | norm 0.1182 | dt 0.035 | |
| type train | step 9270 | loss 0.0095 0.0785 0.3474 1.0723 | lr 6.3e-04 | norm 0.1140 | dt 0.035 | |
| type train | step 9280 | loss 0.0092 0.0769 0.3489 1.0823 | lr 6.3e-04 | norm 0.0978 | dt 0.035 | |
| type train | step 9290 | loss 0.0100 0.0784 0.3525 1.1003 | lr 6.3e-04 | norm 0.1450 | dt 0.035 | |
| type train | step 9300 | loss 0.0094 0.0795 0.3586 1.1119 | lr 6.3e-04 | norm 0.1295 | dt 0.035 | |
| type train | step 9310 | loss 0.0093 0.0777 0.3584 1.1088 | lr 6.3e-04 | norm 0.1318 | dt 0.035 | |
| type train | step 9320 | loss 0.0091 0.0785 0.3527 1.0747 | lr 6.3e-04 | norm 0.1280 | dt 0.034 | |
| type train | step 9330 | loss 0.0093 0.0785 0.3511 1.0849 | lr 6.3e-04 | norm 0.1145 | dt 0.034 | |
| type train | step 9340 | loss 0.0092 0.0772 0.3536 1.0990 | lr 6.3e-04 | norm 0.1266 | dt 0.033 | |
| type train | step 9350 | loss 0.0098 0.0807 0.3649 1.1303 | lr 6.2e-04 | norm 0.1088 | dt 0.034 | |
| type train | step 9360 | loss 0.0095 0.0775 0.3509 1.0940 | lr 6.2e-04 | norm 0.1188 | dt 0.033 | |
| type train | step 9370 | loss 0.0092 0.0776 0.3586 1.0983 | lr 6.2e-04 | norm 0.1245 | dt 0.033 | |
| type train | step 9380 | loss 0.0090 0.0776 0.3503 1.0762 | lr 6.2e-04 | norm 0.1214 | dt 0.033 | |
| type train | step 9390 | loss 0.0094 0.0812 0.3624 1.1030 | lr 6.2e-04 | norm 0.1114 | dt 0.032 | |
| type train | step 9400 | loss 0.0093 0.0765 0.3452 1.0752 | lr 6.2e-04 | norm 0.1042 | dt 0.033 | |
| type train | step 9410 | loss 0.0092 0.0768 0.3478 1.0894 | lr 6.2e-04 | norm 0.1103 | dt 0.032 | |
| type train | step 9420 | loss 0.0094 0.0768 0.3450 1.0716 | lr 6.2e-04 | norm 0.1093 | dt 0.032 | |
| type train | step 9430 | loss 0.0095 0.0782 0.3606 1.1233 | lr 6.2e-04 | norm 0.1451 | dt 0.032 | |
| type train | step 9440 | loss 0.0092 0.0765 0.3424 1.0693 | lr 6.2e-04 | norm 0.1318 | dt 0.032 | |
| type train | step 9450 | loss 0.0094 0.0799 0.3538 1.0860 | lr 6.2e-04 | norm 0.1169 | dt 0.032 | |
| type train | step 9460 | loss 0.0095 0.0775 0.3536 1.0957 | lr 6.2e-04 | norm 0.1198 | dt 0.032 | |
| type train | step 9470 | loss 0.0094 0.0763 0.3498 1.0944 | lr 6.2e-04 | norm 0.1203 | dt 0.031 | |
| type train | step 9480 | loss 0.0094 0.0788 0.3612 1.1136 | lr 6.2e-04 | norm 0.1148 | dt 0.030 | |
| type train | step 9490 | loss 0.0091 0.0780 0.3583 1.1073 | lr 6.1e-04 | norm 0.1077 | dt 0.027 | |
| type train | step 9500 | loss 0.0092 0.0784 0.3526 1.0914 | lr 6.1e-04 | norm 0.1422 | dt 0.026 | |
| type train | step 9510 | loss 0.0092 0.0754 0.3388 1.0547 | lr 6.1e-04 | norm 0.1268 | dt 0.025 | |
| type train | step 9520 | loss 0.0092 0.0780 0.3483 1.0881 | lr 6.1e-04 | norm 0.1239 | dt 0.025 | |
| type train | step 9530 | loss 0.0096 0.0775 0.3591 1.1240 | lr 6.1e-04 | norm 0.1247 | dt 0.025 | |
| type train | step 9540 | loss 0.0092 0.0778 0.3590 1.1060 | lr 6.1e-04 | norm 0.1142 | dt 0.025 | |
| type train | step 9550 | loss 0.0095 0.0776 0.3471 1.0786 | lr 6.1e-04 | norm 0.1059 | dt 0.025 | |
| type train | step 9560 | loss 0.0092 0.0782 0.3551 1.0907 | lr 6.1e-04 | norm 0.1185 | dt 0.026 | |
| type train | step 9570 | loss 0.0092 0.0783 0.3497 1.0936 | lr 6.1e-04 | norm 0.1318 | dt 0.025 | |
| type train | step 9580 | loss 0.0093 0.0796 0.3616 1.1115 | lr 6.1e-04 | norm 0.1107 | dt 0.025 | |
| type train | step 9590 | loss 0.0092 0.0759 0.3432 1.0775 | lr 6.1e-04 | norm 0.1199 | dt 0.026 | |
| type train | step 9600 | loss 0.0095 0.0787 0.3586 1.1029 | lr 6.1e-04 | norm 0.1103 | dt 0.026 | |
| type train | step 9610 | loss 0.0091 0.0753 0.3383 1.0567 | lr 6.1e-04 | norm 0.1210 | dt 0.025 | |
| type train | step 9620 | loss 0.0094 0.0767 0.3552 1.0962 | lr 6.1e-04 | norm 0.1088 | dt 0.026 | |
| type train | step 9630 | loss 0.0092 0.0788 0.3576 1.1000 | lr 6.0e-04 | norm 0.1084 | dt 0.026 | |
| type train | step 9640 | loss 0.0092 0.0796 0.3635 1.1175 | lr 6.0e-04 | norm 0.1142 | dt 0.025 | |
| type train | step 9650 | loss 0.0095 0.0775 0.3540 1.1022 | lr 6.0e-04 | norm 0.1152 | dt 0.026 | |
| type train | step 9660 | loss 0.0093 0.0794 0.3667 1.1287 | lr 6.0e-04 | norm 0.1261 | dt 0.025 | |
| type train | step 9670 | loss 0.0093 0.0789 0.3645 1.1175 | lr 6.0e-04 | norm 0.1267 | dt 0.025 | |
| type train | step 9680 | loss 0.0093 0.0784 0.3633 1.1330 | lr 6.0e-04 | norm 0.1067 | dt 0.026 | |
| type train | step 9690 | loss 0.0090 0.0768 0.3446 1.0729 | lr 6.0e-04 | norm 0.1104 | dt 0.025 | |
| type train | step 9700 | loss 0.0091 0.0788 0.3564 1.1006 | lr 6.0e-04 | norm 0.1274 | dt 0.025 | |
| type train | step 9710 | loss 0.0095 0.0773 0.3526 1.0945 | lr 6.0e-04 | norm 0.1385 | dt 0.026 | |
| type train | step 9720 | loss 0.0092 0.0794 0.3704 1.1435 | lr 6.0e-04 | norm 0.1232 | dt 0.025 | |
| type train | step 9730 | loss 0.0092 0.0776 0.3482 1.0785 | lr 6.0e-04 | norm 0.1285 | dt 0.025 | |
| type train | step 9740 | loss 0.0093 0.0779 0.3595 1.1165 | lr 6.0e-04 | norm 0.1288 | dt 0.026 | |
| type train | step 9750 | loss 0.0090 0.0768 0.3479 1.0759 | lr 6.0e-04 | norm 0.1119 | dt 0.026 | |
| type train | step 9760 | loss 0.0091 0.0792 0.3692 1.1357 | lr 6.0e-04 | norm 0.1173 | dt 0.027 | |
| type train | step 9770 | loss 0.0091 0.0782 0.3547 1.0976 | lr 5.9e-04 | norm 0.1068 | dt 0.025 | |
| type train | step 9780 | loss 0.0092 0.0794 0.3626 1.1221 | lr 5.9e-04 | norm 0.1277 | dt 0.026 | |
| type train | step 9790 | loss 0.0092 0.0758 0.3447 1.0753 | lr 5.9e-04 | norm 0.1089 | dt 0.026 | |
| type train | step 9800 | loss 0.0094 0.0795 0.3662 1.1343 | lr 5.9e-04 | norm 0.1336 | dt 0.026 | |
| type train | step 9810 | loss 0.0091 0.0775 0.3474 1.0690 | lr 5.9e-04 | norm 0.1160 | dt 0.027 | |
| type train | step 9820 | loss 0.0089 0.0774 0.3551 1.0996 | lr 5.9e-04 | norm 0.1309 | dt 0.026 | |
| type train | step 9830 | loss 0.0091 0.0772 0.3485 1.0771 | lr 5.9e-04 | norm 0.0999 | dt 0.027 | |
| type train | step 9840 | loss 0.0095 0.0809 0.3736 1.1548 | lr 5.9e-04 | norm 0.1039 | dt 0.027 | |
| type train | step 9850 | loss 0.0090 0.0771 0.3525 1.0840 | lr 5.9e-04 | norm 0.1190 | dt 0.026 | |
| type train | step 9860 | loss 0.0090 0.0749 0.3418 1.0710 | lr 5.9e-04 | norm 0.1150 | dt 0.026 | |
| type train | step 9870 | loss 0.0092 0.0781 0.3555 1.0854 | lr 5.9e-04 | norm 0.1148 | dt 0.026 | |
| type train | step 9880 | loss 0.0093 0.0784 0.3469 1.0714 | lr 5.9e-04 | norm 0.1061 | dt 0.026 | |
| type train | step 9890 | loss 0.0091 0.0767 0.3486 1.0809 | lr 5.9e-04 | norm 0.1051 | dt 0.026 | |
| type train | step 9900 | loss 0.0098 0.0781 0.3518 1.0989 | lr 5.8e-04 | norm 0.1473 | dt 0.026 | |
| type train | step 9910 | loss 0.0092 0.0793 0.3580 1.1108 | lr 5.8e-04 | norm 0.1303 | dt 0.027 | |
| type train | step 9920 | loss 0.0092 0.0775 0.3576 1.1081 | lr 5.8e-04 | norm 0.1296 | dt 0.026 | |
| type train | step 9930 | loss 0.0089 0.0783 0.3519 1.0727 | lr 5.8e-04 | norm 0.1308 | dt 0.026 | |
| type train | step 9940 | loss 0.0091 0.0784 0.3505 1.0839 | lr 5.8e-04 | norm 0.1132 | dt 0.026 | |
| type train | step 9950 | loss 0.0091 0.0769 0.3530 1.0980 | lr 5.8e-04 | norm 0.1241 | dt 0.026 | |
| type train | step 9960 | loss 0.0097 0.0803 0.3644 1.1287 | lr 5.8e-04 | norm 0.1068 | dt 0.026 | |
| type train | step 9970 | loss 0.0094 0.0773 0.3503 1.0932 | lr 5.8e-04 | norm 0.1236 | dt 0.026 | |
| type train | step 9980 | loss 0.0090 0.0774 0.3579 1.0976 | lr 5.8e-04 | norm 0.1205 | dt 0.026 | |
| type train | step 9990 | loss 0.0089 0.0775 0.3498 1.0759 | lr 5.8e-04 | norm 0.1169 | dt 0.027 | |
| type train | step 10000 | loss 0.0093 0.0809 0.3618 1.1021 | lr 5.8e-04 | norm 0.1157 | dt 0.026 | |
| type train | step 10010 | loss 0.0092 0.0763 0.3446 1.0745 | lr 5.8e-04 | norm 0.0987 | dt 0.025 | |
| type train | step 10020 | loss 0.0091 0.0765 0.3475 1.0888 | lr 5.8e-04 | norm 0.1030 | dt 0.026 | |
| type train | step 10030 | loss 0.0093 0.0766 0.3442 1.0706 | lr 5.8e-04 | norm 0.0999 | dt 0.026 | |
| type train | step 10040 | loss 0.0094 0.0780 0.3598 1.1223 | lr 5.7e-04 | norm 0.1444 | dt 0.026 | |
| type train | step 10050 | loss 0.0090 0.0764 0.3418 1.0683 | lr 5.7e-04 | norm 0.1296 | dt 0.027 | |
| type train | step 10060 | loss 0.0092 0.0797 0.3531 1.0852 | lr 5.7e-04 | norm 0.1214 | dt 0.026 | |
| type train | step 10070 | loss 0.0093 0.0772 0.3530 1.0945 | lr 5.7e-04 | norm 0.1131 | dt 0.028 | |
| type train | step 10080 | loss 0.0093 0.0761 0.3494 1.0931 | lr 5.7e-04 | norm 0.1243 | dt 0.029 | |
| type train | step 10090 | loss 0.0093 0.0785 0.3606 1.1124 | lr 5.7e-04 | norm 0.1083 | dt 0.035 | |
| type train | step 10100 | loss 0.0090 0.0778 0.3579 1.1060 | lr 5.7e-04 | norm 0.1078 | dt 0.035 | |
| type train | step 10110 | loss 0.0090 0.0781 0.3521 1.0906 | lr 5.7e-04 | norm 0.1358 | dt 0.035 | |
| type train | step 10120 | loss 0.0091 0.0752 0.3382 1.0534 | lr 5.7e-04 | norm 0.1194 | dt 0.035 | |
| type train | step 10130 | loss 0.0091 0.0778 0.3478 1.0868 | lr 5.7e-04 | norm 0.1221 | dt 0.035 | |
| type train | step 10140 | loss 0.0095 0.0774 0.3582 1.1240 | lr 5.7e-04 | norm 0.1203 | dt 0.035 | |
| type train | step 10150 | loss 0.0090 0.0777 0.3583 1.1052 | lr 5.7e-04 | norm 0.1147 | dt 0.035 | |
| type train | step 10160 | loss 0.0094 0.0774 0.3465 1.0766 | lr 5.7e-04 | norm 0.1075 | dt 0.035 | |
| type train | step 10170 | loss 0.0091 0.0780 0.3544 1.0903 | lr 5.7e-04 | norm 0.1184 | dt 0.035 | |
| type train | step 10180 | loss 0.0090 0.0781 0.3491 1.0924 | lr 5.6e-04 | norm 0.1345 | dt 0.034 | |
| type train | step 10190 | loss 0.0091 0.0793 0.3612 1.1108 | lr 5.6e-04 | norm 0.1085 | dt 0.033 | |
| type train | step 10200 | loss 0.0091 0.0757 0.3428 1.0771 | lr 5.6e-04 | norm 0.1202 | dt 0.034 | |
| type train | step 10210 | loss 0.0094 0.0785 0.3577 1.1028 | lr 5.6e-04 | norm 0.1055 | dt 0.034 | |
| type train | step 10220 | loss 0.0090 0.0751 0.3379 1.0555 | lr 5.6e-04 | norm 0.1092 | dt 0.033 | |
| type train | step 10230 | loss 0.0092 0.0765 0.3548 1.0955 | lr 5.6e-04 | norm 0.1058 | dt 0.033 | |
| type train | step 10240 | loss 0.0091 0.0787 0.3568 1.0990 | lr 5.6e-04 | norm 0.1065 | dt 0.033 | |
| type train | step 10250 | loss 0.0091 0.0794 0.3626 1.1169 | lr 5.6e-04 | norm 0.1119 | dt 0.032 | |
| type train | step 10260 | loss 0.0093 0.0773 0.3535 1.1016 | lr 5.6e-04 | norm 0.1136 | dt 0.025 | |
| type train | step 10270 | loss 0.0092 0.0792 0.3661 1.1282 | lr 5.6e-04 | norm 0.1202 | dt 0.026 | |
| type train | step 10280 | loss 0.0091 0.0787 0.3638 1.1165 | lr 5.6e-04 | norm 0.1215 | dt 0.025 | |
| type train | step 10290 | loss 0.0092 0.0782 0.3628 1.1320 | lr 5.6e-04 | norm 0.1042 | dt 0.025 | |
| type train | step 10300 | loss 0.0089 0.0766 0.3442 1.0715 | lr 5.6e-04 | norm 0.1003 | dt 0.026 | |
| type train | step 10310 | loss 0.0089 0.0786 0.3557 1.0998 | lr 5.5e-04 | norm 0.1283 | dt 0.026 | |
| type train | step 10320 | loss 0.0094 0.0771 0.3519 1.0931 | lr 5.5e-04 | norm 0.1392 | dt 0.025 | |
| type train | step 10330 | loss 0.0090 0.0793 0.3700 1.1426 | lr 5.5e-04 | norm 0.1136 | dt 0.025 | |
| type train | step 10340 | loss 0.0091 0.0774 0.3477 1.0776 | lr 5.5e-04 | norm 0.1286 | dt 0.026 | |
| type train | step 10350 | loss 0.0092 0.0778 0.3589 1.1156 | lr 5.5e-04 | norm 0.1189 | dt 0.026 | |
| type train | step 10360 | loss 0.0089 0.0766 0.3475 1.0748 | lr 5.5e-04 | norm 0.1020 | dt 0.025 | |
| type train | step 10370 | loss 0.0090 0.0792 0.3688 1.1346 | lr 5.5e-04 | norm 0.1201 | dt 0.027 | |
| type train | step 10380 | loss 0.0090 0.0780 0.3544 1.0970 | lr 5.5e-04 | norm 0.1075 | dt 0.026 | |
| type train | step 10390 | loss 0.0091 0.0792 0.3621 1.1211 | lr 5.5e-04 | norm 0.1143 | dt 0.026 | |
| type train | step 10400 | loss 0.0091 0.0757 0.3441 1.0742 | lr 5.5e-04 | norm 0.1069 | dt 0.026 | |
| type train | step 10410 | loss 0.0093 0.0793 0.3658 1.1324 | lr 5.5e-04 | norm 0.1338 | dt 0.025 | |
| type train | step 10420 | loss 0.0090 0.0772 0.3470 1.0683 | lr 5.5e-04 | norm 0.1089 | dt 0.025 | |
| type train | step 10430 | loss 0.0088 0.0773 0.3547 1.0983 | lr 5.5e-04 | norm 0.1232 | dt 0.026 | |
| type train | step 10440 | loss 0.0090 0.0770 0.3480 1.0763 | lr 5.5e-04 | norm 0.0969 | dt 0.026 | |
| type train | step 10450 | loss 0.0094 0.0806 0.3729 1.1540 | lr 5.4e-04 | norm 0.1126 | dt 0.025 | |
| type train | step 10460 | loss 0.0089 0.0770 0.3519 1.0830 | lr 5.4e-04 | norm 0.1171 | dt 0.026 | |
| type train | step 10470 | loss 0.0088 0.0747 0.3415 1.0686 | lr 5.4e-04 | norm 0.1104 | dt 0.026 | |
| type train | step 10480 | loss 0.0091 0.0780 0.3547 1.0841 | lr 5.4e-04 | norm 0.1128 | dt 0.025 | |
| type train | step 10490 | loss 0.0092 0.0782 0.3463 1.0707 | lr 5.4e-04 | norm 0.1082 | dt 0.026 | |
| type train | step 10500 | loss 0.0090 0.0766 0.3480 1.0805 | lr 5.4e-04 | norm 0.0945 | dt 0.026 | |
| type train | step 10510 | loss 0.0097 0.0778 0.3513 1.0975 | lr 5.4e-04 | norm 0.1438 | dt 0.026 | |
| type train | step 10520 | loss 0.0091 0.0791 0.3575 1.1102 | lr 5.4e-04 | norm 0.1286 | dt 0.025 | |
| type train | step 10530 | loss 0.0091 0.0773 0.3571 1.1065 | lr 5.4e-04 | norm 0.1307 | dt 0.025 | |
| type train | step 10540 | loss 0.0088 0.0781 0.3513 1.0716 | lr 5.4e-04 | norm 0.1279 | dt 0.026 | |
| type train | step 10550 | loss 0.0091 0.0782 0.3499 1.0829 | lr 5.4e-04 | norm 0.1155 | dt 0.027 | |
| type train | step 10560 | loss 0.0090 0.0768 0.3523 1.0972 | lr 5.4e-04 | norm 0.1182 | dt 0.026 | |
| type train | step 10570 | loss 0.0096 0.0800 0.3637 1.1280 | lr 5.4e-04 | norm 0.1057 | dt 0.026 | |
| type train | step 10580 | loss 0.0093 0.0772 0.3500 1.0927 | lr 5.3e-04 | norm 0.1161 | dt 0.026 | |
| type train | step 10590 | loss 0.0089 0.0773 0.3573 1.0963 | lr 5.3e-04 | norm 0.1236 | dt 0.027 | |
| type train | step 10600 | loss 0.0088 0.0773 0.3493 1.0744 | lr 5.3e-04 | norm 0.1177 | dt 0.028 | |
| type train | step 10610 | loss 0.0092 0.0807 0.3614 1.1009 | lr 5.3e-04 | norm 0.1126 | dt 0.035 | |
| type train | step 10620 | loss 0.0091 0.0762 0.3442 1.0734 | lr 5.3e-04 | norm 0.0982 | dt 0.035 | |
| type train | step 10630 | loss 0.0090 0.0763 0.3469 1.0877 | lr 5.3e-04 | norm 0.0982 | dt 0.035 | |
| type train | step 10640 | loss 0.0092 0.0765 0.3436 1.0696 | lr 5.3e-04 | norm 0.0956 | dt 0.035 | |
| type train | step 10650 | loss 0.0093 0.0777 0.3592 1.1208 | lr 5.3e-04 | norm 0.1393 | dt 0.035 | |
| type train | step 10660 | loss 0.0089 0.0762 0.3413 1.0668 | lr 5.3e-04 | norm 0.1256 | dt 0.035 | |
| type train | step 10670 | loss 0.0092 0.0796 0.3525 1.0831 | lr 5.3e-04 | norm 0.1232 | dt 0.035 | |
| type train | step 10680 | loss 0.0092 0.0771 0.3527 1.0941 | lr 5.3e-04 | norm 0.1140 | dt 0.035 | |
| type train | step 10690 | loss 0.0092 0.0759 0.3491 1.0924 | lr 5.3e-04 | norm 0.1184 | dt 0.035 | |
| type train | step 10700 | loss 0.0092 0.0784 0.3603 1.1111 | lr 5.3e-04 | norm 0.1103 | dt 0.035 | |
| type train | step 10710 | loss 0.0089 0.0776 0.3575 1.1050 | lr 5.3e-04 | norm 0.1007 | dt 0.034 | |
| type train | step 10720 | loss 0.0089 0.0780 0.3513 1.0891 | lr 5.2e-04 | norm 0.1338 | dt 0.034 | |
| type train | step 10730 | loss 0.0089 0.0751 0.3380 1.0516 | lr 5.2e-04 | norm 0.1181 | dt 0.033 | |
| type train | step 10740 | loss 0.0090 0.0777 0.3471 1.0862 | lr 5.2e-04 | norm 0.1226 | dt 0.034 | |
| type train | step 10750 | loss 0.0094 0.0771 0.3574 1.1234 | lr 5.2e-04 | norm 0.1204 | dt 0.033 | |
| type train | step 10760 | loss 0.0089 0.0775 0.3580 1.1044 | lr 5.2e-04 | norm 0.1136 | dt 0.028 | |
| type train | step 10770 | loss 0.0093 0.0773 0.3463 1.0743 | lr 5.2e-04 | norm 0.1066 | dt 0.027 | |
| type train | step 10780 | loss 0.0090 0.0779 0.3540 1.0885 | lr 5.2e-04 | norm 0.1119 | dt 0.026 | |
| type train | step 10790 | loss 0.0090 0.0780 0.3488 1.0907 | lr 5.2e-04 | norm 0.1356 | dt 0.026 | |
| type train | step 10800 | loss 0.0090 0.0791 0.3606 1.1095 | lr 5.2e-04 | norm 0.1048 | dt 0.025 | |
| type train | step 10810 | loss 0.0090 0.0755 0.3425 1.0756 | lr 5.2e-04 | norm 0.1196 | dt 0.025 | |
| type train | step 10820 | loss 0.0093 0.0784 0.3572 1.1022 | lr 5.2e-04 | norm 0.1031 | dt 0.026 | |
| type train | step 10830 | loss 0.0089 0.0750 0.3376 1.0544 | lr 5.2e-04 | norm 0.1109 | dt 0.025 | |
| type train | step 10840 | loss 0.0091 0.0763 0.3546 1.0945 | lr 5.2e-04 | norm 0.1020 | dt 0.026 | |
| type train | step 10850 | loss 0.0090 0.0784 0.3562 1.0979 | lr 5.2e-04 | norm 0.1066 | dt 0.026 | |
| type train | step 10860 | loss 0.0090 0.0793 0.3622 1.1156 | lr 5.1e-04 | norm 0.1096 | dt 0.025 | |
| type train | step 10870 | loss 0.0092 0.0771 0.3531 1.1004 | lr 5.1e-04 | norm 0.1190 | dt 0.025 | |
| type train | step 10880 | loss 0.0091 0.0790 0.3659 1.1274 | lr 5.1e-04 | norm 0.1173 | dt 0.026 | |
| type train | step 10890 | loss 0.0090 0.0785 0.3635 1.1159 | lr 5.1e-04 | norm 0.1222 | dt 0.026 | |
| type train | step 10900 | loss 0.0091 0.0781 0.3625 1.1303 | lr 5.1e-04 | norm 0.1078 | dt 0.025 | |
| type train | step 10910 | loss 0.0088 0.0765 0.3439 1.0706 | lr 5.1e-04 | norm 0.1026 | dt 0.026 | |
| type train | step 10920 | loss 0.0088 0.0784 0.3551 1.0989 | lr 5.1e-04 | norm 0.1254 | dt 0.026 | |
| type train | step 10930 | loss 0.0093 0.0768 0.3514 1.0916 | lr 5.1e-04 | norm 0.1354 | dt 0.025 | |
| type train | step 10940 | loss 0.0089 0.0790 0.3697 1.1417 | lr 5.1e-04 | norm 0.1149 | dt 0.026 | |
| type train | step 10950 | loss 0.0090 0.0773 0.3471 1.0759 | lr 5.1e-04 | norm 0.1333 | dt 0.026 | |
| type train | step 10960 | loss 0.0091 0.0776 0.3584 1.1144 | lr 5.1e-04 | norm 0.1197 | dt 0.025 | |
| type train | step 10970 | loss 0.0088 0.0765 0.3472 1.0746 | lr 5.1e-04 | norm 0.0998 | dt 0.025 | |
| type train | step 10980 | loss 0.0089 0.0789 0.3686 1.1330 | lr 5.1e-04 | norm 0.1161 | dt 0.029 | |
| type train | step 10990 | loss 0.0089 0.0777 0.3541 1.0961 | lr 5.0e-04 | norm 0.1006 | dt 0.026 | |
| type train | step 11000 | loss 0.0090 0.0790 0.3616 1.1197 | lr 5.0e-04 | norm 0.1119 | dt 0.025 | |
| type train | step 11010 | loss 0.0090 0.0756 0.3437 1.0730 | lr 5.0e-04 | norm 0.1127 | dt 0.026 | |
| type train | step 11020 | loss 0.0092 0.0791 0.3654 1.1307 | lr 5.0e-04 | norm 0.1359 | dt 0.026 | |
| type train | step 11030 | loss 0.0089 0.0771 0.3467 1.0668 | lr 5.0e-04 | norm 0.1110 | dt 0.025 | |
| type train | step 11040 | loss 0.0087 0.0771 0.3545 1.0967 | lr 5.0e-04 | norm 0.1298 | dt 0.026 | |
| type train | step 11050 | loss 0.0089 0.0770 0.3477 1.0753 | lr 5.0e-04 | norm 0.0942 | dt 0.026 | |
| type train | step 11060 | loss 0.0093 0.0804 0.3727 1.1535 | lr 5.0e-04 | norm 0.1018 | dt 0.025 | |
| type train | step 11070 | loss 0.0088 0.0769 0.3512 1.0818 | lr 5.0e-04 | norm 0.1202 | dt 0.025 | |
| type train | step 11080 | loss 0.0087 0.0746 0.3411 1.0675 | lr 5.0e-04 | norm 0.1063 | dt 0.026 | |
| type train | step 11090 | loss 0.0090 0.0779 0.3542 1.0828 | lr 5.0e-04 | norm 0.1213 | dt 0.026 | |
| type train | step 11100 | loss 0.0091 0.0781 0.3460 1.0689 | lr 5.0e-04 | norm 0.1095 | dt 0.026 | |
| type train | step 11110 | loss 0.0089 0.0765 0.3477 1.0791 | lr 5.0e-04 | norm 0.0966 | dt 0.025 | |
| type train | step 11120 | loss 0.0096 0.0776 0.3509 1.0965 | lr 5.0e-04 | norm 0.1441 | dt 0.026 | |
| type train | step 11130 | loss 0.0090 0.0789 0.3571 1.1090 | lr 4.9e-04 | norm 0.1325 | dt 0.025 | |
| type train | step 11140 | loss 0.0090 0.0771 0.3566 1.1055 | lr 4.9e-04 | norm 0.1270 | dt 0.025 | |
| type train | step 11150 | loss 0.0087 0.0779 0.3508 1.0699 | lr 4.9e-04 | norm 0.1278 | dt 0.025 | |
| type train | step 11160 | loss 0.0089 0.0781 0.3497 1.0815 | lr 4.9e-04 | norm 0.1178 | dt 0.026 | |
| type train | step 11170 | loss 0.0089 0.0766 0.3519 1.0960 | lr 4.9e-04 | norm 0.1161 | dt 0.027 | |
| type train | step 11180 | loss 0.0094 0.0798 0.3631 1.1274 | lr 4.9e-04 | norm 0.1063 | dt 0.035 | |
| type train | step 11190 | loss 0.0092 0.0770 0.3497 1.0923 | lr 4.9e-04 | norm 0.1127 | dt 0.035 | |
| type train | step 11200 | loss 0.0088 0.0771 0.3569 1.0956 | lr 4.9e-04 | norm 0.1159 | dt 0.035 | |
| type train | step 11210 | loss 0.0087 0.0771 0.3489 1.0735 | lr 4.9e-04 | norm 0.1189 | dt 0.035 | |
| type train | step 11220 | loss 0.0091 0.0806 0.3613 1.0991 | lr 4.9e-04 | norm 0.1168 | dt 0.035 | |
| type train | step 11230 | loss 0.0090 0.0761 0.3438 1.0729 | lr 4.9e-04 | norm 0.1057 | dt 0.035 | |
| type train | step 11240 | loss 0.0089 0.0762 0.3464 1.0865 | lr 4.9e-04 | norm 0.0970 | dt 0.035 | |
| type train | step 11250 | loss 0.0091 0.0763 0.3434 1.0690 | lr 4.9e-04 | norm 0.1032 | dt 0.035 | |
| type train | step 11260 | loss 0.0092 0.0775 0.3586 1.1200 | lr 4.9e-04 | norm 0.1445 | dt 0.032 | |
| type train | step 11270 | loss 0.0089 0.0759 0.3407 1.0654 | lr 4.8e-04 | norm 0.1256 | dt 0.031 | |
| type train | step 11280 | loss 0.0091 0.0794 0.3520 1.0814 | lr 4.8e-04 | norm 0.1151 | dt 0.031 | |
| type train | step 11290 | loss 0.0091 0.0769 0.3522 1.0921 | lr 4.8e-04 | norm 0.1176 | dt 0.031 | |
| type train | step 11300 | loss 0.0091 0.0757 0.3490 1.0919 | lr 4.8e-04 | norm 0.1139 | dt 0.031 | |
| type train | step 11310 | loss 0.0091 0.0782 0.3599 1.1100 | lr 4.8e-04 | norm 0.1066 | dt 0.031 | |
| type train | step 11320 | loss 0.0088 0.0774 0.3574 1.1040 | lr 4.8e-04 | norm 0.1036 | dt 0.031 | |
| type train | step 11330 | loss 0.0088 0.0777 0.3508 1.0876 | lr 4.8e-04 | norm 0.1367 | dt 0.029 | |
| type train | step 11340 | loss 0.0088 0.0749 0.3377 1.0496 | lr 4.8e-04 | norm 0.1107 | dt 0.029 | |
| type train | step 11350 | loss 0.0089 0.0775 0.3468 1.0850 | lr 4.8e-04 | norm 0.1199 | dt 0.025 | |
| type train | step 11360 | loss 0.0093 0.0769 0.3568 1.1223 | lr 4.8e-04 | norm 0.1161 | dt 0.026 | |
| type train | step 11370 | loss 0.0089 0.0772 0.3577 1.1034 | lr 4.8e-04 | norm 0.1067 | dt 0.026 | |
| type train | step 11380 | loss 0.0092 0.0771 0.3461 1.0726 | lr 4.8e-04 | norm 0.1093 | dt 0.025 | |
| type train | step 11390 | loss 0.0089 0.0776 0.3537 1.0871 | lr 4.8e-04 | norm 0.1086 | dt 0.025 | |
| type train | step 11400 | loss 0.0089 0.0778 0.3484 1.0879 | lr 4.8e-04 | norm 0.1273 | dt 0.025 | |
| type train | step 11410 | loss 0.0089 0.0789 0.3603 1.1076 | lr 4.7e-04 | norm 0.1057 | dt 0.026 | |
| type train | step 11420 | loss 0.0089 0.0753 0.3420 1.0744 | lr 4.7e-04 | norm 0.1127 | dt 0.025 | |
| type train | step 11430 | loss 0.0092 0.0783 0.3568 1.1012 | lr 4.7e-04 | norm 0.1005 | dt 0.025 | |
| type train | step 11440 | loss 0.0088 0.0749 0.3375 1.0530 | lr 4.7e-04 | norm 0.1082 | dt 0.025 | |
| type train | step 11450 | loss 0.0091 0.0761 0.3546 1.0931 | lr 4.7e-04 | norm 0.1087 | dt 0.025 | |
| type train | step 11460 | loss 0.0089 0.0782 0.3558 1.0966 | lr 4.7e-04 | norm 0.1047 | dt 0.025 | |
| type train | step 11470 | loss 0.0089 0.0791 0.3615 1.1141 | lr 4.7e-04 | norm 0.1103 | dt 0.026 | |
| type train | step 11480 | loss 0.0092 0.0769 0.3527 1.0994 | lr 4.7e-04 | norm 0.1137 | dt 0.025 | |
| type train | step 11490 | loss 0.0090 0.0789 0.3655 1.1265 | lr 4.7e-04 | norm 0.1101 | dt 0.025 | |
| type train | step 11500 | loss 0.0089 0.0783 0.3631 1.1153 | lr 4.7e-04 | norm 0.1257 | dt 0.025 | |
| type train | step 11510 | loss 0.0090 0.0779 0.3620 1.1286 | lr 4.7e-04 | norm 0.1082 | dt 0.025 | |
| type train | step 11520 | loss 0.0087 0.0763 0.3437 1.0691 | lr 4.7e-04 | norm 0.0993 | dt 0.025 | |
| type train | step 11530 | loss 0.0087 0.0783 0.3547 1.0977 | lr 4.7e-04 | norm 0.1323 | dt 0.025 | |
| type train | step 11540 | loss 0.0092 0.0766 0.3509 1.0901 | lr 4.6e-04 | norm 0.1331 | dt 0.026 | |
| type train | step 11550 | loss 0.0089 0.0789 0.3694 1.1403 | lr 4.6e-04 | norm 0.1106 | dt 0.025 | |
| type train | step 11560 | loss 0.0089 0.0772 0.3467 1.0750 | lr 4.6e-04 | norm 0.1434 | dt 0.026 | |
| type train | step 11570 | loss 0.0090 0.0774 0.3580 1.1122 | lr 4.6e-04 | norm 0.1255 | dt 0.026 | |
| type train | step 11580 | loss 0.0087 0.0763 0.3470 1.0738 | lr 4.6e-04 | norm 0.1065 | dt 0.025 | |
| type train | step 11590 | loss 0.0088 0.0788 0.3683 1.1320 | lr 4.6e-04 | norm 0.1190 | dt 0.027 | |
| type train | step 11600 | loss 0.0088 0.0775 0.3538 1.0954 | lr 4.6e-04 | norm 0.1000 | dt 0.026 | |
| type train | step 11610 | loss 0.0089 0.0788 0.3612 1.1188 | lr 4.6e-04 | norm 0.1141 | dt 0.025 | |
| type train | step 11620 | loss 0.0089 0.0754 0.3433 1.0721 | lr 4.6e-04 | norm 0.1079 | dt 0.026 | |
| type train | step 11630 | loss 0.0091 0.0790 0.3651 1.1291 | lr 4.6e-04 | norm 0.1345 | dt 0.026 | |
| type train | step 11640 | loss 0.0088 0.0770 0.3463 1.0652 | lr 4.6e-04 | norm 0.1151 | dt 0.026 | |
| type train | step 11650 | loss 0.0086 0.0769 0.3540 1.0950 | lr 4.6e-04 | norm 0.1190 | dt 0.026 | |
| type train | step 11660 | loss 0.0088 0.0768 0.3473 1.0749 | lr 4.6e-04 | norm 0.1037 | dt 0.025 | |
| type train | step 11670 | loss 0.0092 0.0803 0.3722 1.1525 | lr 4.6e-04 | norm 0.1011 | dt 0.025 | |
| type train | step 11680 | loss 0.0087 0.0768 0.3507 1.0808 | lr 4.5e-04 | norm 0.1182 | dt 0.026 | |
| type train | step 11690 | loss 0.0087 0.0745 0.3409 1.0662 | lr 4.5e-04 | norm 0.1067 | dt 0.026 | |
| type train | step 11700 | loss 0.0089 0.0776 0.3539 1.0812 | lr 4.5e-04 | norm 0.1118 | dt 0.025 | |
| type train | step 11710 | loss 0.0090 0.0780 0.3456 1.0669 | lr 4.5e-04 | norm 0.1071 | dt 0.026 | |
| type train | step 11720 | loss 0.0088 0.0764 0.3473 1.0779 | lr 4.5e-04 | norm 0.0915 | dt 0.026 | |
| type train | step 11730 | loss 0.0095 0.0773 0.3506 1.0946 | lr 4.5e-04 | norm 0.1411 | dt 0.026 | |
| type train | step 11740 | loss 0.0090 0.0787 0.3564 1.1074 | lr 4.5e-04 | norm 0.1284 | dt 0.028 | |
| type train | step 11750 | loss 0.0089 0.0769 0.3562 1.1047 | lr 4.5e-04 | norm 0.1261 | dt 0.035 | |
| type train | step 11760 | loss 0.0086 0.0777 0.3507 1.0686 | lr 4.5e-04 | norm 0.1239 | dt 0.032 | |
| type train | step 11770 | loss 0.0089 0.0779 0.3494 1.0799 | lr 4.5e-04 | norm 0.1118 | dt 0.032 | |
| type train | step 11780 | loss 0.0088 0.0765 0.3515 1.0952 | lr 4.5e-04 | norm 0.1149 | dt 0.032 | |
| type train | step 11790 | loss 0.0093 0.0796 0.3627 1.1260 | lr 4.5e-04 | norm 0.1068 | dt 0.032 | |
| type train | step 11800 | loss 0.0091 0.0768 0.3494 1.0921 | lr 4.5e-04 | norm 0.1172 | dt 0.033 | |
| type train | step 11810 | loss 0.0088 0.0769 0.3564 1.0941 | lr 4.5e-04 | norm 0.1141 | dt 0.032 | |
| type train | step 11820 | loss 0.0086 0.0770 0.3485 1.0719 | lr 4.4e-04 | norm 0.1234 | dt 0.032 | |
| type train | step 11830 | loss 0.0090 0.0804 0.3610 1.0978 | lr 4.4e-04 | norm 0.1105 | dt 0.031 | |
| type train | step 11840 | loss 0.0089 0.0759 0.3435 1.0724 | lr 4.4e-04 | norm 0.0937 | dt 0.031 | |
| type train | step 11850 | loss 0.0088 0.0760 0.3459 1.0857 | lr 4.4e-04 | norm 0.0994 | dt 0.031 | |
| type train | step 11860 | loss 0.0090 0.0762 0.3431 1.0681 | lr 4.4e-04 | norm 0.0934 | dt 0.032 | |
| type train | step 11870 | loss 0.0091 0.0774 0.3582 1.1180 | lr 4.4e-04 | norm 0.1392 | dt 0.031 | |
| type train | step 11880 | loss 0.0088 0.0758 0.3404 1.0644 | lr 4.4e-04 | norm 0.1238 | dt 0.031 | |
| type train | step 11890 | loss 0.0090 0.0793 0.3516 1.0797 | lr 4.4e-04 | norm 0.1213 | dt 0.031 | |
| type train | step 11900 | loss 0.0091 0.0768 0.3518 1.0917 | lr 4.4e-04 | norm 0.1123 | dt 0.031 | |
| type train | step 11910 | loss 0.0090 0.0756 0.3486 1.0906 | lr 4.4e-04 | norm 0.1141 | dt 0.031 | |
| type train | step 11920 | loss 0.0090 0.0780 0.3597 1.1087 | lr 4.4e-04 | norm 0.0995 | dt 0.030 | |
| type train | step 11930 | loss 0.0087 0.0772 0.3569 1.1031 | lr 4.4e-04 | norm 0.0977 | dt 0.031 | |
| type train | step 11940 | loss 0.0087 0.0775 0.3503 1.0867 | lr 4.4e-04 | norm 0.1364 | dt 0.030 | |
| type train | step 11950 | loss 0.0088 0.0747 0.3374 1.0482 | lr 4.4e-04 | norm 0.1173 | dt 0.031 | |
| type train | step 11960 | loss 0.0088 0.0773 0.3465 1.0840 | lr 4.3e-04 | norm 0.1136 | dt 0.029 | |
| type train | step 11970 | loss 0.0092 0.0767 0.3565 1.1206 | lr 4.3e-04 | norm 0.1183 | dt 0.027 | |
| type train | step 11980 | loss 0.0088 0.0771 0.3573 1.1025 | lr 4.3e-04 | norm 0.1060 | dt 0.025 | |
| type train | step 11990 | loss 0.0091 0.0770 0.3458 1.0714 | lr 4.3e-04 | norm 0.1079 | dt 0.025 | |
| type train | step 12000 | loss 0.0088 0.0774 0.3534 1.0867 | lr 4.3e-04 | norm 0.1076 | dt 0.025 | |
| type train | step 12010 | loss 0.0088 0.0777 0.3481 1.0865 | lr 4.3e-04 | norm 0.1249 | dt 0.026 | |
| type train | step 12020 | loss 0.0088 0.0787 0.3597 1.1066 | lr 4.3e-04 | norm 0.1152 | dt 0.025 | |
| type train | step 12030 | loss 0.0088 0.0751 0.3418 1.0724 | lr 4.3e-04 | norm 0.1173 | dt 0.025 | |
| type train | step 12040 | loss 0.0091 0.0782 0.3563 1.1004 | lr 4.3e-04 | norm 0.0982 | dt 0.026 | |
| type train | step 12050 | loss 0.0087 0.0747 0.3372 1.0514 | lr 4.3e-04 | norm 0.1069 | dt 0.025 | |
| type train | step 12060 | loss 0.0089 0.0760 0.3543 1.0926 | lr 4.3e-04 | norm 0.0953 | dt 0.025 | |
| type train | step 12070 | loss 0.0088 0.0780 0.3554 1.0953 | lr 4.3e-04 | norm 0.1015 | dt 0.025 | |
| type train | step 12080 | loss 0.0088 0.0789 0.3611 1.1125 | lr 4.3e-04 | norm 0.1113 | dt 0.025 | |
| type train | step 12090 | loss 0.0091 0.0768 0.3523 1.0982 | lr 4.3e-04 | norm 0.1145 | dt 0.026 | |
| type train | step 12100 | loss 0.0089 0.0787 0.3652 1.1247 | lr 4.2e-04 | norm 0.1131 | dt 0.026 | |
| type train | step 12110 | loss 0.0088 0.0781 0.3630 1.1145 | lr 4.2e-04 | norm 0.1234 | dt 0.025 | |
| type train | step 12120 | loss 0.0089 0.0778 0.3617 1.1268 | lr 4.2e-04 | norm 0.1075 | dt 0.026 | |
| type train | step 12130 | loss 0.0086 0.0761 0.3432 1.0678 | lr 4.2e-04 | norm 0.1013 | dt 0.026 | |
| type train | step 12140 | loss 0.0087 0.0780 0.3541 1.0960 | lr 4.2e-04 | norm 0.1233 | dt 0.025 | |
| type train | step 12150 | loss 0.0091 0.0763 0.3505 1.0886 | lr 4.2e-04 | norm 0.1333 | dt 0.025 | |
| type train | step 12160 | loss 0.0088 0.0787 0.3690 1.1393 | lr 4.2e-04 | norm 0.1070 | dt 0.026 | |
| type train | step 12170 | loss 0.0088 0.0770 0.3464 1.0744 | lr 4.2e-04 | norm 0.1359 | dt 0.026 | |
| type train | step 12180 | loss 0.0089 0.0773 0.3577 1.1107 | lr 4.2e-04 | norm 0.1215 | dt 0.025 | |
| type train | step 12190 | loss 0.0086 0.0762 0.3466 1.0724 | lr 4.2e-04 | norm 0.1012 | dt 0.025 | |
| type train | step 12200 | loss 0.0087 0.0786 0.3678 1.1307 | lr 4.2e-04 | norm 0.1181 | dt 0.027 | |
| type train | step 12210 | loss 0.0087 0.0774 0.3533 1.0944 | lr 4.2e-04 | norm 0.0992 | dt 0.026 | |
| type train | step 12220 | loss 0.0088 0.0786 0.3607 1.1176 | lr 4.2e-04 | norm 0.1095 | dt 0.026 | |
| type train | step 12230 | loss 0.0088 0.0753 0.3430 1.0711 | lr 4.2e-04 | norm 0.1178 | dt 0.025 | |
| type train | step 12240 | loss 0.0090 0.0788 0.3648 1.1278 | lr 4.2e-04 | norm 0.1353 | dt 0.026 | |
| type train | step 12250 | loss 0.0087 0.0768 0.3459 1.0646 | lr 4.1e-04 | norm 0.1086 | dt 0.026 | |
| type train | step 12260 | loss 0.0085 0.0767 0.3538 1.0943 | lr 4.1e-04 | norm 0.1150 | dt 0.035 | |
| type train | step 12270 | loss 0.0087 0.0766 0.3471 1.0742 | lr 4.1e-04 | norm 0.0910 | dt 0.035 | |
| type train | step 12280 | loss 0.0091 0.0800 0.3718 1.1517 | lr 4.1e-04 | norm 0.1019 | dt 0.034 | |
| type train | step 12290 | loss 0.0086 0.0767 0.3502 1.0800 | lr 4.1e-04 | norm 0.1261 | dt 0.034 | |
| type train | step 12300 | loss 0.0086 0.0743 0.3405 1.0653 | lr 4.1e-04 | norm 0.1022 | dt 0.034 | |
| type train | step 12310 | loss 0.0088 0.0775 0.3534 1.0797 | lr 4.1e-04 | norm 0.1149 | dt 0.033 | |
| type train | step 12320 | loss 0.0090 0.0778 0.3453 1.0651 | lr 4.1e-04 | norm 0.1081 | dt 0.033 | |
| type train | step 12330 | loss 0.0087 0.0762 0.3471 1.0771 | lr 4.1e-04 | norm 0.0891 | dt 0.034 | |
| type train | step 12340 | loss 0.0094 0.0770 0.3502 1.0927 | lr 4.1e-04 | norm 0.1422 | dt 0.033 | |
| type train | step 12350 | loss 0.0089 0.0785 0.3560 1.1069 | lr 4.1e-04 | norm 0.1254 | dt 0.032 | |
| type train | step 12360 | loss 0.0088 0.0767 0.3557 1.1031 | lr 4.1e-04 | norm 0.1208 | dt 0.032 | |
| type train | step 12370 | loss 0.0086 0.0775 0.3502 1.0676 | lr 4.1e-04 | norm 0.1229 | dt 0.032 | |
| type train | step 12380 | loss 0.0088 0.0778 0.3490 1.0791 | lr 4.1e-04 | norm 0.1092 | dt 0.032 | |
| type train | step 12390 | loss 0.0087 0.0763 0.3511 1.0955 | lr 4.0e-04 | norm 0.1110 | dt 0.031 | |
| type train | step 12400 | loss 0.0092 0.0793 0.3624 1.1248 | lr 4.0e-04 | norm 0.1017 | dt 0.032 | |
| type train | step 12410 | loss 0.0090 0.0766 0.3493 1.0917 | lr 4.0e-04 | norm 0.1106 | dt 0.032 | |
| type train | step 12420 | loss 0.0087 0.0768 0.3559 1.0936 | lr 4.0e-04 | norm 0.1134 | dt 0.032 | |
| type train | step 12430 | loss 0.0085 0.0768 0.3481 1.0706 | lr 4.0e-04 | norm 0.1065 | dt 0.032 | |
| type train | step 12440 | loss 0.0089 0.0802 0.3605 1.0963 | lr 4.0e-04 | norm 0.1091 | dt 0.031 | |
| type train | step 12450 | loss 0.0088 0.0758 0.3431 1.0718 | lr 4.0e-04 | norm 0.0971 | dt 0.031 | |
| type train | step 12460 | loss 0.0087 0.0759 0.3456 1.0845 | lr 4.0e-04 | norm 0.0991 | dt 0.032 | |
| type train | step 12470 | loss 0.0089 0.0760 0.3430 1.0675 | lr 4.0e-04 | norm 0.0935 | dt 0.031 | |
| type train | step 12480 | loss 0.0090 0.0771 0.3578 1.1171 | lr 4.0e-04 | norm 0.1314 | dt 0.032 | |
| type train | step 12490 | loss 0.0087 0.0756 0.3400 1.0633 | lr 4.0e-04 | norm 0.1242 | dt 0.031 | |
| type train | step 12500 | loss 0.0089 0.0792 0.3511 1.0790 | lr 4.0e-04 | norm 0.1259 | dt 0.031 | |
| type train | step 12510 | loss 0.0090 0.0766 0.3513 1.0908 | lr 4.0e-04 | norm 0.1145 | dt 0.026 | |
| type train | step 12520 | loss 0.0089 0.0754 0.3483 1.0896 | lr 4.0e-04 | norm 0.1106 | dt 0.025 | |
| type train | step 12530 | loss 0.0089 0.0778 0.3594 1.1079 | lr 3.9e-04 | norm 0.1020 | dt 0.025 | |
| type train | step 12540 | loss 0.0086 0.0770 0.3567 1.1024 | lr 3.9e-04 | norm 0.0930 | dt 0.026 | |
| type train | step 12550 | loss 0.0087 0.0773 0.3498 1.0854 | lr 3.9e-04 | norm 0.1261 | dt 0.025 | |
| type train | step 12560 | loss 0.0087 0.0746 0.3371 1.0472 | lr 3.9e-04 | norm 0.1080 | dt 0.025 | |
| type train | step 12570 | loss 0.0087 0.0771 0.3460 1.0830 | lr 3.9e-04 | norm 0.1160 | dt 0.026 | |
| type train | step 12580 | loss 0.0091 0.0764 0.3559 1.1193 | lr 3.9e-04 | norm 0.1189 | dt 0.026 | |
| type train | step 12590 | loss 0.0087 0.0770 0.3569 1.1022 | lr 3.9e-04 | norm 0.1071 | dt 0.025 | |
| type train | step 12600 | loss 0.0090 0.0769 0.3457 1.0704 | lr 3.9e-04 | norm 0.1081 | dt 0.025 | |
| type train | step 12610 | loss 0.0087 0.0772 0.3529 1.0851 | lr 3.9e-04 | norm 0.1047 | dt 0.026 | |
| type train | step 12620 | loss 0.0087 0.0775 0.3476 1.0863 | lr 3.9e-04 | norm 0.1301 | dt 0.025 | |
| type train | step 12630 | loss 0.0088 0.0785 0.3591 1.1060 | lr 3.9e-04 | norm 0.1043 | dt 0.025 | |
| type train | step 12640 | loss 0.0087 0.0750 0.3416 1.0709 | lr 3.9e-04 | norm 0.1174 | dt 0.025 | |
| type train | step 12650 | loss 0.0090 0.0779 0.3559 1.0990 | lr 3.9e-04 | norm 0.0976 | dt 0.026 | |
| type train | step 12660 | loss 0.0086 0.0746 0.3370 1.0502 | lr 3.9e-04 | norm 0.1102 | dt 0.026 | |
| type train | step 12670 | loss 0.0089 0.0757 0.3542 1.0920 | lr 3.9e-04 | norm 0.0961 | dt 0.025 | |
| type train | step 12680 | loss 0.0088 0.0778 0.3550 1.0940 | lr 3.8e-04 | norm 0.0948 | dt 0.026 | |
| type train | step 12690 | loss 0.0087 0.0787 0.3605 1.1110 | lr 3.8e-04 | norm 0.1098 | dt 0.026 | |
| type train | step 12700 | loss 0.0090 0.0767 0.3518 1.0966 | lr 3.8e-04 | norm 0.1131 | dt 0.026 | |
| type train | step 12710 | loss 0.0088 0.0784 0.3648 1.1242 | lr 3.8e-04 | norm 0.1141 | dt 0.026 | |
| type train | step 12720 | loss 0.0087 0.0780 0.3628 1.1138 | lr 3.8e-04 | norm 0.1269 | dt 0.025 | |
| type train | step 12730 | loss 0.0088 0.0776 0.3617 1.1257 | lr 3.8e-04 | norm 0.1066 | dt 0.025 | |
| type train | step 12740 | loss 0.0085 0.0759 0.3427 1.0663 | lr 3.8e-04 | norm 0.0946 | dt 0.026 | |
| type train | step 12750 | loss 0.0086 0.0778 0.3536 1.0948 | lr 3.8e-04 | norm 0.1214 | dt 0.026 | |
| type train | step 12760 | loss 0.0090 0.0761 0.3499 1.0872 | lr 3.8e-04 | norm 0.1334 | dt 0.028 | |
| type train | step 12770 | loss 0.0087 0.0785 0.3685 1.1383 | lr 3.8e-04 | norm 0.1073 | dt 0.029 | |
| type train | step 12780 | loss 0.0087 0.0769 0.3460 1.0732 | lr 3.8e-04 | norm 0.1365 | dt 0.035 | |
| type train | step 12790 | loss 0.0088 0.0772 0.3574 1.1095 | lr 3.8e-04 | norm 0.1191 | dt 0.035 | |
| type train | step 12800 | loss 0.0085 0.0761 0.3463 1.0710 | lr 3.8e-04 | norm 0.1004 | dt 0.035 | |
| type train | step 12810 | loss 0.0086 0.0784 0.3675 1.1295 | lr 3.8e-04 | norm 0.1108 | dt 0.037 | |
| type train | step 12820 | loss 0.0086 0.0772 0.3530 1.0937 | lr 3.8e-04 | norm 0.1009 | dt 0.035 | |
| type train | step 12830 | loss 0.0087 0.0785 0.3602 1.1168 | lr 3.7e-04 | norm 0.1134 | dt 0.035 | |
| type train | step 12840 | loss 0.0087 0.0751 0.3426 1.0703 | lr 3.7e-04 | norm 0.1056 | dt 0.035 | |
| type train | step 12850 | loss 0.0090 0.0786 0.3643 1.1264 | lr 3.7e-04 | norm 0.1333 | dt 0.035 | |
| type train | step 12860 | loss 0.0086 0.0767 0.3456 1.0635 | lr 3.7e-04 | norm 0.1071 | dt 0.035 | |
| type train | step 12870 | loss 0.0084 0.0765 0.3533 1.0936 | lr 3.7e-04 | norm 0.1116 | dt 0.035 | |
| type train | step 12880 | loss 0.0086 0.0765 0.3468 1.0734 | lr 3.7e-04 | norm 0.0893 | dt 0.035 | |
| type train | step 12890 | loss 0.0090 0.0798 0.3715 1.1501 | lr 3.7e-04 | norm 0.1086 | dt 0.035 | |
| type train | step 12900 | loss 0.0085 0.0766 0.3499 1.0791 | lr 3.7e-04 | norm 0.1248 | dt 0.035 | |
| type train | step 12910 | loss 0.0085 0.0741 0.3402 1.0643 | lr 3.7e-04 | norm 0.0997 | dt 0.034 | |
| type train | step 12920 | loss 0.0087 0.0773 0.3530 1.0785 | lr 3.7e-04 | norm 0.1072 | dt 0.034 | |
| type train | step 12930 | loss 0.0089 0.0777 0.3451 1.0643 | lr 3.7e-04 | norm 0.1032 | dt 0.033 | |
| type train | step 12940 | loss 0.0086 0.0761 0.3466 1.0763 | lr 3.7e-04 | norm 0.0921 | dt 0.034 | |
| type train | step 12950 | loss 0.0093 0.0767 0.3500 1.0912 | lr 3.7e-04 | norm 0.1403 | dt 0.033 | |
| type train | step 12960 | loss 0.0088 0.0783 0.3559 1.1053 | lr 3.7e-04 | norm 0.1258 | dt 0.033 | |
| type train | step 12970 | loss 0.0087 0.0765 0.3554 1.1017 | lr 3.7e-04 | norm 0.1185 | dt 0.033 | |
| type train | step 12980 | loss 0.0085 0.0773 0.3497 1.0666 | lr 3.6e-04 | norm 0.1255 | dt 0.032 | |
| type train | step 12990 | loss 0.0087 0.0777 0.3487 1.0780 | lr 3.6e-04 | norm 0.1066 | dt 0.032 | |
| type train | step 13000 | loss 0.0086 0.0762 0.3508 1.0951 | lr 3.6e-04 | norm 0.1189 | dt 0.033 | |
| type train | step 13010 | loss 0.0092 0.0791 0.3620 1.1235 | lr 3.6e-04 | norm 0.1022 | dt 0.026 | |
| type train | step 13020 | loss 0.0089 0.0764 0.3489 1.0915 | lr 3.6e-04 | norm 0.1082 | dt 0.026 | |
| type train | step 13030 | loss 0.0086 0.0766 0.3555 1.0925 | lr 3.6e-04 | norm 0.1118 | dt 0.026 | |
| type train | step 13040 | loss 0.0085 0.0766 0.3477 1.0696 | lr 3.6e-04 | norm 0.1088 | dt 0.025 | |
| type train | step 13050 | loss 0.0088 0.0801 0.3601 1.0957 | lr 3.6e-04 | norm 0.1027 | dt 0.026 | |
| type train | step 13060 | loss 0.0087 0.0757 0.3426 1.0711 | lr 3.6e-04 | norm 0.1030 | dt 0.025 | |
| type train | step 13070 | loss 0.0086 0.0757 0.3454 1.0829 | lr 3.6e-04 | norm 0.0997 | dt 0.025 | |
| type train | step 13080 | loss 0.0089 0.0759 0.3426 1.0666 | lr 3.6e-04 | norm 0.0924 | dt 0.026 | |
| type train | step 13090 | loss 0.0090 0.0770 0.3574 1.1153 | lr 3.6e-04 | norm 0.1301 | dt 0.026 | |
| type train | step 13100 | loss 0.0086 0.0754 0.3395 1.0624 | lr 3.6e-04 | norm 0.1201 | dt 0.025 | |
| type train | step 13110 | loss 0.0088 0.0791 0.3506 1.0774 | lr 3.6e-04 | norm 0.1166 | dt 0.025 | |
| type train | step 13120 | loss 0.0089 0.0765 0.3508 1.0897 | lr 3.6e-04 | norm 0.1115 | dt 0.026 | |
| type train | step 13130 | loss 0.0088 0.0753 0.3479 1.0882 | lr 3.5e-04 | norm 0.1112 | dt 0.025 | |
| type train | step 13140 | loss 0.0088 0.0777 0.3590 1.1066 | lr 3.5e-04 | norm 0.0986 | dt 0.025 | |
| type train | step 13150 | loss 0.0085 0.0769 0.3563 1.1014 | lr 3.5e-04 | norm 0.0930 | dt 0.025 | |
| type train | step 13160 | loss 0.0086 0.0771 0.3492 1.0842 | lr 3.5e-04 | norm 0.1222 | dt 0.025 | |
| type train | step 13170 | loss 0.0086 0.0744 0.3370 1.0464 | lr 3.5e-04 | norm 0.1080 | dt 0.025 | |
| type train | step 13180 | loss 0.0086 0.0769 0.3455 1.0814 | lr 3.5e-04 | norm 0.1114 | dt 0.025 | |
| type train | step 13190 | loss 0.0090 0.0763 0.3553 1.1181 | lr 3.5e-04 | norm 0.1151 | dt 0.025 | |
| type train | step 13200 | loss 0.0086 0.0768 0.3566 1.1020 | lr 3.5e-04 | norm 0.1074 | dt 0.026 | |
| type train | step 13210 | loss 0.0089 0.0768 0.3455 1.0700 | lr 3.5e-04 | norm 0.1083 | dt 0.026 | |
| type train | step 13220 | loss 0.0086 0.0770 0.3526 1.0845 | lr 3.5e-04 | norm 0.1002 | dt 0.025 | |
| type train | step 13230 | loss 0.0086 0.0774 0.3472 1.0848 | lr 3.5e-04 | norm 0.1320 | dt 0.026 | |
| type train | step 13240 | loss 0.0087 0.0784 0.3588 1.1047 | lr 3.5e-04 | norm 0.0994 | dt 0.026 | |
| type train | step 13250 | loss 0.0086 0.0748 0.3413 1.0697 | lr 3.5e-04 | norm 0.1134 | dt 0.025 | |
| type train | step 13260 | loss 0.0089 0.0778 0.3554 1.0987 | lr 3.5e-04 | norm 0.0916 | dt 0.026 | |
| type train | step 13270 | loss 0.0085 0.0745 0.3368 1.0496 | lr 3.5e-04 | norm 0.1059 | dt 0.025 | |
| type train | step 13280 | loss 0.0088 0.0756 0.3540 1.0912 | lr 3.4e-04 | norm 0.1030 | dt 0.026 | |
| type train | step 13290 | loss 0.0087 0.0776 0.3546 1.0926 | lr 3.4e-04 | norm 0.0987 | dt 0.026 | |
| type train | step 13300 | loss 0.0087 0.0786 0.3600 1.1098 | lr 3.4e-04 | norm 0.1097 | dt 0.025 | |
| type train | step 13310 | loss 0.0089 0.0765 0.3513 1.0950 | lr 3.4e-04 | norm 0.1156 | dt 0.026 | |
| type train | step 13320 | loss 0.0087 0.0783 0.3646 1.1230 | lr 3.4e-04 | norm 0.1067 | dt 0.026 | |
| type train | step 13330 | loss 0.0087 0.0779 0.3627 1.1128 | lr 3.4e-04 | norm 0.1257 | dt 0.025 | |
| type train | step 13340 | loss 0.0087 0.0775 0.3613 1.1248 | lr 3.4e-04 | norm 0.1084 | dt 0.026 | |
| type train | step 13350 | loss 0.0084 0.0758 0.3425 1.0652 | lr 3.4e-04 | norm 0.1005 | dt 0.026 | |
| type train | step 13360 | loss 0.0085 0.0776 0.3531 1.0941 | lr 3.4e-04 | norm 0.1180 | dt 0.025 | |
| type train | step 13370 | loss 0.0090 0.0759 0.3495 1.0857 | lr 3.4e-04 | norm 0.1302 | dt 0.026 | |
| type train | step 13380 | loss 0.0086 0.0783 0.3682 1.1367 | lr 3.4e-04 | norm 0.1027 | dt 0.026 | |
| type train | step 13390 | loss 0.0086 0.0768 0.3457 1.0718 | lr 3.4e-04 | norm 0.1374 | dt 0.026 | |
| type train | step 13400 | loss 0.0088 0.0770 0.3572 1.1087 | lr 3.4e-04 | norm 0.1211 | dt 0.027 | |
| type train | step 13410 | loss 0.0084 0.0759 0.3459 1.0700 | lr 3.4e-04 | norm 0.1041 | dt 0.027 | |
| type train | step 13420 | loss 0.0085 0.0782 0.3672 1.1283 | lr 3.4e-04 | norm 0.1091 | dt 0.030 | |
| type train | step 13430 | loss 0.0086 0.0771 0.3526 1.0928 | lr 3.3e-04 | norm 0.0958 | dt 0.027 | |
| type train | step 13440 | loss 0.0086 0.0783 0.3598 1.1157 | lr 3.3e-04 | norm 0.1093 | dt 0.026 | |
| type train | step 13450 | loss 0.0086 0.0749 0.3422 1.0695 | lr 3.3e-04 | norm 0.1062 | dt 0.026 | |
| type train | step 13460 | loss 0.0089 0.0785 0.3640 1.1254 | lr 3.3e-04 | norm 0.1323 | dt 0.026 | |
| type train | step 13470 | loss 0.0085 0.0765 0.3452 1.0625 | lr 3.3e-04 | norm 0.1132 | dt 0.026 | |
| type train | step 13480 | loss 0.0084 0.0763 0.3531 1.0928 | lr 3.3e-04 | norm 0.1134 | dt 0.027 | |
| type train | step 13490 | loss 0.0085 0.0764 0.3467 1.0730 | lr 3.3e-04 | norm 0.0835 | dt 0.027 | |
| type train | step 13500 | loss 0.0089 0.0796 0.3711 1.1492 | lr 3.3e-04 | norm 0.0938 | dt 0.026 | |
| type train | step 13510 | loss 0.0084 0.0764 0.3494 1.0777 | lr 3.3e-04 | norm 0.1217 | dt 0.035 | |
| type train | step 13520 | loss 0.0084 0.0740 0.3400 1.0632 | lr 3.3e-04 | norm 0.0992 | dt 0.036 | |
| type train | step 13530 | loss 0.0086 0.0771 0.3526 1.0773 | lr 3.3e-04 | norm 0.1057 | dt 0.035 | |
| type train | step 13540 | loss 0.0088 0.0776 0.3448 1.0634 | lr 3.3e-04 | norm 0.1010 | dt 0.035 | |
| type train | step 13550 | loss 0.0085 0.0759 0.3462 1.0758 | lr 3.3e-04 | norm 0.0934 | dt 0.034 | |
| type train | step 13560 | loss 0.0092 0.0765 0.3496 1.0896 | lr 3.3e-04 | norm 0.1422 | dt 0.035 | |
| type train | step 13570 | loss 0.0087 0.0781 0.3556 1.1045 | lr 3.3e-04 | norm 0.1262 | dt 0.034 | |
| type train | step 13580 | loss 0.0086 0.0763 0.3551 1.1007 | lr 3.3e-04 | norm 0.1163 | dt 0.033 | |
| type train | step 13590 | loss 0.0084 0.0771 0.3493 1.0653 | lr 3.2e-04 | norm 0.1201 | dt 0.033 | |
| type train | step 13600 | loss 0.0086 0.0775 0.3484 1.0772 | lr 3.2e-04 | norm 0.1060 | dt 0.033 | |
| type train | step 13610 | loss 0.0085 0.0761 0.3504 1.0947 | lr 3.2e-04 | norm 0.1143 | dt 0.033 | |
| type train | step 13620 | loss 0.0091 0.0789 0.3617 1.1226 | lr 3.2e-04 | norm 0.1030 | dt 0.033 | |
| type train | step 13630 | loss 0.0089 0.0763 0.3485 1.0906 | lr 3.2e-04 | norm 0.1076 | dt 0.033 | |
| type train | step 13640 | loss 0.0085 0.0764 0.3551 1.0914 | lr 3.2e-04 | norm 0.1066 | dt 0.032 | |
| type train | step 13650 | loss 0.0084 0.0764 0.3474 1.0689 | lr 3.2e-04 | norm 0.1154 | dt 0.033 | |
| type train | step 13660 | loss 0.0087 0.0799 0.3599 1.0949 | lr 3.2e-04 | norm 0.1085 | dt 0.033 | |
| type train | step 13670 | loss 0.0086 0.0756 0.3423 1.0702 | lr 3.2e-04 | norm 0.0975 | dt 0.032 | |
| type train | step 13680 | loss 0.0085 0.0755 0.3449 1.0815 | lr 3.2e-04 | norm 0.0970 | dt 0.032 | |
| type train | step 13690 | loss 0.0088 0.0757 0.3422 1.0652 | lr 3.2e-04 | norm 0.0905 | dt 0.032 | |
| type train | step 13700 | loss 0.0089 0.0767 0.3571 1.1139 | lr 3.2e-04 | norm 0.1263 | dt 0.032 | |
| type train | step 13710 | loss 0.0086 0.0751 0.3392 1.0613 | lr 3.2e-04 | norm 0.1173 | dt 0.032 | |
| type train | step 13720 | loss 0.0087 0.0789 0.3502 1.0763 | lr 3.2e-04 | norm 0.1188 | dt 0.032 | |
| type train | step 13730 | loss 0.0088 0.0764 0.3506 1.0881 | lr 3.2e-04 | norm 0.1123 | dt 0.032 | |
| type train | step 13740 | loss 0.0087 0.0751 0.3475 1.0871 | lr 3.2e-04 | norm 0.1124 | dt 0.031 | |
| type train | step 13750 | loss 0.0087 0.0775 0.3588 1.1055 | lr 3.1e-04 | norm 0.0943 | dt 0.031 | |
| type train | step 13760 | loss 0.0085 0.0767 0.3560 1.1007 | lr 3.1e-04 | norm 0.0950 | dt 0.025 | |
| type train | step 13770 | loss 0.0085 0.0769 0.3488 1.0834 | lr 3.1e-04 | norm 0.1207 | dt 0.025 | |
| type train | step 13780 | loss 0.0085 0.0742 0.3365 1.0448 | lr 3.1e-04 | norm 0.1096 | dt 0.025 | |
| type train | step 13790 | loss 0.0086 0.0768 0.3452 1.0804 | lr 3.1e-04 | norm 0.1120 | dt 0.025 | |
| type train | step 13800 | loss 0.0090 0.0761 0.3550 1.1167 | lr 3.1e-04 | norm 0.1129 | dt 0.026 | |
| type train | step 13810 | loss 0.0085 0.0766 0.3561 1.1010 | lr 3.1e-04 | norm 0.1033 | dt 0.026 | |
| type train | step 13820 | loss 0.0089 0.0766 0.3451 1.0691 | lr 3.1e-04 | norm 0.1070 | dt 0.025 | |
| type train | step 13830 | loss 0.0086 0.0767 0.3524 1.0834 | lr 3.1e-04 | norm 0.0980 | dt 0.025 | |
| type train | step 13840 | loss 0.0086 0.0771 0.3468 1.0836 | lr 3.1e-04 | norm 0.1234 | dt 0.025 | |
| type train | step 13850 | loss 0.0086 0.0782 0.3582 1.1034 | lr 3.1e-04 | norm 0.0969 | dt 0.026 | |
| type train | step 13860 | loss 0.0086 0.0746 0.3409 1.0685 | lr 3.1e-04 | norm 0.1135 | dt 0.025 | |
| type train | step 13870 | loss 0.0088 0.0775 0.3549 1.0977 | lr 3.1e-04 | norm 0.0915 | dt 0.025 | |
| type train | step 13880 | loss 0.0085 0.0743 0.3366 1.0494 | lr 3.1e-04 | norm 0.1085 | dt 0.026 | |
| type train | step 13890 | loss 0.0087 0.0754 0.3538 1.0906 | lr 3.1e-04 | norm 0.0937 | dt 0.026 | |
| type train | step 13900 | loss 0.0087 0.0774 0.3541 1.0915 | lr 3.1e-04 | norm 0.0966 | dt 0.025 | |
| type train | step 13910 | loss 0.0086 0.0783 0.3595 1.1089 | lr 3.0e-04 | norm 0.1062 | dt 0.025 | |
| type train | step 13920 | loss 0.0089 0.0764 0.3509 1.0937 | lr 3.0e-04 | norm 0.1099 | dt 0.026 | |
| type train | step 13930 | loss 0.0087 0.0781 0.3643 1.1218 | lr 3.0e-04 | norm 0.1011 | dt 0.025 | |
| type train | step 13940 | loss 0.0086 0.0776 0.3624 1.1122 | lr 3.0e-04 | norm 0.1266 | dt 0.026 | |
| type train | step 13950 | loss 0.0087 0.0773 0.3612 1.1240 | lr 3.0e-04 | norm 0.1137 | dt 0.025 | |
| type train | step 13960 | loss 0.0084 0.0755 0.3420 1.0643 | lr 3.0e-04 | norm 0.0999 | dt 0.026 | |
| type train | step 13970 | loss 0.0084 0.0773 0.3528 1.0929 | lr 3.0e-04 | norm 0.1133 | dt 0.026 | |
| type train | step 13980 | loss 0.0089 0.0757 0.3491 1.0847 | lr 3.0e-04 | norm 0.1310 | dt 0.026 | |
| type train | step 13990 | loss 0.0085 0.0781 0.3677 1.1355 | lr 3.0e-04 | norm 0.1022 | dt 0.026 | |
| type train | step 14000 | loss 0.0085 0.0767 0.3453 1.0707 | lr 3.0e-04 | norm 0.1327 | dt 0.025 | |
| type train | step 14010 | loss 0.0087 0.0769 0.3569 1.1076 | lr 3.0e-04 | norm 0.1254 | dt 0.026 | |
| type train | step 14020 | loss 0.0084 0.0757 0.3457 1.0687 | lr 3.0e-04 | norm 0.0999 | dt 0.026 | |
| type train | step 14030 | loss 0.0085 0.0780 0.3670 1.1277 | lr 3.0e-04 | norm 0.1091 | dt 0.028 | |
| type train | step 14040 | loss 0.0085 0.0768 0.3524 1.0919 | lr 3.0e-04 | norm 0.0978 | dt 0.027 | |
| type train | step 14050 | loss 0.0086 0.0780 0.3594 1.1147 | lr 3.0e-04 | norm 0.1073 | dt 0.027 | |
| type train | step 14060 | loss 0.0086 0.0747 0.3420 1.0686 | lr 3.0e-04 | norm 0.1062 | dt 0.028 | |
| type train | step 14070 | loss 0.0088 0.0783 0.3634 1.1244 | lr 2.9e-04 | norm 0.1365 | dt 0.031 | |
| type train | step 14080 | loss 0.0085 0.0763 0.3449 1.0613 | lr 2.9e-04 | norm 0.1053 | dt 0.035 | |
| type train | step 14090 | loss 0.0083 0.0761 0.3528 1.0920 | lr 2.9e-04 | norm 0.1096 | dt 0.035 | |
| type train | step 14100 | loss 0.0085 0.0761 0.3464 1.0728 | lr 2.9e-04 | norm 0.0851 | dt 0.035 | |
| type train | step 14110 | loss 0.0089 0.0793 0.3708 1.1480 | lr 2.9e-04 | norm 0.0931 | dt 0.035 | |
| type train | step 14120 | loss 0.0084 0.0762 0.3493 1.0767 | lr 2.9e-04 | norm 0.1153 | dt 0.035 | |
| type train | step 14130 | loss 0.0084 0.0737 0.3397 1.0625 | lr 2.9e-04 | norm 0.0958 | dt 0.035 | |
| type train | step 14140 | loss 0.0086 0.0768 0.3523 1.0764 | lr 2.9e-04 | norm 0.1046 | dt 0.035 | |
| type train | step 14150 | loss 0.0087 0.0774 0.3446 1.0625 | lr 2.9e-04 | norm 0.1012 | dt 0.035 | |
| type train | step 14160 | loss 0.0085 0.0757 0.3461 1.0752 | lr 2.9e-04 | norm 0.0897 | dt 0.035 | |
| type train | step 14170 | loss 0.0091 0.0763 0.3493 1.0880 | lr 2.9e-04 | norm 0.1379 | dt 0.035 | |
| type train | step 14180 | loss 0.0086 0.0779 0.3553 1.1037 | lr 2.9e-04 | norm 0.1276 | dt 0.035 | |
| type train | step 14190 | loss 0.0086 0.0761 0.3547 1.0992 | lr 2.9e-04 | norm 0.1154 | dt 0.035 | |
| type train | step 14200 | loss 0.0084 0.0768 0.3489 1.0644 | lr 2.9e-04 | norm 0.1202 | dt 0.034 | |
| type train | step 14210 | loss 0.0086 0.0772 0.3479 1.0762 | lr 2.9e-04 | norm 0.1035 | dt 0.034 | |
| type train | step 14220 | loss 0.0085 0.0759 0.3502 1.0942 | lr 2.9e-04 | norm 0.1090 | dt 0.033 | |
| type train | step 14230 | loss 0.0090 0.0787 0.3613 1.1217 | lr 2.9e-04 | norm 0.0992 | dt 0.033 | |
| type train | step 14240 | loss 0.0088 0.0760 0.3484 1.0894 | lr 2.8e-04 | norm 0.1099 | dt 0.033 | |
| type train | step 14250 | loss 0.0085 0.0761 0.3550 1.0910 | lr 2.8e-04 | norm 0.1013 | dt 0.033 | |
| type train | step 14260 | loss 0.0084 0.0762 0.3471 1.0679 | lr 2.8e-04 | norm 0.1090 | dt 0.026 | |
| type train | step 14270 | loss 0.0087 0.0797 0.3596 1.0939 | lr 2.8e-04 | norm 0.1049 | dt 0.025 | |
| type train | step 14280 | loss 0.0086 0.0753 0.3421 1.0695 | lr 2.8e-04 | norm 0.0961 | dt 0.025 | |
| type train | step 14290 | loss 0.0085 0.0753 0.3448 1.0808 | lr 2.8e-04 | norm 0.0923 | dt 0.026 | |
| type train | step 14300 | loss 0.0087 0.0755 0.3418 1.0649 | lr 2.8e-04 | norm 0.0895 | dt 0.025 | |
| type train | step 14310 | loss 0.0089 0.0764 0.3568 1.1135 | lr 2.8e-04 | norm 0.1249 | dt 0.025 | |
| type train | step 14320 | loss 0.0085 0.0749 0.3389 1.0609 | lr 2.8e-04 | norm 0.1145 | dt 0.025 | |
| type train | step 14330 | loss 0.0087 0.0787 0.3499 1.0751 | lr 2.8e-04 | norm 0.1164 | dt 0.025 | |
| type train | step 14340 | loss 0.0088 0.0761 0.3505 1.0871 | lr 2.8e-04 | norm 0.1136 | dt 0.026 | |
| type train | step 14350 | loss 0.0087 0.0749 0.3472 1.0858 | lr 2.8e-04 | norm 0.1105 | dt 0.025 | |
| type train | step 14360 | loss 0.0087 0.0773 0.3585 1.1050 | lr 2.8e-04 | norm 0.0936 | dt 0.025 | |
| type train | step 14370 | loss 0.0084 0.0765 0.3558 1.0998 | lr 2.8e-04 | norm 0.0923 | dt 0.025 | |
| type train | step 14380 | loss 0.0085 0.0766 0.3483 1.0826 | lr 2.8e-04 | norm 0.1194 | dt 0.026 | |
| type train | step 14390 | loss 0.0085 0.0740 0.3361 1.0436 | lr 2.8e-04 | norm 0.1025 | dt 0.026 | |
| type train | step 14400 | loss 0.0085 0.0765 0.3449 1.0792 | lr 2.8e-04 | norm 0.1092 | dt 0.025 | |
| type train | step 14410 | loss 0.0089 0.0759 0.3545 1.1150 | lr 2.7e-04 | norm 0.1130 | dt 0.026 | |
| type train | step 14420 | loss 0.0085 0.0763 0.3558 1.0999 | lr 2.7e-04 | norm 0.1032 | dt 0.026 | |
| type train | step 14430 | loss 0.0088 0.0764 0.3450 1.0684 | lr 2.7e-04 | norm 0.1071 | dt 0.025 | |
| type train | step 14440 | loss 0.0085 0.0765 0.3521 1.0827 | lr 2.7e-04 | norm 0.0965 | dt 0.026 | |
| type train | step 14450 | loss 0.0085 0.0769 0.3463 1.0827 | lr 2.7e-04 | norm 0.1220 | dt 0.026 | |
| type train | step 14460 | loss 0.0086 0.0780 0.3579 1.1026 | lr 2.7e-04 | norm 0.0972 | dt 0.025 | |
| type train | step 14470 | loss 0.0085 0.0744 0.3405 1.0673 | lr 2.7e-04 | norm 0.1136 | dt 0.025 | |
| type train | step 14480 | loss 0.0088 0.0773 0.3547 1.0964 | lr 2.7e-04 | norm 0.0917 | dt 0.025 | |
| type train | step 14490 | loss 0.0084 0.0742 0.3364 1.0486 | lr 2.7e-04 | norm 0.1154 | dt 0.026 | |
| type train | step 14500 | loss 0.0087 0.0752 0.3534 1.0899 | lr 2.7e-04 | norm 0.0964 | dt 0.026 | |
| type train | step 14510 | loss 0.0086 0.0771 0.3537 1.0904 | lr 2.7e-04 | norm 0.0952 | dt 0.026 | |
| type train | step 14520 | loss 0.0086 0.0782 0.3591 1.1079 | lr 2.7e-04 | norm 0.1065 | dt 0.026 | |
| type train | step 14530 | loss 0.0088 0.0762 0.3504 1.0926 | lr 2.7e-04 | norm 0.1124 | dt 0.026 | |
| type train | step 14540 | loss 0.0086 0.0778 0.3639 1.1207 | lr 2.7e-04 | norm 0.1038 | dt 0.026 | |
| type train | step 14550 | loss 0.0086 0.0775 0.3622 1.1112 | lr 2.7e-04 | norm 0.1229 | dt 0.026 | |
| type train | step 14560 | loss 0.0086 0.0771 0.3611 1.1231 | lr 2.7e-04 | norm 0.1032 | dt 0.026 | |
| type train | step 14570 | loss 0.0083 0.0754 0.3417 1.0634 | lr 2.7e-04 | norm 0.1005 | dt 0.026 | |
| type train | step 14580 | loss 0.0084 0.0772 0.3524 1.0923 | lr 2.6e-04 | norm 0.1096 | dt 0.025 | |
| type train | step 14590 | loss 0.0088 0.0756 0.3488 1.0839 | lr 2.6e-04 | norm 0.1275 | dt 0.026 | |
| type train | step 14600 | loss 0.0085 0.0779 0.3673 1.1342 | lr 2.6e-04 | norm 0.1041 | dt 0.027 | |
| type train | step 14610 | loss 0.0085 0.0765 0.3451 1.0703 | lr 2.6e-04 | norm 0.1429 | dt 0.032 | |
| type train | step 14620 | loss 0.0087 0.0767 0.3567 1.1066 | lr 2.6e-04 | norm 0.1160 | dt 0.035 | |
| type train | step 14630 | loss 0.0083 0.0756 0.3455 1.0681 | lr 2.6e-04 | norm 0.0995 | dt 0.035 | |
| type train | step 14640 | loss 0.0085 0.0779 0.3667 1.1267 | lr 2.6e-04 | norm 0.1158 | dt 0.036 | |
| type train | step 14650 | loss 0.0085 0.0767 0.3522 1.0913 | lr 2.6e-04 | norm 0.0961 | dt 0.035 | |
| type train | step 14660 | loss 0.0086 0.0778 0.3590 1.1137 | lr 2.6e-04 | norm 0.1044 | dt 0.035 | |
| type train | step 14670 | loss 0.0085 0.0746 0.3417 1.0678 | lr 2.6e-04 | norm 0.1062 | dt 0.034 | |
| type train | step 14680 | loss 0.0088 0.0781 0.3631 1.1239 | lr 2.6e-04 | norm 0.1322 | dt 0.035 | |
| type train | step 14690 | loss 0.0084 0.0761 0.3444 1.0607 | lr 2.6e-04 | norm 0.1087 | dt 0.035 | |
| type train | step 14700 | loss 0.0083 0.0759 0.3523 1.0910 | lr 2.6e-04 | norm 0.1078 | dt 0.035 | |
| type train | step 14710 | loss 0.0084 0.0761 0.3462 1.0721 | lr 2.6e-04 | norm 0.0800 | dt 0.035 | |
| type train | step 14720 | loss 0.0088 0.0792 0.3705 1.1473 | lr 2.6e-04 | norm 0.0977 | dt 0.035 | |
| type train | step 14730 | loss 0.0083 0.0761 0.3489 1.0758 | lr 2.6e-04 | norm 0.1165 | dt 0.035 | |
| type train | step 14740 | loss 0.0083 0.0736 0.3394 1.0618 | lr 2.6e-04 | norm 0.0969 | dt 0.034 | |
| type train | step 14750 | loss 0.0086 0.0767 0.3518 1.0756 | lr 2.6e-04 | norm 0.1052 | dt 0.034 | |
| type train | step 14760 | loss 0.0087 0.0772 0.3442 1.0620 | lr 2.5e-04 | norm 0.0985 | dt 0.032 | |
| type train | step 14770 | loss 0.0084 0.0756 0.3458 1.0751 | lr 2.5e-04 | norm 0.0916 | dt 0.031 | |
| type train | step 14780 | loss 0.0091 0.0761 0.3489 1.0871 | lr 2.5e-04 | norm 0.1410 | dt 0.031 | |
| type train | step 14790 | loss 0.0086 0.0777 0.3549 1.1032 | lr 2.5e-04 | norm 0.1234 | dt 0.028 | |
| type train | step 14800 | loss 0.0086 0.0759 0.3544 1.0984 | lr 2.5e-04 | norm 0.1132 | dt 0.026 | |
| type train | step 14810 | loss 0.0083 0.0767 0.3486 1.0631 | lr 2.5e-04 | norm 0.1191 | dt 0.025 | |
| type train | step 14820 | loss 0.0085 0.0771 0.3476 1.0757 | lr 2.5e-04 | norm 0.1018 | dt 0.025 | |
| type train | step 14830 | loss 0.0084 0.0758 0.3499 1.0940 | lr 2.5e-04 | norm 0.1095 | dt 0.026 | |
| type train | step 14840 | loss 0.0090 0.0785 0.3609 1.1210 | lr 2.5e-04 | norm 0.1032 | dt 0.026 | |
| type train | step 14850 | loss 0.0087 0.0759 0.3482 1.0889 | lr 2.5e-04 | norm 0.1050 | dt 0.026 | |
| type train | step 14860 | loss 0.0084 0.0760 0.3546 1.0905 | lr 2.5e-04 | norm 0.0984 | dt 0.026 | |
| type train | step 14870 | loss 0.0083 0.0761 0.3468 1.0673 | lr 2.5e-04 | norm 0.1079 | dt 0.026 | |
| type train | step 14880 | loss 0.0086 0.0795 0.3593 1.0934 | lr 2.5e-04 | norm 0.1016 | dt 0.026 | |
| type train | step 14890 | loss 0.0086 0.0752 0.3419 1.0688 | lr 2.5e-04 | norm 0.0915 | dt 0.025 | |
| type train | step 14900 | loss 0.0085 0.0751 0.3444 1.0798 | lr 2.5e-04 | norm 0.0957 | dt 0.025 | |
| type train | step 14910 | loss 0.0087 0.0753 0.3416 1.0645 | lr 2.5e-04 | norm 0.0869 | dt 0.026 | |
| type train | step 14920 | loss 0.0088 0.0762 0.3564 1.1125 | lr 2.5e-04 | norm 0.1195 | dt 0.026 | |
| type train | step 14930 | loss 0.0085 0.0748 0.3385 1.0598 | lr 2.5e-04 | norm 0.1121 | dt 0.025 | |
| type train | step 14940 | loss 0.0087 0.0785 0.3496 1.0744 | lr 2.4e-04 | norm 0.1198 | dt 0.025 | |
| type train | step 14950 | loss 0.0087 0.0760 0.3501 1.0866 | lr 2.4e-04 | norm 0.1087 | dt 0.026 | |
| type train | step 14960 | loss 0.0087 0.0748 0.3469 1.0850 | lr 2.4e-04 | norm 0.1114 | dt 0.026 | |
| type train | step 14970 | loss 0.0087 0.0772 0.3581 1.1044 | lr 2.4e-04 | norm 0.0972 | dt 0.025 | |
| type train | step 14980 | loss 0.0084 0.0764 0.3556 1.0995 | lr 2.4e-04 | norm 0.0897 | dt 0.025 | |
| type train | step 14990 | loss 0.0084 0.0765 0.3480 1.0820 | lr 2.4e-04 | norm 0.1164 | dt 0.026 | |
| type train | step 15000 | loss 0.0085 0.0738 0.3357 1.0428 | lr 2.4e-04 | norm 0.1023 | dt 0.026 | |
| type train | step 15010 | loss 0.0085 0.0764 0.3446 1.0783 | lr 2.4e-04 | norm 0.1122 | dt 0.025 | |
| type train | step 15020 | loss 0.0088 0.0758 0.3542 1.1140 | lr 2.4e-04 | norm 0.1151 | dt 0.026 | |
| type train | step 15030 | loss 0.0084 0.0762 0.3555 1.0992 | lr 2.4e-04 | norm 0.1001 | dt 0.026 | |
| type train | step 15040 | loss 0.0088 0.0763 0.3448 1.0677 | lr 2.4e-04 | norm 0.1072 | dt 0.026 | |
| type train | step 15050 | loss 0.0085 0.0764 0.3518 1.0823 | lr 2.4e-04 | norm 0.0951 | dt 0.026 | |
| type train | step 15060 | loss 0.0084 0.0768 0.3460 1.0823 | lr 2.4e-04 | norm 0.1248 | dt 0.026 | |
| type train | step 15070 | loss 0.0085 0.0780 0.3576 1.1025 | lr 2.4e-04 | norm 0.0902 | dt 0.026 | |
| type train | step 15080 | loss 0.0085 0.0744 0.3402 1.0668 | lr 2.4e-04 | norm 0.1111 | dt 0.025 | |
| type train | step 15090 | loss 0.0087 0.0772 0.3544 1.0952 | lr 2.4e-04 | norm 0.0916 | dt 0.025 | |
| type train | step 15100 | loss 0.0083 0.0741 0.3362 1.0481 | lr 2.4e-04 | norm 0.1117 | dt 0.026 | |
| type train | step 15110 | loss 0.0086 0.0752 0.3532 1.0894 | lr 2.4e-04 | norm 0.0970 | dt 0.026 | |
| type train | step 15120 | loss 0.0085 0.0770 0.3534 1.0896 | lr 2.4e-04 | norm 0.0925 | dt 0.026 | |
| type train | step 15130 | loss 0.0085 0.0780 0.3588 1.1070 | lr 2.3e-04 | norm 0.1013 | dt 0.026 | |
| type train | step 15140 | loss 0.0088 0.0761 0.3502 1.0922 | lr 2.3e-04 | norm 0.1093 | dt 0.025 | |
| type train | step 15150 | loss 0.0086 0.0777 0.3637 1.1196 | lr 2.3e-04 | norm 0.1004 | dt 0.026 | |
| type train | step 15160 | loss 0.0085 0.0774 0.3619 1.1104 | lr 2.3e-04 | norm 0.1227 | dt 0.025 | |
| type train | step 15170 | loss 0.0086 0.0771 0.3609 1.1224 | lr 2.3e-04 | norm 0.1129 | dt 0.026 | |
| type train | step 15180 | loss 0.0082 0.0752 0.3414 1.0626 | lr 2.3e-04 | norm 0.1002 | dt 0.025 | |
| type train | step 15190 | loss 0.0083 0.0771 0.3522 1.0916 | lr 2.3e-04 | norm 0.1084 | dt 0.027 | |
| type train | step 15200 | loss 0.0088 0.0754 0.3484 1.0833 | lr 2.3e-04 | norm 0.1270 | dt 0.027 | |
| type train | step 15210 | loss 0.0085 0.0778 0.3671 1.1327 | lr 2.3e-04 | norm 0.0996 | dt 0.030 | |
| type train | step 15220 | loss 0.0085 0.0764 0.3448 1.0697 | lr 2.3e-04 | norm 0.1363 | dt 0.035 | |
| type train | step 15230 | loss 0.0086 0.0766 0.3564 1.1058 | lr 2.3e-04 | norm 0.1166 | dt 0.035 | |
| type train | step 15240 | loss 0.0083 0.0755 0.3451 1.0674 | lr 2.3e-04 | norm 0.0993 | dt 0.035 | |
| type train | step 15250 | loss 0.0084 0.0778 0.3665 1.1258 | lr 2.3e-04 | norm 0.1156 | dt 0.037 | |
| type train | step 15260 | loss 0.0084 0.0766 0.3519 1.0913 | lr 2.3e-04 | norm 0.0925 | dt 0.031 | |
| type train | step 15270 | loss 0.0085 0.0778 0.3588 1.1130 | lr 2.3e-04 | norm 0.1032 | dt 0.031 | |
| type train | step 15280 | loss 0.0085 0.0745 0.3414 1.0672 | lr 2.3e-04 | norm 0.1066 | dt 0.033 | |
| type train | step 15290 | loss 0.0088 0.0779 0.3627 1.1231 | lr 2.3e-04 | norm 0.1265 | dt 0.032 | |
| type train | step 15300 | loss 0.0084 0.0760 0.3442 1.0592 | lr 2.3e-04 | norm 0.1035 | dt 0.032 | |
| type train | step 15310 | loss 0.0082 0.0758 0.3521 1.0907 | lr 2.3e-04 | norm 0.1063 | dt 0.032 | |
| type train | step 15320 | loss 0.0084 0.0760 0.3461 1.0719 | lr 2.2e-04 | norm 0.0856 | dt 0.031 | |
| type train | step 15330 | loss 0.0088 0.0791 0.3702 1.1464 | lr 2.2e-04 | norm 0.0946 | dt 0.031 | |
| type train | step 15340 | loss 0.0083 0.0760 0.3486 1.0752 | lr 2.2e-04 | norm 0.1184 | dt 0.031 | |
| type train | step 15350 | loss 0.0083 0.0735 0.3391 1.0620 | lr 2.2e-04 | norm 0.0892 | dt 0.031 | |
| type train | step 15360 | loss 0.0085 0.0766 0.3514 1.0746 | lr 2.2e-04 | norm 0.1008 | dt 0.028 | |
| type train | step 15370 | loss 0.0087 0.0772 0.3438 1.0616 | lr 2.2e-04 | norm 0.0980 | dt 0.026 | |
| type train | step 15380 | loss 0.0084 0.0756 0.3457 1.0746 | lr 2.2e-04 | norm 0.0886 | dt 0.026 | |
| type train | step 15390 | loss 0.0090 0.0759 0.3487 1.0865 | lr 2.2e-04 | norm 0.1408 | dt 0.025 | |
| type train | step 15400 | loss 0.0085 0.0776 0.3546 1.1026 | lr 2.2e-04 | norm 0.1234 | dt 0.025 | |
| type train | step 15410 | loss 0.0085 0.0758 0.3542 1.0979 | lr 2.2e-04 | norm 0.1095 | dt 0.026 | |
| type train | step 15420 | loss 0.0083 0.0766 0.3481 1.0622 | lr 2.2e-04 | norm 0.1134 | dt 0.026 | |
| type train | step 15430 | loss 0.0085 0.0770 0.3474 1.0751 | lr 2.2e-04 | norm 0.1081 | dt 0.026 | |
| type train | step 15440 | loss 0.0084 0.0757 0.3498 1.0934 | lr 2.2e-04 | norm 0.1066 | dt 0.026 | |
| type train | step 15450 | loss 0.0090 0.0784 0.3606 1.1199 | lr 2.2e-04 | norm 0.1050 | dt 0.025 | |
| type train | step 15460 | loss 0.0087 0.0758 0.3481 1.0886 | lr 2.2e-04 | norm 0.1034 | dt 0.025 | |
| type train | step 15470 | loss 0.0084 0.0759 0.3546 1.0899 | lr 2.2e-04 | norm 0.0955 | dt 0.025 | |
| type train | step 15480 | loss 0.0083 0.0760 0.3465 1.0666 | lr 2.2e-04 | norm 0.1036 | dt 0.025 | |
| type train | step 15490 | loss 0.0086 0.0795 0.3589 1.0925 | lr 2.2e-04 | norm 0.1050 | dt 0.025 | |
| type train | step 15500 | loss 0.0085 0.0751 0.3416 1.0685 | lr 2.2e-04 | norm 0.0939 | dt 0.025 | |
| type train | step 15510 | loss 0.0084 0.0750 0.3443 1.0789 | lr 2.2e-04 | norm 0.0951 | dt 0.026 | |
| type train | step 15520 | loss 0.0087 0.0753 0.3414 1.0637 | lr 2.2e-04 | norm 0.0926 | dt 0.026 | |
| type train | step 15530 | loss 0.0088 0.0760 0.3562 1.1120 | lr 2.1e-04 | norm 0.1125 | dt 0.025 | |
| type train | step 15540 | loss 0.0085 0.0747 0.3383 1.0591 | lr 2.1e-04 | norm 0.1125 | dt 0.025 | |
| type train | step 15550 | loss 0.0086 0.0785 0.3492 1.0737 | lr 2.1e-04 | norm 0.1216 | dt 0.025 | |
| type train | step 15560 | loss 0.0087 0.0759 0.3498 1.0866 | lr 2.1e-04 | norm 0.1072 | dt 0.026 | |
| type train | step 15570 | loss 0.0086 0.0748 0.3466 1.0842 | lr 2.1e-04 | norm 0.1090 | dt 0.026 | |
| type train | step 15580 | loss 0.0086 0.0771 0.3579 1.1036 | lr 2.1e-04 | norm 0.0959 | dt 0.025 | |
| type train | step 15590 | loss 0.0083 0.0763 0.3554 1.0994 | lr 2.1e-04 | norm 0.0859 | dt 0.026 | |
| type train | step 15600 | loss 0.0084 0.0764 0.3476 1.0813 | lr 2.1e-04 | norm 0.1114 | dt 0.026 | |
| type train | step 15610 | loss 0.0084 0.0738 0.3354 1.0423 | lr 2.1e-04 | norm 0.1033 | dt 0.025 | |
| type train | step 15620 | loss 0.0084 0.0763 0.3445 1.0782 | lr 2.1e-04 | norm 0.1105 | dt 0.025 | |
| type train | step 15630 | loss 0.0088 0.0758 0.3539 1.1134 | lr 2.1e-04 | norm 0.1116 | dt 0.026 | |
| type train | step 15640 | loss 0.0084 0.0761 0.3552 1.0986 | lr 2.1e-04 | norm 0.1027 | dt 0.026 | |
| type train | step 15650 | loss 0.0087 0.0763 0.3446 1.0674 | lr 2.1e-04 | norm 0.1080 | dt 0.025 | |
| type train | step 15660 | loss 0.0084 0.0763 0.3514 1.0819 | lr 2.1e-04 | norm 0.0912 | dt 0.026 | |
| type train | step 15670 | loss 0.0084 0.0767 0.3458 1.0814 | lr 2.1e-04 | norm 0.1195 | dt 0.026 | |
| type train | step 15680 | loss 0.0085 0.0779 0.3574 1.1020 | lr 2.1e-04 | norm 0.0920 | dt 0.025 | |
| type train | step 15690 | loss 0.0085 0.0743 0.3399 1.0663 | lr 2.1e-04 | norm 0.1134 | dt 0.026 | |
| type train | step 15700 | loss 0.0087 0.0771 0.3543 1.0945 | lr 2.1e-04 | norm 0.0860 | dt 0.026 | |
| type train | step 15710 | loss 0.0083 0.0740 0.3361 1.0481 | lr 2.1e-04 | norm 0.1098 | dt 0.025 | |
| type train | step 15720 | loss 0.0086 0.0751 0.3531 1.0889 | lr 2.1e-04 | norm 0.1024 | dt 0.025 | |
| type train | step 15730 | loss 0.0085 0.0769 0.3532 1.0891 | lr 2.0e-04 | norm 0.0906 | dt 0.026 | |
| type train | step 15740 | loss 0.0085 0.0780 0.3585 1.1066 | lr 2.0e-04 | norm 0.1021 | dt 0.030 | |
| type train | step 15750 | loss 0.0087 0.0760 0.3499 1.0916 | lr 2.0e-04 | norm 0.1101 | dt 0.035 | |
| type train | step 15760 | loss 0.0086 0.0776 0.3636 1.1187 | lr 2.0e-04 | norm 0.1002 | dt 0.032 | |
| type train | step 15770 | loss 0.0085 0.0774 0.3616 1.1099 | lr 2.0e-04 | norm 0.1234 | dt 0.034 | |
| type train | step 15780 | loss 0.0085 0.0770 0.3608 1.1220 | lr 2.0e-04 | norm 0.1021 | dt 0.033 | |
| type train | step 15790 | loss 0.0082 0.0751 0.3411 1.0620 | lr 2.0e-04 | norm 0.0991 | dt 0.033 | |
| type train | step 15800 | loss 0.0083 0.0770 0.3519 1.0912 | lr 2.0e-04 | norm 0.1105 | dt 0.032 | |
| type train | step 15810 | loss 0.0088 0.0752 0.3481 1.0822 | lr 2.0e-04 | norm 0.1224 | dt 0.033 | |
| type train | step 15820 | loss 0.0084 0.0777 0.3668 1.1321 | lr 2.0e-04 | norm 0.0978 | dt 0.032 | |
| type train | step 15830 | loss 0.0084 0.0763 0.3446 1.0697 | lr 2.0e-04 | norm 0.1389 | dt 0.032 | |
| type train | step 15840 | loss 0.0086 0.0765 0.3563 1.1050 | lr 2.0e-04 | norm 0.1129 | dt 0.032 | |
| type train | step 15850 | loss 0.0082 0.0753 0.3451 1.0673 | lr 2.0e-04 | norm 0.0969 | dt 0.032 | |
| type train | step 15860 | loss 0.0084 0.0777 0.3663 1.1247 | lr 2.0e-04 | norm 0.1114 | dt 0.032 | |
| type train | step 15870 | loss 0.0084 0.0765 0.3517 1.0908 | lr 2.0e-04 | norm 0.0926 | dt 0.027 | |
| type train | step 15880 | loss 0.0085 0.0777 0.3586 1.1123 | lr 2.0e-04 | norm 0.0961 | dt 0.025 | |
| type train | step 15890 | loss 0.0084 0.0744 0.3412 1.0669 | lr 2.0e-04 | norm 0.1041 | dt 0.026 | |
| type train | step 15900 | loss 0.0087 0.0778 0.3624 1.1223 | lr 2.0e-04 | norm 0.1253 | dt 0.025 | |
| type train | step 15910 | loss 0.0084 0.0759 0.3440 1.0581 | lr 2.0e-04 | norm 0.1050 | dt 0.025 | |
| type train | step 15920 | loss 0.0082 0.0757 0.3520 1.0905 | lr 2.0e-04 | norm 0.1008 | dt 0.025 | |
| type train | step 15930 | loss 0.0083 0.0759 0.3459 1.0714 | lr 2.0e-04 | norm 0.0835 | dt 0.026 | |
| type train | step 15940 | loss 0.0088 0.0790 0.3699 1.1458 | lr 2.0e-04 | norm 0.0926 | dt 0.025 | |
| type train | step 15950 | loss 0.0083 0.0759 0.3483 1.0748 | lr 1.9e-04 | norm 0.1169 | dt 0.026 | |
| type train | step 15960 | loss 0.0082 0.0734 0.3391 1.0619 | lr 1.9e-04 | norm 0.0908 | dt 0.026 | |
| type train | step 15970 | loss 0.0085 0.0764 0.3511 1.0739 | lr 1.9e-04 | norm 0.0993 | dt 0.025 | |
| type train | step 15980 | loss 0.0086 0.0771 0.3436 1.0614 | lr 1.9e-04 | norm 0.0997 | dt 0.026 | |
| type train | step 15990 | loss 0.0083 0.0755 0.3455 1.0744 | lr 1.9e-04 | norm 0.0832 | dt 0.025 | |
| type train | step 16000 | loss 0.0090 0.0759 0.3483 1.0857 | lr 1.9e-04 | norm 0.1457 | dt 0.026 | |
| type train | step 16010 | loss 0.0085 0.0774 0.3544 1.1022 | lr 1.9e-04 | norm 0.1231 | dt 0.025 | |
| type train | step 16020 | loss 0.0085 0.0757 0.3540 1.0968 | lr 1.9e-04 | norm 0.1022 | dt 0.025 | |
| type train | step 16030 | loss 0.0083 0.0764 0.3478 1.0616 | lr 1.9e-04 | norm 0.1123 | dt 0.026 | |
| type train | step 16040 | loss 0.0085 0.0769 0.3470 1.0748 | lr 1.9e-04 | norm 0.1011 | dt 0.025 | |
| type train | step 16050 | loss 0.0084 0.0757 0.3495 1.0930 | lr 1.9e-04 | norm 0.1100 | dt 0.025 | |
| type train | step 16060 | loss 0.0089 0.0783 0.3602 1.1195 | lr 1.9e-04 | norm 0.1043 | dt 0.025 | |
| type train | step 16070 | loss 0.0087 0.0757 0.3478 1.0882 | lr 1.9e-04 | norm 0.1021 | dt 0.026 | |
| type train | step 16080 | loss 0.0084 0.0758 0.3543 1.0898 | lr 1.9e-04 | norm 0.0935 | dt 0.026 | |
| type train | step 16090 | loss 0.0083 0.0758 0.3462 1.0660 | lr 1.9e-04 | norm 0.1005 | dt 0.025 | |
| type train | step 16100 | loss 0.0086 0.0794 0.3587 1.0919 | lr 1.9e-04 | norm 0.1028 | dt 0.026 | |
| type train | step 16110 | loss 0.0085 0.0750 0.3415 1.0681 | lr 1.9e-04 | norm 0.0882 | dt 0.028 | |
| type train | step 16120 | loss 0.0084 0.0750 0.3441 1.0785 | lr 1.9e-04 | norm 0.0952 | dt 0.026 | |
| type train | step 16130 | loss 0.0086 0.0752 0.3412 1.0631 | lr 1.9e-04 | norm 0.0893 | dt 0.027 | |
| type train | step 16140 | loss 0.0087 0.0759 0.3561 1.1116 | lr 1.9e-04 | norm 0.1128 | dt 0.029 | |
| type train | step 16150 | loss 0.0084 0.0746 0.3379 1.0588 | lr 1.9e-04 | norm 0.1112 | dt 0.026 | |
| type train | step 16160 | loss 0.0086 0.0784 0.3489 1.0733 | lr 1.9e-04 | norm 0.1146 | dt 0.026 | |
| type train | step 16170 | loss 0.0086 0.0758 0.3497 1.0864 | lr 1.9e-04 | norm 0.1156 | dt 0.026 | |
| type train | step 16180 | loss 0.0086 0.0747 0.3464 1.0838 | lr 1.8e-04 | norm 0.1088 | dt 0.026 | |
| type train | step 16190 | loss 0.0086 0.0771 0.3578 1.1033 | lr 1.8e-04 | norm 0.0936 | dt 0.026 | |
| type train | step 16200 | loss 0.0083 0.0762 0.3552 1.0994 | lr 1.8e-04 | norm 0.0902 | dt 0.026 | |
| type train | step 16210 | loss 0.0083 0.0763 0.3474 1.0810 | lr 1.8e-04 | norm 0.1066 | dt 0.026 | |
| type train | step 16220 | loss 0.0084 0.0736 0.3352 1.0418 | lr 1.8e-04 | norm 0.1010 | dt 0.026 | |
| type train | step 16230 | loss 0.0084 0.0762 0.3443 1.0779 | lr 1.8e-04 | norm 0.1065 | dt 0.026 | |
| type train | step 16240 | loss 0.0088 0.0757 0.3537 1.1127 | lr 1.8e-04 | norm 0.1145 | dt 0.026 | |
| type train | step 16250 | loss 0.0084 0.0761 0.3552 1.0978 | lr 1.8e-04 | norm 0.1035 | dt 0.026 | |
| type train | step 16260 | loss 0.0087 0.0763 0.3444 1.0669 | lr 1.8e-04 | norm 0.1091 | dt 0.026 | |
| type train | step 16270 | loss 0.0084 0.0761 0.3512 1.0818 | lr 1.8e-04 | norm 0.0928 | dt 0.032 | |
| type train | step 16280 | loss 0.0084 0.0766 0.3456 1.0806 | lr 1.8e-04 | norm 0.1214 | dt 0.035 | |
| type train | step 16290 | loss 0.0085 0.0778 0.3572 1.1014 | lr 1.8e-04 | norm 0.0888 | dt 0.035 | |
| type train | step 16300 | loss 0.0084 0.0743 0.3397 1.0657 | lr 1.8e-04 | norm 0.1121 | dt 0.035 | |
| type train | step 16310 | loss 0.0087 0.0770 0.3541 1.0942 | lr 1.8e-04 | norm 0.0824 | dt 0.035 | |
| type train | step 16320 | loss 0.0083 0.0740 0.3359 1.0476 | lr 1.8e-04 | norm 0.1122 | dt 0.035 | |
| type train | step 16330 | loss 0.0086 0.0750 0.3529 1.0887 | lr 1.8e-04 | norm 0.0921 | dt 0.035 | |
| type train | step 16340 | loss 0.0084 0.0768 0.3529 1.0883 | lr 1.8e-04 | norm 0.0877 | dt 0.035 | |
| type train | step 16350 | loss 0.0084 0.0779 0.3582 1.1058 | lr 1.8e-04 | norm 0.1026 | dt 0.035 | |
| type train | step 16360 | loss 0.0087 0.0759 0.3495 1.0911 | lr 1.8e-04 | norm 0.1127 | dt 0.035 | |
| type train | step 16370 | loss 0.0085 0.0775 0.3634 1.1183 | lr 1.8e-04 | norm 0.1010 | dt 0.035 | |
| type train | step 16380 | loss 0.0085 0.0774 0.3613 1.1093 | lr 1.8e-04 | norm 0.1264 | dt 0.036 | |
| type train | step 16390 | loss 0.0085 0.0769 0.3607 1.1216 | lr 1.8e-04 | norm 0.1004 | dt 0.035 | |
| type train | step 16400 | loss 0.0082 0.0751 0.3408 1.0613 | lr 1.8e-04 | norm 0.0974 | dt 0.035 | |
| type train | step 16410 | loss 0.0083 0.0769 0.3518 1.0907 | lr 1.8e-04 | norm 0.1081 | dt 0.035 | |
| type train | step 16420 | loss 0.0087 0.0752 0.3476 1.0817 | lr 1.7e-04 | norm 0.1230 | dt 0.035 | |
| type train | step 16430 | loss 0.0084 0.0777 0.3666 1.1319 | lr 1.7e-04 | norm 0.0994 | dt 0.034 | |
| type train | step 16440 | loss 0.0084 0.0762 0.3442 1.0692 | lr 1.7e-04 | norm 0.1383 | dt 0.034 | |
| type train | step 16450 | loss 0.0086 0.0764 0.3560 1.1045 | lr 1.7e-04 | norm 0.1122 | dt 0.034 | |
| type train | step 16460 | loss 0.0082 0.0753 0.3449 1.0668 | lr 1.7e-04 | norm 0.0989 | dt 0.034 | |
| type train | step 16470 | loss 0.0083 0.0777 0.3662 1.1242 | lr 1.7e-04 | norm 0.1066 | dt 0.036 | |
| type train | step 16480 | loss 0.0083 0.0764 0.3517 1.0906 | lr 1.7e-04 | norm 0.0905 | dt 0.033 | |
| type train | step 16490 | loss 0.0084 0.0776 0.3582 1.1118 | lr 1.7e-04 | norm 0.0987 | dt 0.033 | |
| type train | step 16500 | loss 0.0084 0.0743 0.3408 1.0664 | lr 1.7e-04 | norm 0.1035 | dt 0.033 | |
| type train | step 16510 | loss 0.0087 0.0776 0.3622 1.1218 | lr 1.7e-04 | norm 0.1213 | dt 0.031 | |
| type train | step 16520 | loss 0.0083 0.0758 0.3437 1.0572 | lr 1.7e-04 | norm 0.1035 | dt 0.030 | |
| type train | step 16530 | loss 0.0081 0.0756 0.3519 1.0900 | lr 1.7e-04 | norm 0.0980 | dt 0.029 | |
| type train | step 16540 | loss 0.0083 0.0759 0.3459 1.0713 | lr 1.7e-04 | norm 0.0799 | dt 0.026 | |
| type train | step 16550 | loss 0.0087 0.0789 0.3697 1.1456 | lr 1.7e-04 | norm 0.0961 | dt 0.026 | |
| type train | step 16560 | loss 0.0082 0.0758 0.3480 1.0746 | lr 1.7e-04 | norm 0.1130 | dt 0.025 | |
| type train | step 16570 | loss 0.0082 0.0733 0.3389 1.0612 | lr 1.7e-04 | norm 0.0833 | dt 0.026 | |
| type train | step 16580 | loss 0.0084 0.0763 0.3510 1.0731 | lr 1.7e-04 | norm 0.0986 | dt 0.025 | |
| type train | step 16590 | loss 0.0086 0.0770 0.3434 1.0606 | lr 1.7e-04 | norm 0.0950 | dt 0.025 | |
| type train | step 16600 | loss 0.0083 0.0755 0.3454 1.0742 | lr 1.7e-04 | norm 0.0867 | dt 0.025 | |
| type train | step 16610 | loss 0.0090 0.0758 0.3480 1.0853 | lr 1.7e-04 | norm 0.1391 | dt 0.025 | |
| type train | step 16620 | loss 0.0085 0.0774 0.3540 1.1019 | lr 1.7e-04 | norm 0.1224 | dt 0.025 | |
| type train | step 16630 | loss 0.0085 0.0756 0.3538 1.0967 | lr 1.7e-04 | norm 0.1003 | dt 0.026 | |
| type train | step 16640 | loss 0.0082 0.0764 0.3477 1.0611 | lr 1.7e-04 | norm 0.1107 | dt 0.025 | |
| type train | step 16650 | loss 0.0084 0.0769 0.3468 1.0744 | lr 1.7e-04 | norm 0.0991 | dt 0.025 | |
| type train | step 16660 | loss 0.0083 0.0756 0.3494 1.0929 | lr 1.7e-04 | norm 0.1086 | dt 0.025 | |
| type train | step 16670 | loss 0.0089 0.0782 0.3600 1.1186 | lr 1.6e-04 | norm 0.1010 | dt 0.026 | |
| type train | step 16680 | loss 0.0087 0.0756 0.3476 1.0877 | lr 1.6e-04 | norm 0.1064 | dt 0.026 | |
| type train | step 16690 | loss 0.0083 0.0757 0.3541 1.0893 | lr 1.6e-04 | norm 0.0941 | dt 0.025 | |
| type train | step 16700 | loss 0.0082 0.0758 0.3461 1.0659 | lr 1.6e-04 | norm 0.1002 | dt 0.025 | |
| type train | step 16710 | loss 0.0085 0.0793 0.3586 1.0918 | lr 1.6e-04 | norm 0.1009 | dt 0.026 | |
| type train | step 16720 | loss 0.0085 0.0750 0.3412 1.0679 | lr 1.6e-04 | norm 0.0895 | dt 0.025 | |
| type train | step 16730 | loss 0.0083 0.0750 0.3438 1.0780 | lr 1.6e-04 | norm 0.0967 | dt 0.026 | |
| type train | step 16740 | loss 0.0086 0.0751 0.3410 1.0628 | lr 1.6e-04 | norm 0.0855 | dt 0.026 | |
| type train | step 16750 | loss 0.0087 0.0758 0.3558 1.1113 | lr 1.6e-04 | norm 0.1086 | dt 0.025 | |
| type train | step 16760 | loss 0.0084 0.0745 0.3376 1.0584 | lr 1.6e-04 | norm 0.1115 | dt 0.025 | |
| type train | step 16770 | loss 0.0086 0.0783 0.3487 1.0732 | lr 1.6e-04 | norm 0.1150 | dt 0.025 | |
| type train | step 16780 | loss 0.0086 0.0758 0.3495 1.0860 | lr 1.6e-04 | norm 0.1116 | dt 0.025 | |
| type train | step 16790 | loss 0.0086 0.0746 0.3461 1.0832 | lr 1.6e-04 | norm 0.1123 | dt 0.025 | |
| type train | step 16800 | loss 0.0086 0.0770 0.3577 1.1033 | lr 1.6e-04 | norm 0.0876 | dt 0.026 | |
| type train | step 16810 | loss 0.0083 0.0761 0.3551 1.0993 | lr 1.6e-04 | norm 0.0910 | dt 0.026 | |
| type train | step 16820 | loss 0.0083 0.0762 0.3473 1.0803 | lr 1.6e-04 | norm 0.1091 | dt 0.026 | |
| type train | step 16830 | loss 0.0083 0.0735 0.3349 1.0415 | lr 1.6e-04 | norm 0.1066 | dt 0.025 | |
| type train | step 16840 | loss 0.0084 0.0762 0.3442 1.0776 | lr 1.6e-04 | norm 0.1033 | dt 0.025 | |
| type train | step 16850 | loss 0.0087 0.0756 0.3534 1.1120 | lr 1.6e-04 | norm 0.1108 | dt 0.026 | |
| type train | step 16860 | loss 0.0083 0.0760 0.3548 1.0974 | lr 1.6e-04 | norm 0.1014 | dt 0.025 | |
| type train | step 16870 | loss 0.0087 0.0762 0.3443 1.0665 | lr 1.6e-04 | norm 0.1100 | dt 0.025 | |
| type train | step 16880 | loss 0.0084 0.0761 0.3510 1.0813 | lr 1.6e-04 | norm 0.0945 | dt 0.025 | |
| type train | step 16890 | loss 0.0084 0.0765 0.3453 1.0801 | lr 1.6e-04 | norm 0.1157 | dt 0.026 | |
| type train | step 16900 | loss 0.0084 0.0777 0.3571 1.1009 | lr 1.6e-04 | norm 0.0923 | dt 0.026 | |
| type train | step 16910 | loss 0.0084 0.0742 0.3394 1.0654 | lr 1.6e-04 | norm 0.1088 | dt 0.027 | |
| type train | step 16920 | loss 0.0086 0.0769 0.3539 1.0940 | lr 1.6e-04 | norm 0.0833 | dt 0.027 | |
| type train | step 16930 | loss 0.0082 0.0739 0.3357 1.0471 | lr 1.6e-04 | norm 0.1113 | dt 0.027 | |
| type train | step 16940 | loss 0.0085 0.0749 0.3527 1.0888 | lr 1.5e-04 | norm 0.0944 | dt 0.026 | |
| type train | step 16950 | loss 0.0084 0.0768 0.3528 1.0881 | lr 1.5e-04 | norm 0.0922 | dt 0.027 | |
| type train | step 16960 | loss 0.0084 0.0778 0.3580 1.1056 | lr 1.5e-04 | norm 0.0978 | dt 0.029 | |
| type train | step 16970 | loss 0.0086 0.0759 0.3492 1.0907 | lr 1.5e-04 | norm 0.1151 | dt 0.031 | |
| type train | step 16980 | loss 0.0085 0.0774 0.3632 1.1176 | lr 1.5e-04 | norm 0.1010 | dt 0.035 | |
| type train | step 16990 | loss 0.0084 0.0773 0.3611 1.1087 | lr 1.5e-04 | norm 0.1227 | dt 0.035 | |
| type train | step 17000 | loss 0.0085 0.0769 0.3604 1.1212 | lr 1.5e-04 | norm 0.0958 | dt 0.035 | |
| type train | step 17010 | loss 0.0082 0.0750 0.3404 1.0608 | lr 1.5e-04 | norm 0.0964 | dt 0.032 | |
| type train | step 17020 | loss 0.0082 0.0768 0.3516 1.0899 | lr 1.5e-04 | norm 0.1061 | dt 0.031 | |
| type train | step 17030 | loss 0.0087 0.0750 0.3473 1.0813 | lr 1.5e-04 | norm 0.1204 | dt 0.033 | |
| type train | step 17040 | loss 0.0084 0.0776 0.3664 1.1311 | lr 1.5e-04 | norm 0.1019 | dt 0.032 | |
| type train | step 17050 | loss 0.0084 0.0761 0.3440 1.0689 | lr 1.5e-04 | norm 0.1374 | dt 0.032 | |
| type train | step 17060 | loss 0.0085 0.0763 0.3559 1.1039 | lr 1.5e-04 | norm 0.1109 | dt 0.032 | |
| type train | step 17070 | loss 0.0082 0.0752 0.3448 1.0667 | lr 1.5e-04 | norm 0.0992 | dt 0.031 | |
| type train | step 17080 | loss 0.0083 0.0775 0.3661 1.1237 | lr 1.5e-04 | norm 0.1092 | dt 0.030 | |
| type train | step 17090 | loss 0.0083 0.0763 0.3514 1.0905 | lr 1.5e-04 | norm 0.0897 | dt 0.027 | |
| type train | step 17100 | loss 0.0084 0.0776 0.3581 1.1110 | lr 1.5e-04 | norm 0.0985 | dt 0.026 | |
| type train | step 17110 | loss 0.0084 0.0742 0.3407 1.0661 | lr 1.5e-04 | norm 0.1051 | dt 0.026 | |
| type train | step 17120 | loss 0.0087 0.0775 0.3621 1.1214 | lr 1.5e-04 | norm 0.1181 | dt 0.025 | |
| type train | step 17130 | loss 0.0083 0.0757 0.3435 1.0570 | lr 1.5e-04 | norm 0.0996 | dt 0.025 | |
| type train | step 17140 | loss 0.0081 0.0755 0.3516 1.0895 | lr 1.5e-04 | norm 0.1011 | dt 0.025 | |
| type train | step 17150 | loss 0.0083 0.0758 0.3457 1.0711 | lr 1.5e-04 | norm 0.0836 | dt 0.025 | |
| type train | step 17160 | loss 0.0087 0.0789 0.3695 1.1448 | lr 1.5e-04 | norm 0.0949 | dt 0.025 | |
| type train | step 17170 | loss 0.0082 0.0757 0.3478 1.0742 | lr 1.5e-04 | norm 0.1139 | dt 0.026 | |
| type train | step 17180 | loss 0.0082 0.0732 0.3387 1.0612 | lr 1.5e-04 | norm 0.0864 | dt 0.025 | |
| type train | step 17190 | loss 0.0084 0.0763 0.3508 1.0729 | lr 1.5e-04 | norm 0.0979 | dt 0.025 | |
| type train | step 17200 | loss 0.0086 0.0770 0.3434 1.0602 | lr 1.5e-04 | norm 0.0974 | dt 0.027 | |
| type train | step 17210 | loss 0.0083 0.0754 0.3452 1.0740 | lr 1.5e-04 | norm 0.0850 | dt 0.025 | |
| type train | step 17220 | loss 0.0089 0.0757 0.3478 1.0849 | lr 1.5e-04 | norm 0.1371 | dt 0.025 | |
| type train | step 17230 | loss 0.0084 0.0772 0.3538 1.1013 | lr 1.5e-04 | norm 0.1144 | dt 0.025 | |
| type train | step 17240 | loss 0.0084 0.0755 0.3537 1.0965 | lr 1.4e-04 | norm 0.0976 | dt 0.025 | |
| type train | step 17250 | loss 0.0082 0.0763 0.3476 1.0608 | lr 1.4e-04 | norm 0.1107 | dt 0.025 | |
| type train | step 17260 | loss 0.0084 0.0768 0.3466 1.0741 | lr 1.4e-04 | norm 0.1016 | dt 0.026 | |
| type train | step 17270 | loss 0.0083 0.0755 0.3492 1.0929 | lr 1.4e-04 | norm 0.1042 | dt 0.027 | |
| type train | step 17280 | loss 0.0089 0.0782 0.3598 1.1177 | lr 1.4e-04 | norm 0.1054 | dt 0.026 | |
| type train | step 17290 | loss 0.0086 0.0755 0.3474 1.0867 | lr 1.4e-04 | norm 0.0995 | dt 0.025 | |
| type train | step 17300 | loss 0.0083 0.0757 0.3540 1.0890 | lr 1.4e-04 | norm 0.0956 | dt 0.025 | |
| type train | step 17310 | loss 0.0082 0.0757 0.3459 1.0656 | lr 1.4e-04 | norm 0.1061 | dt 0.026 | |
| type train | step 17320 | loss 0.0085 0.0792 0.3583 1.0914 | lr 1.4e-04 | norm 0.0997 | dt 0.026 | |
| type train | step 17330 | loss 0.0084 0.0749 0.3411 1.0675 | lr 1.4e-04 | norm 0.0926 | dt 0.025 | |
| type train | step 17340 | loss 0.0083 0.0749 0.3437 1.0773 | lr 1.4e-04 | norm 0.0958 | dt 0.026 | |
| type train | step 17350 | loss 0.0086 0.0750 0.3409 1.0627 | lr 1.4e-04 | norm 0.0817 | dt 0.026 | |
| type train | step 17360 | loss 0.0087 0.0757 0.3557 1.1108 | lr 1.4e-04 | norm 0.1066 | dt 0.025 | |
| type train | step 17370 | loss 0.0084 0.0745 0.3374 1.0578 | lr 1.4e-04 | norm 0.1083 | dt 0.026 | |
| type train | step 17380 | loss 0.0085 0.0782 0.3487 1.0724 | lr 1.4e-04 | norm 0.1128 | dt 0.026 | |
| type train | step 17390 | loss 0.0086 0.0757 0.3494 1.0852 | lr 1.4e-04 | norm 0.1087 | dt 0.025 | |
| type train | step 17400 | loss 0.0085 0.0746 0.3459 1.0829 | lr 1.4e-04 | norm 0.1109 | dt 0.026 | |
| type train | step 17410 | loss 0.0085 0.0769 0.3575 1.1026 | lr 1.4e-04 | norm 0.0862 | dt 0.026 | |
| type train | step 17420 | loss 0.0083 0.0761 0.3549 1.0986 | lr 1.4e-04 | norm 0.0889 | dt 0.025 | |
| type train | step 17430 | loss 0.0083 0.0762 0.3471 1.0800 | lr 1.4e-04 | norm 0.1096 | dt 0.025 | |
| type train | step 17440 | loss 0.0083 0.0735 0.3348 1.0414 | lr 1.4e-04 | norm 0.1008 | dt 0.026 | |
| type train | step 17450 | loss 0.0083 0.0761 0.3441 1.0771 | lr 1.4e-04 | norm 0.1031 | dt 0.025 | |
| type train | step 17460 | loss 0.0087 0.0755 0.3533 1.1112 | lr 1.4e-04 | norm 0.1085 | dt 0.026 | |
| type train | step 17470 | loss 0.0083 0.0760 0.3548 1.0971 | lr 1.4e-04 | norm 0.1009 | dt 0.026 | |
| type train | step 17480 | loss 0.0087 0.0761 0.3441 1.0665 | lr 1.4e-04 | norm 0.1123 | dt 0.025 | |
| type train | step 17490 | loss 0.0083 0.0760 0.3508 1.0811 | lr 1.4e-04 | norm 0.0923 | dt 0.026 | |
| type train | step 17500 | loss 0.0083 0.0764 0.3453 1.0798 | lr 1.4e-04 | norm 0.1145 | dt 0.026 | |
| type train | step 17510 | loss 0.0084 0.0777 0.3570 1.1010 | lr 1.4e-04 | norm 0.0895 | dt 0.035 | |
| type train | step 17520 | loss 0.0084 0.0741 0.3392 1.0648 | lr 1.4e-04 | norm 0.1110 | dt 0.033 | |
| type train | step 17530 | loss 0.0086 0.0768 0.3539 1.0932 | lr 1.4e-04 | norm 0.0830 | dt 0.033 | |
| type train | step 17540 | loss 0.0082 0.0738 0.3355 1.0468 | lr 1.4e-04 | norm 0.1094 | dt 0.033 | |
| type train | step 17550 | loss 0.0085 0.0749 0.3527 1.0884 | lr 1.4e-04 | norm 0.0900 | dt 0.033 | |
| type train | step 17560 | loss 0.0084 0.0767 0.3526 1.0879 | lr 1.4e-04 | norm 0.0891 | dt 0.032 | |
| type train | step 17570 | loss 0.0084 0.0777 0.3579 1.1053 | lr 1.3e-04 | norm 0.1009 | dt 0.032 | |
| type train | step 17580 | loss 0.0086 0.0758 0.3490 1.0903 | lr 1.3e-04 | norm 0.1161 | dt 0.033 | |
| type train | step 17590 | loss 0.0085 0.0773 0.3630 1.1173 | lr 1.3e-04 | norm 0.1015 | dt 0.032 | |
| type train | step 17600 | loss 0.0084 0.0772 0.3610 1.1087 | lr 1.3e-04 | norm 0.1216 | dt 0.032 | |
| type train | step 17610 | loss 0.0085 0.0768 0.3603 1.1209 | lr 1.3e-04 | norm 0.0942 | dt 0.032 | |
| type train | step 17620 | loss 0.0081 0.0750 0.3402 1.0604 | lr 1.3e-04 | norm 0.0995 | dt 0.032 | |
| type train | step 17630 | loss 0.0082 0.0768 0.3514 1.0896 | lr 1.3e-04 | norm 0.1053 | dt 0.031 | |
| type train | step 17640 | loss 0.0087 0.0750 0.3472 1.0812 | lr 1.3e-04 | norm 0.1244 | dt 0.032 | |
| type train | step 17650 | loss 0.0083 0.0775 0.3663 1.1314 | lr 1.3e-04 | norm 0.0981 | dt 0.031 | |
| type train | step 17660 | loss 0.0083 0.0760 0.3438 1.0686 | lr 1.3e-04 | norm 0.1320 | dt 0.031 | |
| type train | step 17670 | loss 0.0085 0.0762 0.3558 1.1038 | lr 1.3e-04 | norm 0.1098 | dt 0.032 | |
| type train | step 17680 | loss 0.0081 0.0752 0.3446 1.0663 | lr 1.3e-04 | norm 0.1009 | dt 0.032 | |
| type train | step 17690 | loss 0.0083 0.0775 0.3661 1.1228 | lr 1.3e-04 | norm 0.1051 | dt 0.032 | |
| type train | step 17700 | loss 0.0083 0.0763 0.3513 1.0903 | lr 1.3e-04 | norm 0.0900 | dt 0.029 | |
| type train | step 17710 | loss 0.0084 0.0774 0.3579 1.1109 | lr 1.3e-04 | norm 0.1064 | dt 0.026 | |
| type train | step 17720 | loss 0.0084 0.0741 0.3405 1.0657 | lr 1.3e-04 | norm 0.1026 | dt 0.026 | |
| type train | step 17730 | loss 0.0086 0.0775 0.3620 1.1207 | lr 1.3e-04 | norm 0.1179 | dt 0.025 | |
| type train | step 17740 | loss 0.0083 0.0757 0.3434 1.0568 | lr 1.3e-04 | norm 0.1024 | dt 0.026 | |
| type train | step 17750 | loss 0.0081 0.0755 0.3516 1.0892 | lr 1.3e-04 | norm 0.0953 | dt 0.026 | |
| type train | step 17760 | loss 0.0082 0.0758 0.3456 1.0710 | lr 1.3e-04 | norm 0.0800 | dt 0.026 | |
| type train | step 17770 | loss 0.0087 0.0788 0.3693 1.1445 | lr 1.3e-04 | norm 0.0975 | dt 0.025 | |
| type train | step 17780 | loss 0.0082 0.0756 0.3477 1.0738 | lr 1.3e-04 | norm 0.1093 | dt 0.025 | |
| type train | step 17790 | loss 0.0082 0.0732 0.3387 1.0609 | lr 1.3e-04 | norm 0.0863 | dt 0.025 | |
| type train | step 17800 | loss 0.0084 0.0762 0.3505 1.0727 | lr 1.3e-04 | norm 0.0990 | dt 0.025 | |
| type train | step 17810 | loss 0.0086 0.0769 0.3432 1.0598 | lr 1.3e-04 | norm 0.0929 | dt 0.025 | |
| type train | step 17820 | loss 0.0083 0.0754 0.3452 1.0737 | lr 1.3e-04 | norm 0.0867 | dt 0.025 | |
| type train | step 17830 | loss 0.0089 0.0756 0.3476 1.0846 | lr 1.3e-04 | norm 0.1387 | dt 0.025 | |
| type train | step 17840 | loss 0.0084 0.0771 0.3536 1.1008 | lr 1.3e-04 | norm 0.1108 | dt 0.025 | |
| type train | step 17850 | loss 0.0084 0.0754 0.3536 1.0962 | lr 1.3e-04 | norm 0.1000 | dt 0.025 | |
| type train | step 17860 | loss 0.0082 0.0762 0.3474 1.0598 | lr 1.3e-04 | norm 0.1096 | dt 0.026 | |
| type train | step 17870 | loss 0.0084 0.0768 0.3466 1.0741 | lr 1.3e-04 | norm 0.0986 | dt 0.026 | |
| type train | step 17880 | loss 0.0083 0.0755 0.3491 1.0929 | lr 1.3e-04 | norm 0.1031 | dt 0.025 | |
| type train | step 17890 | loss 0.0088 0.0782 0.3596 1.1173 | lr 1.3e-04 | norm 0.1001 | dt 0.025 | |
| type train | step 17900 | loss 0.0086 0.0754 0.3475 1.0865 | lr 1.3e-04 | norm 0.1001 | dt 0.025 | |
| type train | step 17910 | loss 0.0083 0.0755 0.3539 1.0887 | lr 1.3e-04 | norm 0.0916 | dt 0.026 | |
| type train | step 17920 | loss 0.0082 0.0757 0.3458 1.0654 | lr 1.3e-04 | norm 0.0991 | dt 0.025 | |
| type train | step 17930 | loss 0.0085 0.0792 0.3582 1.0912 | lr 1.3e-04 | norm 0.1076 | dt 0.025 | |
| type train | step 17940 | loss 0.0084 0.0749 0.3410 1.0671 | lr 1.3e-04 | norm 0.0910 | dt 0.025 | |
| type train | step 17950 | loss 0.0083 0.0749 0.3436 1.0769 | lr 1.2e-04 | norm 0.1019 | dt 0.025 | |
| type train | step 17960 | loss 0.0086 0.0750 0.3407 1.0626 | lr 1.2e-04 | norm 0.0822 | dt 0.026 | |
| type train | step 17970 | loss 0.0086 0.0757 0.3556 1.1105 | lr 1.2e-04 | norm 0.1073 | dt 0.026 | |
| type train | step 17980 | loss 0.0083 0.0744 0.3373 1.0573 | lr 1.2e-04 | norm 0.1088 | dt 0.025 | |
| type train | step 17990 | loss 0.0085 0.0782 0.3485 1.0722 | lr 1.2e-04 | norm 0.1133 | dt 0.025 | |
| type train | step 18000 | loss 0.0086 0.0757 0.3493 1.0849 | lr 1.2e-04 | norm 0.1067 | dt 0.025 | |
| type train | step 18010 | loss 0.0085 0.0745 0.3458 1.0825 | lr 1.2e-04 | norm 0.1099 | dt 0.026 | |
| type train | step 18020 | loss 0.0085 0.0769 0.3574 1.1025 | lr 1.2e-04 | norm 0.0849 | dt 0.027 | |
| type train | step 18030 | loss 0.0083 0.0760 0.3548 1.0980 | lr 1.2e-04 | norm 0.0945 | dt 0.026 | |
| type train | step 18040 | loss 0.0082 0.0761 0.3469 1.0794 | lr 1.2e-04 | norm 0.1081 | dt 0.026 | |
| type train | step 18050 | loss 0.0083 0.0734 0.3347 1.0408 | lr 1.2e-04 | norm 0.0991 | dt 0.026 | |
| type train | step 18060 | loss 0.0083 0.0761 0.3439 1.0770 | lr 1.2e-04 | norm 0.1070 | dt 0.026 | |
| type train | step 18070 | loss 0.0087 0.0755 0.3531 1.1110 | lr 1.2e-04 | norm 0.1146 | dt 0.026 | |
| type train | step 18080 | loss 0.0083 0.0759 0.3546 1.0965 | lr 1.2e-04 | norm 0.1011 | dt 0.026 | |
| type train | step 18090 | loss 0.0086 0.0761 0.3438 1.0661 | lr 1.2e-04 | norm 0.1113 | dt 0.026 | |
| type train | step 18100 | loss 0.0083 0.0760 0.3507 1.0811 | lr 1.2e-04 | norm 0.0878 | dt 0.026 | |
| type train | step 18110 | loss 0.0083 0.0764 0.3452 1.0793 | lr 1.2e-04 | norm 0.1150 | dt 0.026 | |
| type train | step 18120 | loss 0.0084 0.0777 0.3568 1.1005 | lr 1.2e-04 | norm 0.0894 | dt 0.026 | |
| type train | step 18130 | loss 0.0084 0.0741 0.3391 1.0647 | lr 1.2e-04 | norm 0.1133 | dt 0.026 | |
| type train | step 18140 | loss 0.0086 0.0768 0.3538 1.0932 | lr 1.2e-04 | norm 0.0868 | dt 0.026 | |
| type train | step 18150 | loss 0.0082 0.0738 0.3354 1.0466 | lr 1.2e-04 | norm 0.1114 | dt 0.026 | |
| type train | step 18160 | loss 0.0085 0.0748 0.3526 1.0885 | lr 1.2e-04 | norm 0.0909 | dt 0.026 | |
| type train | step 18170 | loss 0.0084 0.0767 0.3525 1.0875 | lr 1.2e-04 | norm 0.0891 | dt 0.026 | |
| type train | step 18180 | loss 0.0084 0.0777 0.3578 1.1050 | lr 1.2e-04 | norm 0.0974 | dt 0.026 | |
| type train | step 18190 | loss 0.0086 0.0757 0.3488 1.0899 | lr 1.2e-04 | norm 0.1106 | dt 0.026 | |
| type train | step 18200 | loss 0.0084 0.0773 0.3629 1.1168 | lr 1.2e-04 | norm 0.1018 | dt 0.026 | |
| type train | step 18210 | loss 0.0084 0.0772 0.3609 1.1085 | lr 1.2e-04 | norm 0.1234 | dt 0.026 | |
| type train | step 18220 | loss 0.0085 0.0768 0.3602 1.1206 | lr 1.2e-04 | norm 0.0970 | dt 0.026 | |
| type train | step 18230 | loss 0.0081 0.0749 0.3401 1.0602 | lr 1.2e-04 | norm 0.0983 | dt 0.026 | |
| type train | step 18240 | loss 0.0082 0.0767 0.3514 1.0894 | lr 1.2e-04 | norm 0.1060 | dt 0.026 | |
| type train | step 18250 | loss 0.0086 0.0749 0.3470 1.0807 | lr 1.2e-04 | norm 0.1245 | dt 0.026 | |
| type train | step 18260 | loss 0.0083 0.0775 0.3661 1.1311 | lr 1.2e-04 | norm 0.0994 | dt 0.026 | |
| type train | step 18270 | loss 0.0084 0.0759 0.3436 1.0685 | lr 1.2e-04 | norm 0.1338 | dt 0.026 | |
| type train | step 18280 | loss 0.0085 0.0762 0.3556 1.1035 | lr 1.2e-04 | norm 0.1046 | dt 0.027 | |
| type train | step 18290 | loss 0.0081 0.0751 0.3446 1.0658 | lr 1.2e-04 | norm 0.0970 | dt 0.027 | |
| type train | step 18300 | loss 0.0083 0.0775 0.3659 1.1226 | lr 1.2e-04 | norm 0.1037 | dt 0.029 | |
| type train | step 18310 | loss 0.0083 0.0762 0.3511 1.0901 | lr 1.2e-04 | norm 0.0953 | dt 0.026 | |
| type train | step 18320 | loss 0.0084 0.0774 0.3578 1.1104 | lr 1.2e-04 | norm 0.1007 | dt 0.026 | |
| type train | step 18330 | loss 0.0083 0.0741 0.3403 1.0651 | lr 1.2e-04 | norm 0.1015 | dt 0.026 | |
| type train | step 18340 | loss 0.0086 0.0774 0.3619 1.1207 | lr 1.2e-04 | norm 0.1152 | dt 0.027 | |
| type train | step 18350 | loss 0.0083 0.0756 0.3434 1.0566 | lr 1.2e-04 | norm 0.1002 | dt 0.026 | |
| type train | step 18360 | loss 0.0081 0.0754 0.3516 1.0889 | lr 1.2e-04 | norm 0.0990 | dt 0.026 | |
| type train | step 18370 | loss 0.0082 0.0757 0.3454 1.0704 | lr 1.2e-04 | norm 0.0799 | dt 0.026 | |
| type train | step 18380 | loss 0.0087 0.0787 0.3693 1.1441 | lr 1.2e-04 | norm 0.0976 | dt 0.026 | |
| type train | step 18390 | loss 0.0082 0.0756 0.3475 1.0738 | lr 1.2e-04 | norm 0.1090 | dt 0.026 | |
| type train | step 18400 | loss 0.0081 0.0732 0.3385 1.0607 | lr 1.2e-04 | norm 0.0803 | dt 0.026 | |
| type train | step 18410 | loss 0.0084 0.0762 0.3503 1.0722 | lr 1.2e-04 | norm 0.0989 | dt 0.026 | |
| type train | step 18420 | loss 0.0085 0.0769 0.3431 1.0596 | lr 1.1e-04 | norm 0.0948 | dt 0.027 | |
| type train | step 18430 | loss 0.0082 0.0754 0.3451 1.0735 | lr 1.1e-04 | norm 0.0883 | dt 0.027 | |
| type train | step 18440 | loss 0.0089 0.0756 0.3474 1.0841 | lr 1.1e-04 | norm 0.1363 | dt 0.027 | |
| type train | step 18450 | loss 0.0084 0.0771 0.3534 1.1010 | lr 1.1e-04 | norm 0.1112 | dt 0.027 | |
| type train | step 18460 | loss 0.0084 0.0754 0.3535 1.0963 | lr 1.1e-04 | norm 0.1039 | dt 0.033 | |
| type train | step 18470 | loss 0.0082 0.0762 0.3474 1.0597 | lr 1.1e-04 | norm 0.1095 | dt 0.035 | |
| type train | step 18480 | loss 0.0084 0.0768 0.3464 1.0738 | lr 1.1e-04 | norm 0.0972 | dt 0.035 | |
| type train | step 18490 | loss 0.0083 0.0755 0.3491 1.0928 | lr 1.1e-04 | norm 0.1027 | dt 0.035 | |
| type train | step 18500 | loss 0.0088 0.0781 0.3595 1.1170 | lr 1.1e-04 | norm 0.1024 | dt 0.035 | |
| type train | step 18510 | loss 0.0086 0.0754 0.3476 1.0865 | lr 1.1e-04 | norm 0.0986 | dt 0.032 | |
| type train | step 18520 | loss 0.0082 0.0755 0.3538 1.0887 | lr 1.1e-04 | norm 0.0956 | dt 0.031 | |
| type train | step 18530 | loss 0.0082 0.0756 0.3456 1.0648 | lr 1.1e-04 | norm 0.0946 | dt 0.031 | |
| type train | step 18540 | loss 0.0084 0.0792 0.3582 1.0908 | lr 1.1e-04 | norm 0.1012 | dt 0.032 | |
| type train | step 18550 | loss 0.0084 0.0749 0.3409 1.0670 | lr 1.1e-04 | norm 0.0901 | dt 0.032 | |
| type train | step 18560 | loss 0.0083 0.0748 0.3435 1.0765 | lr 1.1e-04 | norm 0.0976 | dt 0.032 | |
| type train | step 18570 | loss 0.0085 0.0749 0.3408 1.0620 | lr 1.1e-04 | norm 0.0829 | dt 0.031 | |
| type train | step 18580 | loss 0.0086 0.0757 0.3555 1.1104 | lr 1.1e-04 | norm 0.1057 | dt 0.031 | |
| type train | step 18590 | loss 0.0083 0.0744 0.3371 1.0571 | lr 1.1e-04 | norm 0.1057 | dt 0.031 | |
| type train | step 18600 | loss 0.0085 0.0782 0.3485 1.0719 | lr 1.1e-04 | norm 0.1136 | dt 0.031 | |
| type train | step 18610 | loss 0.0086 0.0756 0.3492 1.0845 | lr 1.1e-04 | norm 0.1110 | dt 0.031 | |
| type train | step 18620 | loss 0.0085 0.0745 0.3456 1.0818 | lr 1.1e-04 | norm 0.1132 | dt 0.031 | |
| type train | step 18630 | loss 0.0085 0.0768 0.3574 1.1023 | lr 1.1e-04 | norm 0.0906 | dt 0.031 | |
| type train | step 18640 | loss 0.0082 0.0760 0.3546 1.0973 | lr 1.1e-04 | norm 0.0903 | dt 0.031 | |
| type train | step 18650 | loss 0.0082 0.0760 0.3468 1.0790 | lr 1.1e-04 | norm 0.1074 | dt 0.031 | |
| type train | step 18660 | loss 0.0083 0.0734 0.3346 1.0409 | lr 1.1e-04 | norm 0.0989 | dt 0.031 | |
| type train | step 18670 | loss 0.0083 0.0760 0.3438 1.0771 | lr 1.1e-04 | norm 0.1063 | dt 0.027 | |
| type train | step 18680 | loss 0.0087 0.0755 0.3530 1.1107 | lr 1.1e-04 | norm 0.1077 | dt 0.026 | |
| type train | step 18690 | loss 0.0083 0.0759 0.3545 1.0964 | lr 1.1e-04 | norm 0.1012 | dt 0.026 | |
| type train | step 18700 | loss 0.0086 0.0761 0.3437 1.0661 | lr 1.1e-04 | norm 0.1133 | dt 0.026 | |
| type train | step 18710 | loss 0.0083 0.0760 0.3504 1.0810 | lr 1.1e-04 | norm 0.0847 | dt 0.026 | |
| type train | step 18720 | loss 0.0083 0.0763 0.3451 1.0788 | lr 1.1e-04 | norm 0.1156 | dt 0.025 | |
| type train | step 18730 | loss 0.0084 0.0776 0.3567 1.1005 | lr 1.1e-04 | norm 0.0880 | dt 0.025 | |
| type train | step 18740 | loss 0.0083 0.0741 0.3390 1.0644 | lr 1.1e-04 | norm 0.1066 | dt 0.026 | |
| type train | step 18750 | loss 0.0085 0.0767 0.3535 1.0927 | lr 1.1e-04 | norm 0.0818 | dt 0.026 | |
| type train | step 18760 | loss 0.0082 0.0738 0.3353 1.0465 | lr 1.1e-04 | norm 0.1128 | dt 0.025 | |
| type train | step 18770 | loss 0.0085 0.0748 0.3525 1.0884 | lr 1.1e-04 | norm 0.0904 | dt 0.025 | |
| type train | step 18780 | loss 0.0083 0.0766 0.3525 1.0869 | lr 1.1e-04 | norm 0.0866 | dt 0.025 | |
| type train | step 18790 | loss 0.0083 0.0776 0.3577 1.1048 | lr 1.1e-04 | norm 0.1026 | dt 0.026 | |
| type train | step 18800 | loss 0.0086 0.0758 0.3487 1.0898 | lr 1.1e-04 | norm 0.1118 | dt 0.026 | |
| type train | step 18810 | loss 0.0084 0.0773 0.3629 1.1164 | lr 1.1e-04 | norm 0.1107 | dt 0.026 | |
| type train | step 18820 | loss 0.0084 0.0771 0.3609 1.1080 | lr 1.1e-04 | norm 0.1242 | dt 0.025 | |
| type train | step 18830 | loss 0.0084 0.0768 0.3601 1.1208 | lr 1.1e-04 | norm 0.0964 | dt 0.025 | |
| type train | step 18840 | loss 0.0081 0.0748 0.3400 1.0598 | lr 1.1e-04 | norm 0.1001 | dt 0.025 | |
| type train | step 18850 | loss 0.0082 0.0767 0.3512 1.0891 | lr 1.1e-04 | norm 0.1085 | dt 0.026 | |
| type train | step 18860 | loss 0.0086 0.0749 0.3467 1.0803 | lr 1.1e-04 | norm 0.1226 | dt 0.026 | |
| type train | step 18870 | loss 0.0083 0.0774 0.3660 1.1311 | lr 1.1e-04 | norm 0.1038 | dt 0.025 | |
| type train | step 18880 | loss 0.0083 0.0759 0.3435 1.0684 | lr 1.1e-04 | norm 0.1336 | dt 0.025 | |
| type train | step 18890 | loss 0.0085 0.0762 0.3555 1.1033 | lr 1.1e-04 | norm 0.1083 | dt 0.025 | |
| type train | step 18900 | loss 0.0081 0.0751 0.3445 1.0656 | lr 1.1e-04 | norm 0.0998 | dt 0.026 | |
| type train | step 18910 | loss 0.0083 0.0775 0.3658 1.1224 | lr 1.1e-04 | norm 0.1021 | dt 0.027 | |
| type train | step 18920 | loss 0.0082 0.0762 0.3510 1.0900 | lr 1.1e-04 | norm 0.0942 | dt 0.025 | |
| type train | step 18930 | loss 0.0083 0.0774 0.3577 1.1103 | lr 1.1e-04 | norm 0.1045 | dt 0.025 | |
| type train | step 18940 | loss 0.0083 0.0740 0.3401 1.0652 | lr 1.1e-04 | norm 0.1105 | dt 0.025 | |
| type train | step 18950 | loss 0.0086 0.0773 0.3617 1.1206 | lr 1.1e-04 | norm 0.1158 | dt 0.025 | |
| type train | step 18960 | loss 0.0083 0.0756 0.3433 1.0561 | lr 1.1e-04 | norm 0.0985 | dt 0.026 | |
| type train | step 18970 | loss 0.0081 0.0754 0.3515 1.0886 | lr 1.1e-04 | norm 0.0938 | dt 0.025 | |
| type train | step 18980 | loss 0.0082 0.0757 0.3454 1.0704 | lr 1.1e-04 | norm 0.0787 | dt 0.025 | |
| type train | step 18990 | loss 0.0087 0.0787 0.3692 1.1437 | lr 1.1e-04 | norm 0.1009 | dt 0.026 | |
| type train | step 19000 | loss 0.0082 0.0755 0.3474 1.0737 | lr 1.1e-04 | norm 0.1092 | dt 0.025 | |
| type train | step 19010 | loss 0.0081 0.0731 0.3385 1.0606 | lr 1.1e-04 | norm 0.0820 | dt 0.034 | |
| type train | step 19020 | loss 0.0083 0.0762 0.3502 1.0722 | lr 1.1e-04 | norm 0.0973 | dt 0.035 | |
| type train | step 19030 | loss 0.0085 0.0768 0.3429 1.0591 | lr 1.1e-04 | norm 0.0952 | dt 0.035 | |
| type train | step 19040 | loss 0.0082 0.0753 0.3450 1.0733 | lr 1.1e-04 | norm 0.0902 | dt 0.035 | |
| type train | step 19050 | loss 0.0089 0.0755 0.3473 1.0839 | lr 1.1e-04 | norm 0.1397 | dt 0.035 | |
| type train | step 19060 | loss 0.0084 0.0771 0.3533 1.1006 | lr 1.1e-04 | norm 0.1120 | dt 0.034 | |
| type train | step 19070 | loss 0.0084 0.0753 0.3533 1.0959 | lr 1.1e-04 | norm 0.0986 | dt 0.033 | |
| type train | step 19080 | loss 0.0082 0.0762 0.3471 1.0600 | lr 1.1e-04 | norm 0.1114 | dt 0.033 | |
| type train | step 19090 | loss 0.0084 0.0767 0.3463 1.0738 | lr 1.0e-04 | norm 0.0995 | dt 0.033 | |
| type train | step 19100 | loss 0.0082 0.0755 0.3490 1.0930 | lr 1.0e-04 | norm 0.1043 | dt 0.033 | |
| type train | step 19110 | loss 0.0088 0.0781 0.3595 1.1167 | lr 1.0e-04 | norm 0.1017 | dt 0.033 | |
| type train | step 19120 | loss 0.0086 0.0754 0.3475 1.0860 | lr 1.0e-04 | norm 0.1029 | dt 0.032 | |
| type train | step 19130 | loss 0.0082 0.0755 0.3537 1.0882 | lr 1.0e-04 | norm 0.0920 | dt 0.031 | |
| type train | step 19140 | loss 0.0081 0.0756 0.3456 1.0646 | lr 1.0e-04 | norm 0.1034 | dt 0.032 | |
| type train | step 19150 | loss 0.0084 0.0792 0.3582 1.0905 | lr 1.0e-04 | norm 0.1043 | dt 0.031 | |
| type train | step 19160 | loss 0.0084 0.0748 0.3409 1.0665 | lr 1.0e-04 | norm 0.0951 | dt 0.030 | |
| type train | step 19170 | loss 0.0082 0.0748 0.3434 1.0760 | lr 1.0e-04 | norm 0.0962 | dt 0.032 | |
| type train | step 19180 | loss 0.0085 0.0749 0.3407 1.0620 | lr 1.0e-04 | norm 0.0833 | dt 0.032 | |
| type train | step 19190 | loss 0.0086 0.0756 0.3555 1.1104 | lr 1.0e-04 | norm 0.1155 | dt 0.031 | |
| type train | step 19200 | loss 0.0083 0.0744 0.3370 1.0567 | lr 1.0e-04 | norm 0.1106 | dt 0.031 | |
| type train | step 19210 | loss 0.0085 0.0781 0.3485 1.0717 | lr 1.0e-04 | norm 0.1167 | dt 0.031 | |
| type train | step 19220 | loss 0.0085 0.0756 0.3491 1.0843 | lr 1.0e-04 | norm 0.1097 | dt 0.031 | |
| type train | step 19230 | loss 0.0084 0.0744 0.3455 1.0817 | lr 1.0e-04 | norm 0.1134 | dt 0.031 | |
| type train | step 19240 | loss 0.0085 0.0768 0.3573 1.1017 | lr 1.0e-04 | norm 0.0901 | dt 0.029 | |
| type train | step 19250 | loss 0.0082 0.0760 0.3546 1.0973 | lr 1.0e-04 | norm 0.0901 | dt 0.029 | |
| type train | step 19260 | loss 0.0082 0.0760 0.3467 1.0791 | lr 1.0e-04 | norm 0.1097 | dt 0.033 | |
| type train | step 19270 | loss 0.0082 0.0734 0.3346 1.0405 | lr 1.0e-04 | norm 0.1027 | dt 0.035 | |
| type train | step 19280 | loss 0.0083 0.0760 0.3437 1.0768 | lr 1.0e-04 | norm 0.1051 | dt 0.033 | |
| type train | step 19290 | loss 0.0087 0.0754 0.3528 1.1105 | lr 1.0e-04 | norm 0.1074 | dt 0.033 | |
| type train | step 19300 | loss 0.0082 0.0759 0.3545 1.0965 | lr 1.0e-04 | norm 0.1001 | dt 0.032 | |
| type train | step 19310 | loss 0.0086 0.0760 0.3437 1.0653 | lr 1.0e-04 | norm 0.1049 | dt 0.033 | |
| type train | step 19320 | loss 0.0083 0.0759 0.3503 1.0810 | lr 1.0e-04 | norm 0.0856 | dt 0.032 | |
| type train | step 19330 | loss 0.0083 0.0763 0.3450 1.0786 | lr 1.0e-04 | norm 0.1142 | dt 0.032 | |
| type train | step 19340 | loss 0.0083 0.0776 0.3566 1.1003 | lr 1.0e-04 | norm 0.0955 | dt 0.032 | |
| type train | step 19350 | loss 0.0083 0.0740 0.3389 1.0643 | lr 1.0e-04 | norm 0.1103 | dt 0.031 | |
| type train | step 19360 | loss 0.0085 0.0767 0.3536 1.0926 | lr 1.0e-04 | norm 0.0826 | dt 0.031 | |
| type train | step 19370 | loss 0.0082 0.0737 0.3353 1.0466 | lr 1.0e-04 | norm 0.1141 | dt 0.031 | |
| type train | step 19380 | loss 0.0084 0.0748 0.3525 1.0881 | lr 1.0e-04 | norm 0.0984 | dt 0.032 | |
| type train | step 19390 | loss 0.0083 0.0766 0.3524 1.0866 | lr 1.0e-04 | norm 0.0920 | dt 0.032 | |
| type train | step 19400 | loss 0.0083 0.0776 0.3575 1.1047 | lr 1.0e-04 | norm 0.1014 | dt 0.031 | |
| type train | step 19410 | loss 0.0086 0.0757 0.3487 1.0898 | lr 1.0e-04 | norm 0.1113 | dt 0.031 | |
| type train | step 19420 | loss 0.0084 0.0773 0.3628 1.1159 | lr 1.0e-04 | norm 0.1029 | dt 0.029 | |
| type train | step 19430 | loss 0.0083 0.0771 0.3608 1.1076 | lr 1.0e-04 | norm 0.1254 | dt 0.025 | |
| type train | step 19440 | loss 0.0084 0.0767 0.3601 1.1209 | lr 1.0e-04 | norm 0.0956 | dt 0.026 | |
| type train | step 19450 | loss 0.0081 0.0748 0.3399 1.0598 | lr 1.0e-04 | norm 0.0978 | dt 0.025 | |
| type train | step 19460 | loss 0.0081 0.0767 0.3511 1.0889 | lr 1.0e-04 | norm 0.1046 | dt 0.026 | |
| type train | step 19470 | loss 0.0086 0.0748 0.3466 1.0798 | lr 1.0e-04 | norm 0.1210 | dt 0.025 | |
| type train | step 19480 | loss 0.0083 0.0774 0.3658 1.1311 | lr 1.0e-04 | norm 0.1100 | dt 0.025 | |
| type train | step 19490 | loss 0.0083 0.0758 0.3435 1.0682 | lr 1.0e-04 | norm 0.1357 | dt 0.026 | |
| type train | step 19500 | loss 0.0085 0.0762 0.3555 1.1029 | lr 1.0e-04 | norm 0.1053 | dt 0.026 | |
| type train | step 19510 | loss 0.0081 0.0750 0.3444 1.0656 | lr 1.0e-04 | norm 0.1015 | dt 0.025 | |
| type train | step 19520 | loss 0.0082 0.0775 0.3657 1.1219 | lr 1.0e-04 | norm 0.1021 | dt 0.028 | |
| type train | step 19530 | loss 0.0082 0.0762 0.3508 1.0898 | lr 1.0e-04 | norm 0.0886 | dt 0.025 | |
| type train | step 19540 | loss 0.0083 0.0774 0.3577 1.1103 | lr 1.0e-04 | norm 0.1100 | dt 0.026 | |
| type train | step 19550 | loss 0.0083 0.0740 0.3402 1.0648 | lr 1.0e-04 | norm 0.1080 | dt 0.025 | |
| type train | step 19560 | loss 0.0086 0.0773 0.3618 1.1207 | lr 1.0e-04 | norm 0.1186 | dt 0.025 | |
| type train | step 19570 | loss 0.0082 0.0756 0.3433 1.0558 | lr 1.0e-04 | norm 0.1005 | dt 0.026 | |
| type train | step 19580 | loss 0.0080 0.0754 0.3515 1.0885 | lr 1.0e-04 | norm 0.0966 | dt 0.025 | |
| type train | step 19590 | loss 0.0082 0.0757 0.3454 1.0701 | lr 1.0e-04 | norm 0.0788 | dt 0.025 | |
| type train | step 19600 | loss 0.0086 0.0787 0.3690 1.1434 | lr 1.0e-04 | norm 0.0952 | dt 0.026 | |
| type train | step 19610 | loss 0.0082 0.0755 0.3474 1.0737 | lr 1.0e-04 | norm 0.1069 | dt 0.026 | |
| type train | step 19620 | loss 0.0081 0.0731 0.3385 1.0603 | lr 1.0e-04 | norm 0.0827 | dt 0.025 | |
| type train | step 19630 | loss 0.0083 0.0761 0.3501 1.0717 | lr 1.0e-04 | norm 0.0987 | dt 0.025 | |
| type train | step 19640 | loss 0.0085 0.0768 0.3429 1.0587 | lr 1.0e-04 | norm 0.1382 | dt 0.026 | |
| type train | step 19650 | loss 0.0082 0.0753 0.3451 1.0729 | lr 1.0e-04 | norm 0.1238 | dt 0.026 | |
| type train | step 19660 | loss 0.0088 0.0755 0.3473 1.0836 | lr 1.0e-04 | norm 0.1387 | dt 0.025 | |
| type train | step 19670 | loss 0.0084 0.0770 0.3533 1.1007 | lr 1.0e-04 | norm 0.1078 | dt 0.025 | |
| type train | step 19680 | loss 0.0084 0.0753 0.3533 1.0959 | lr 1.0e-04 | norm 0.0936 | dt 0.025 | |
| type train | step 19690 | loss 0.0081 0.0761 0.3471 1.0598 | lr 1.0e-04 | norm 0.1053 | dt 0.026 | |
| type train | step 19700 | loss 0.0083 0.0767 0.3462 1.0738 | lr 1.0e-04 | norm 0.0970 | dt 0.025 | |
| type train | step 19710 | loss 0.0082 0.0754 0.3490 1.0930 | lr 1.0e-04 | norm 0.1083 | dt 0.025 | |
| type train | step 19720 | loss 0.0088 0.0781 0.3594 1.1166 | lr 1.0e-04 | norm 0.0994 | dt 0.026 | |
| type train | step 19730 | loss 0.0085 0.0754 0.3473 1.0862 | lr 1.0e-04 | norm 0.0985 | dt 0.025 | |
| type train | step 19740 | loss 0.0082 0.0755 0.3537 1.0883 | lr 1.0e-04 | norm 0.0881 | dt 0.025 | |
| type train | step 19750 | loss 0.0081 0.0756 0.3455 1.0644 | lr 1.0e-04 | norm 0.0992 | dt 0.026 | |
| type train | step 19760 | loss 0.0084 0.0791 0.3582 1.0903 | lr 1.0e-04 | norm 0.0981 | dt 0.026 | |
| type train | step 19770 | loss 0.0084 0.0748 0.3408 1.0665 | lr 1.0e-04 | norm 0.0880 | dt 0.026 | |
| type train | step 19780 | loss 0.0082 0.0748 0.3434 1.0759 | lr 1.0e-04 | norm 0.0917 | dt 0.027 | |
| type train | step 19790 | loss 0.0085 0.0748 0.3407 1.0618 | lr 1.0e-04 | norm 0.0816 | dt 0.028 | |
| type train | step 19800 | loss 0.0086 0.0756 0.3556 1.1099 | lr 1.0e-04 | norm 0.1023 | dt 0.035 | |
| type train | step 19810 | loss 0.0083 0.0743 0.3370 1.0563 | lr 1.0e-04 | norm 0.1089 | dt 0.035 | |
| type train | step 19820 | loss 0.0085 0.0781 0.3484 1.0713 | lr 1.0e-04 | norm 0.1167 | dt 0.035 | |
| type train | step 19830 | loss 0.0085 0.0756 0.3490 1.0841 | lr 1.0e-04 | norm 0.1109 | dt 0.035 | |
| type train | step 19840 | loss 0.0085 0.0745 0.3455 1.0818 | lr 1.0e-04 | norm 0.1090 | dt 0.035 | |
| type train | step 19850 | loss 0.0085 0.0768 0.3573 1.1015 | lr 1.0e-04 | norm 0.0865 | dt 0.035 | |
| type train | step 19860 | loss 0.0082 0.0759 0.3546 1.0972 | lr 1.0e-04 | norm 0.0883 | dt 0.035 | |
| type train | step 19870 | loss 0.0082 0.0760 0.3466 1.0790 | lr 1.0e-04 | norm 0.1053 | dt 0.034 | |
| type train | step 19880 | loss 0.0082 0.0733 0.3345 1.0403 | lr 1.0e-04 | norm 0.0982 | dt 0.034 | |
| type train | step 19890 | loss 0.0083 0.0760 0.3437 1.0765 | lr 1.0e-04 | norm 0.1065 | dt 0.034 | |
| type train | step 19900 | loss 0.0086 0.0754 0.3528 1.1101 | lr 1.0e-04 | norm 0.1121 | dt 0.034 | |
| type train | step 19910 | loss 0.0082 0.0758 0.3545 1.0963 | lr 1.0e-04 | norm 0.0997 | dt 0.034 | |
| type train | step 19920 | loss 0.0086 0.0760 0.3437 1.0652 | lr 1.0e-04 | norm 0.1058 | dt 0.033 | |
| type train | step 19930 | loss 0.0083 0.0759 0.3503 1.0810 | lr 1.0e-04 | norm 0.0848 | dt 0.033 | |
| type train | step 19940 | loss 0.0082 0.0763 0.3451 1.0785 | lr 1.0e-04 | norm 0.1127 | dt 0.032 | |
| type train | step 19950 | loss 0.0083 0.0776 0.3565 1.1003 | lr 1.0e-04 | norm 0.0899 | dt 0.032 | |
| type train | step 19960 | loss 0.0083 0.0740 0.3389 1.0641 | lr 1.0e-04 | norm 0.1068 | dt 0.032 | |
| type train | step 19970 | loss 0.0085 0.0767 0.3535 1.0925 | lr 1.0e-04 | norm 0.0835 | dt 0.032 | |
| type train | step 19980 | loss 0.0082 0.0737 0.3352 1.0464 | lr 1.0e-04 | norm 0.1086 | dt 0.032 | |
| type train | step 19990 | loss 0.0084 0.0748 0.3526 1.0878 | lr 1.0e-04 | norm 0.0914 | dt 0.032 | |
| type train | step 20000 | loss 0.0083 0.0766 0.3524 1.0864 | lr 1.0e-04 | norm 0.0890 | dt 0.032 | |