| type train | step 10 | loss 10.3330 | lr 1.0e-03 | norm 0.7072 | dt 0.417 | |
| type train | step 20 | loss 9.7623 | lr 1.0e-03 | norm 0.7508 | dt 0.417 | |
| type train | step 30 | loss 9.1839 | lr 1.0e-03 | norm 0.8107 | dt 0.418 | |
| type train | step 10 | loss 10.3383 | lr 1.0e-03 | norm 0.7068 | dt 0.111 | |
| type train | step 20 | loss 9.7668 | lr 1.0e-03 | norm 0.7496 | dt 0.111 | |
| type train | step 30 | loss 9.1860 | lr 1.0e-03 | norm 0.8091 | dt 0.111 | |
| type train | step 40 | loss 8.6266 | lr 1.0e-03 | norm 0.8568 | dt 0.111 | |
| type train | step 50 | loss 8.1013 | lr 1.0e-03 | norm 0.8972 | dt 0.110 | |
| type train | step 60 | loss 7.6199 | lr 1.0e-03 | norm 0.9090 | dt 0.111 | |
| type train | step 70 | loss 7.1879 | lr 1.0e-03 | norm 0.9057 | dt 0.111 | |
| type train | step 80 | loss 6.8313 | lr 1.0e-03 | norm 0.8720 | dt 0.111 | |
| type train | step 90 | loss 6.5357 | lr 1.0e-03 | norm 0.8382 | dt 0.111 | |
| type train | step 100 | loss 6.3160 | lr 1.0e-03 | norm 0.7600 | dt 0.111 | |
| type train | step 110 | loss 6.1447 | lr 1.0e-03 | norm 0.7295 | dt 0.111 | |
| type train | step 120 | loss 6.0097 | lr 1.0e-03 | norm 0.7348 | dt 0.111 | |
| type train | step 130 | loss 5.8503 | lr 1.0e-03 | norm 0.8594 | dt 0.111 | |
| type train | step 140 | loss 5.7640 | lr 1.0e-03 | norm 1.0842 | dt 0.111 | |
| type train | step 150 | loss 5.6639 | lr 1.0e-03 | norm 1.7299 | dt 0.111 | |
| type train | step 160 | loss 5.5513 | lr 1.0e-03 | norm 1.7778 | dt 0.111 | |
| type train | step 170 | loss 5.4143 | lr 1.0e-03 | norm 0.6508 | dt 0.111 | |
| type train | step 180 | loss 5.3034 | lr 1.0e-03 | norm 1.6037 | dt 0.111 | |
| type train | step 190 | loss 5.1707 | lr 1.0e-03 | norm 2.4459 | dt 0.111 | |
| type train | step 200 | loss 5.0737 | lr 1.0e-03 | norm 2.2686 | dt 0.111 | |
| type train | step 210 | loss 4.9398 | lr 1.0e-03 | norm 2.9354 | dt 0.111 | |
| type train | step 220 | loss 4.8236 | lr 1.0e-03 | norm 1.6275 | dt 0.111 | |
| type train | step 230 | loss 4.7232 | lr 1.0e-03 | norm 1.8814 | dt 0.111 | |
| type train | step 240 | loss 4.6439 | lr 1.0e-03 | norm 1.5637 | dt 0.111 | |
| type train | step 250 | loss 4.5500 | lr 1.0e-03 | norm 0.8494 | dt 0.111 | |
| type train | step 260 | loss 4.4530 | lr 1.0e-03 | norm 1.4317 | dt 0.111 | |
| type train | step 270 | loss 4.4119 | lr 1.0e-03 | norm 0.9960 | dt 0.111 | |
| type train | step 280 | loss 4.3221 | lr 1.0e-03 | norm 0.7116 | dt 0.112 | |
| type train | step 290 | loss 4.2935 | lr 1.0e-03 | norm 1.2159 | dt 0.111 | |
| type train | step 300 | loss 4.2530 | lr 1.0e-03 | norm 2.5565 | dt 0.111 | |
| type train | step 310 | loss 4.2130 | lr 1.0e-03 | norm 1.2535 | dt 0.112 | |
| type train | step 320 | loss 4.1495 | lr 1.0e-03 | norm 1.6140 | dt 0.111 | |
| type train | step 330 | loss 4.0959 | lr 1.0e-03 | norm 1.8347 | dt 0.111 | |
| type train | step 340 | loss 4.0825 | lr 1.0e-03 | norm 1.1320 | dt 0.111 | |
| type train | step 350 | loss 4.0471 | lr 1.0e-03 | norm 1.4113 | dt 0.111 | |
| type train | step 360 | loss 4.0109 | lr 1.0e-03 | norm 1.1816 | dt 0.111 | |
| type train | step 370 | loss 3.9523 | lr 1.0e-03 | norm 1.0637 | dt 0.111 | |
| type train | step 380 | loss 3.9676 | lr 1.0e-03 | norm 1.3457 | dt 0.119 | |
| type train | step 390 | loss 3.9231 | lr 1.0e-03 | norm 1.5495 | dt 0.111 | |
| type train | step 400 | loss 3.9146 | lr 1.0e-03 | norm 1.7005 | dt 0.111 | |
| type train | step 410 | loss 3.8805 | lr 1.0e-03 | norm 1.6612 | dt 0.112 | |
| type train | step 420 | loss 3.8791 | lr 1.0e-03 | norm 1.4267 | dt 0.111 | |
| type train | step 430 | loss 3.8615 | lr 1.0e-03 | norm 1.7027 | dt 0.111 | |
| type train | step 440 | loss 3.8344 | lr 1.0e-03 | norm 1.9053 | dt 0.112 | |
| type train | step 450 | loss 3.8084 | lr 1.0e-03 | norm 1.8223 | dt 0.112 | |
| type train | step 460 | loss 3.8010 | lr 1.0e-03 | norm 1.9712 | dt 0.112 | |
| type train | step 470 | loss 3.7612 | lr 1.0e-03 | norm 1.8497 | dt 0.112 | |
| type train | step 480 | loss 3.7730 | lr 1.0e-03 | norm 1.7918 | dt 0.111 | |
| type train | step 490 | loss 3.7459 | lr 1.0e-03 | norm 1.9049 | dt 0.112 | |
| type train | step 500 | loss 3.7482 | lr 1.0e-03 | norm 1.8751 | dt 0.111 | |
| type train | step 510 | loss 3.6855 | lr 1.0e-03 | norm 2.7669 | dt 0.112 | |
| type train | step 520 | loss 3.6997 | lr 1.0e-03 | norm 1.9023 | dt 0.111 | |
| type train | step 530 | loss 3.7013 | lr 1.0e-03 | norm 2.7672 | dt 0.112 | |
| type train | step 540 | loss 3.6742 | lr 1.0e-03 | norm 1.8523 | dt 0.112 | |
| type train | step 550 | loss 3.6391 | lr 1.0e-03 | norm 1.9280 | dt 0.112 | |
| type train | step 560 | loss 3.6567 | lr 1.0e-03 | norm 1.8137 | dt 0.113 | |
| type train | step 570 | loss 3.6269 | lr 1.0e-03 | norm 2.7995 | dt 0.112 | |
| type train | step 580 | loss 3.6258 | lr 1.0e-03 | norm 2.6514 | dt 0.112 | |
| type train | step 590 | loss 3.6018 | lr 1.0e-03 | norm 2.5518 | dt 0.112 | |
| type train | step 600 | loss 3.5883 | lr 1.0e-03 | norm 3.9457 | dt 0.112 | |
| type train | step 610 | loss 3.5783 | lr 1.0e-03 | norm 2.1407 | dt 0.113 | |
| type train | step 620 | loss 3.5874 | lr 1.0e-03 | norm 2.6008 | dt 0.111 | |
| type train | step 630 | loss 3.5825 | lr 1.0e-03 | norm 3.5026 | dt 0.113 | |
| type train | step 640 | loss 3.5508 | lr 1.0e-03 | norm 4.6949 | dt 0.113 | |
| type train | step 650 | loss 3.5597 | lr 1.0e-03 | norm 1.5705 | dt 0.113 | |
| type train | step 660 | loss 3.5296 | lr 1.0e-03 | norm 2.8740 | dt 0.112 | |
| type train | step 670 | loss 3.5351 | lr 1.0e-03 | norm 1.4937 | dt 0.112 | |
| type train | step 680 | loss 3.5293 | lr 1.0e-03 | norm 2.2113 | dt 0.111 | |
| type train | step 690 | loss 3.5269 | lr 1.0e-03 | norm 2.9841 | dt 0.112 | |
| type train | step 700 | loss 3.4906 | lr 1.0e-03 | norm 2.4684 | dt 0.112 | |
| type train | step 710 | loss 3.4646 | lr 1.0e-03 | norm 2.3474 | dt 0.113 | |
| type train | step 720 | loss 3.4617 | lr 1.0e-03 | norm 2.7813 | dt 0.113 | |
| type train | step 730 | loss 3.4459 | lr 1.0e-03 | norm 1.5406 | dt 0.112 | |
| type train | step 740 | loss 3.4371 | lr 1.0e-03 | norm 2.6166 | dt 0.113 | |
| type train | step 750 | loss 3.4039 | lr 1.0e-03 | norm 1.5664 | dt 0.112 | |
| type train | step 760 | loss 3.4434 | lr 1.0e-03 | norm 3.0861 | dt 0.119 | |
| type train | step 770 | loss 3.4065 | lr 1.0e-03 | norm 3.5232 | dt 0.112 | |
| type train | step 780 | loss 3.4096 | lr 1.0e-03 | norm 2.8305 | dt 0.112 | |
| type train | step 790 | loss 3.3894 | lr 1.0e-03 | norm 1.8813 | dt 0.112 | |
| type train | step 800 | loss 3.4055 | lr 1.0e-03 | norm 2.5700 | dt 0.112 | |
| type train | step 810 | loss 3.3972 | lr 1.0e-03 | norm 2.5834 | dt 0.112 | |
| type train | step 820 | loss 3.3792 | lr 1.0e-03 | norm 2.5971 | dt 0.113 | |
| type train | step 830 | loss 3.3618 | lr 1.0e-03 | norm 1.8595 | dt 0.113 | |
| type train | step 840 | loss 3.3678 | lr 1.0e-03 | norm 2.5004 | dt 0.112 | |
| type train | step 850 | loss 3.3409 | lr 1.0e-03 | norm 2.9880 | dt 0.112 | |
| type train | step 860 | loss 3.3568 | lr 1.0e-03 | norm 3.4849 | dt 0.114 | |
| type train | step 870 | loss 3.3403 | lr 1.0e-03 | norm 2.4013 | dt 0.113 | |
| type train | step 880 | loss 3.3492 | lr 1.0e-03 | norm 2.1356 | dt 0.114 | |
| type train | step 890 | loss 3.2926 | lr 1.0e-03 | norm 1.9721 | dt 0.113 | |
| type train | step 900 | loss 3.3159 | lr 1.0e-03 | norm 2.1230 | dt 0.113 | |
| type train | step 910 | loss 3.3261 | lr 1.0e-03 | norm 2.9547 | dt 0.114 | |
| type train | step 920 | loss 3.2953 | lr 1.0e-03 | norm 1.7801 | dt 0.114 | |
| type train | step 930 | loss 3.2854 | lr 1.0e-03 | norm 3.0788 | dt 0.113 | |
| type train | step 940 | loss 3.3010 | lr 1.0e-03 | norm 2.4363 | dt 0.113 | |
| type train | step 950 | loss 3.2744 | lr 1.0e-03 | norm 1.8727 | dt 0.114 | |
| type train | step 960 | loss 3.2844 | lr 1.0e-03 | norm 3.1843 | dt 0.113 | |
| type train | step 970 | loss 3.2700 | lr 1.0e-03 | norm 2.9397 | dt 0.113 | |
| type train | step 980 | loss 3.2513 | lr 1.0e-03 | norm 2.6933 | dt 0.113 | |
| type train | step 990 | loss 3.2500 | lr 1.0e-03 | norm 2.4030 | dt 0.114 | |
| type train | step 1000 | loss 3.2668 | lr 1.0e-03 | norm 2.4430 | dt 0.113 | |
| type train | step 1010 | loss 3.2617 | lr 1.0e-03 | norm 2.4673 | dt 0.112 | |
| type train | step 1020 | loss 3.2231 | lr 1.0e-03 | norm 2.5424 | dt 0.113 | |
| type train | step 1030 | loss 3.2535 | lr 1.0e-03 | norm 1.9049 | dt 0.113 | |
| type train | step 1040 | loss 3.2289 | lr 1.0e-03 | norm 2.8275 | dt 0.112 | |
| type train | step 1050 | loss 3.2416 | lr 1.0e-03 | norm 2.1650 | dt 0.113 | |
| type train | step 1060 | loss 3.2383 | lr 1.0e-03 | norm 2.3707 | dt 0.113 | |
| type train | step 1070 | loss 3.2356 | lr 1.0e-03 | norm 1.7848 | dt 0.119 | |
| type train | step 1080 | loss 3.2029 | lr 1.0e-03 | norm 2.6397 | dt 0.114 | |
| type train | step 1090 | loss 3.1909 | lr 1.0e-03 | norm 3.0345 | dt 0.115 | |
| type train | step 1100 | loss 3.1876 | lr 1.0e-03 | norm 3.1054 | dt 0.114 | |
| type train | step 1110 | loss 3.1949 | lr 1.0e-03 | norm 3.2054 | dt 0.113 | |
| type train | step 1120 | loss 3.1793 | lr 1.0e-03 | norm 3.1650 | dt 0.113 | |
| type train | step 1130 | loss 3.1449 | lr 1.0e-03 | norm 2.2814 | dt 0.113 | |
| type train | step 1140 | loss 3.1763 | lr 1.0e-03 | norm 2.7021 | dt 0.519 | |
| type train | step 1150 | loss 3.1516 | lr 1.0e-03 | norm 3.2957 | dt 0.113 | |
| type train | step 1160 | loss 3.1611 | lr 1.0e-03 | norm 2.5074 | dt 0.112 | |
| type train | step 1170 | loss 3.1548 | lr 1.0e-03 | norm 3.4140 | dt 0.112 | |
| type train | step 1180 | loss 3.1679 | lr 1.0e-03 | norm 2.9607 | dt 0.114 | |
| type train | step 1190 | loss 3.1651 | lr 1.0e-03 | norm 3.6046 | dt 0.114 | |
| type train | step 1200 | loss 3.1426 | lr 1.0e-03 | norm 2.5556 | dt 0.114 | |
| type train | step 1210 | loss 3.1257 | lr 1.0e-03 | norm 2.6682 | dt 0.112 | |
| type train | step 1220 | loss 3.1413 | lr 1.0e-03 | norm 2.7121 | dt 0.115 | |
| type train | step 1230 | loss 3.1098 | lr 1.0e-03 | norm 2.8892 | dt 0.113 | |
| type train | step 1240 | loss 3.1332 | lr 1.0e-03 | norm 3.3983 | dt 0.114 | |
| type train | step 1250 | loss 3.1184 | lr 1.0e-03 | norm 2.7903 | dt 0.113 | |
| type train | step 1260 | loss 3.1315 | lr 1.0e-03 | norm 2.8927 | dt 0.115 | |
| type train | step 1270 | loss 3.0778 | lr 1.0e-03 | norm 2.3560 | dt 0.115 | |
| type train | step 1280 | loss 3.1146 | lr 1.0e-03 | norm 3.5064 | dt 0.115 | |
| type train | step 1290 | loss 3.1136 | lr 1.0e-03 | norm 2.8561 | dt 0.113 | |
| type train | step 1300 | loss 3.0911 | lr 1.0e-03 | norm 2.2730 | dt 0.115 | |
| type train | step 1310 | loss 3.0748 | lr 1.0e-03 | norm 2.5766 | dt 0.114 | |
| type train | step 1320 | loss 3.1009 | lr 1.0e-03 | norm 3.9068 | dt 0.114 | |
| type train | step 1330 | loss 3.0803 | lr 1.0e-03 | norm 3.1166 | dt 0.113 | |
| type train | step 1340 | loss 3.0926 | lr 1.0e-03 | norm 2.7389 | dt 0.115 | |
| type train | step 1350 | loss 3.0744 | lr 1.0e-03 | norm 2.0786 | dt 0.114 | |
| type train | step 1360 | loss 3.0577 | lr 1.0e-03 | norm 2.5219 | dt 0.113 | |
| type train | step 1370 | loss 3.0669 | lr 1.0e-03 | norm 3.3838 | dt 0.112 | |
| type train | step 1380 | loss 3.0778 | lr 1.0e-03 | norm 2.2342 | dt 0.112 | |
| type train | step 1390 | loss 3.0739 | lr 1.0e-03 | norm 2.5469 | dt 0.112 | |
| type train | step 1400 | loss 3.0546 | lr 1.0e-03 | norm 4.2186 | dt 0.113 | |
| type train | step 1410 | loss 3.0820 | lr 1.0e-03 | norm 2.8416 | dt 0.113 | |
| type train | step 1420 | loss 3.0504 | lr 1.0e-03 | norm 2.4169 | dt 0.113 | |
| type train | step 1430 | loss 3.0668 | lr 1.0e-03 | norm 2.1773 | dt 0.112 | |
| type train | step 1440 | loss 3.0626 | lr 1.0e-03 | norm 1.9690 | dt 0.113 | |
| type train | step 1450 | loss 3.0819 | lr 1.0e-03 | norm 3.4426 | dt 0.112 | |
| type train | step 1460 | loss 3.0320 | lr 1.0e-03 | norm 2.5645 | dt 0.112 | |
| type train | step 1470 | loss 3.0254 | lr 1.0e-03 | norm 3.3960 | dt 0.112 | |
| type train | step 1480 | loss 3.0238 | lr 1.0e-03 | norm 2.7148 | dt 0.112 | |
| type train | step 1490 | loss 3.0299 | lr 1.0e-03 | norm 2.7401 | dt 0.111 | |
| type train | step 1500 | loss 3.0184 | lr 1.0e-03 | norm 2.9799 | dt 0.111 | |
| type train | step 1510 | loss 2.9895 | lr 1.0e-03 | norm 2.7538 | dt 0.112 | |
| type train | step 1520 | loss 3.0257 | lr 1.0e-03 | norm 2.9352 | dt 0.786 | |
| type train | step 1530 | loss 2.9938 | lr 1.0e-03 | norm 3.5523 | dt 0.119 | |
| type train | step 1540 | loss 3.0104 | lr 1.0e-03 | norm 2.2947 | dt 0.113 | |
| type train | step 1550 | loss 3.0030 | lr 1.0e-03 | norm 2.8988 | dt 0.113 | |
| type train | step 1560 | loss 3.0205 | lr 1.0e-03 | norm 2.8917 | dt 0.113 | |
| type train | step 1570 | loss 3.0170 | lr 1.0e-03 | norm 2.9276 | dt 0.112 | |
| type train | step 1580 | loss 3.0038 | lr 1.0e-03 | norm 3.3706 | dt 0.113 | |
| type train | step 1590 | loss 2.9851 | lr 1.0e-03 | norm 3.3408 | dt 0.113 | |
| type train | step 1600 | loss 3.0001 | lr 1.0e-03 | norm 3.3847 | dt 0.111 | |
| type train | step 1610 | loss 2.9764 | lr 1.0e-03 | norm 3.2374 | dt 0.112 | |
| type train | step 1620 | loss 2.9924 | lr 1.0e-03 | norm 2.8452 | dt 0.112 | |
| type train | step 1630 | loss 2.9834 | lr 1.0e-03 | norm 2.5053 | dt 0.111 | |
| type train | step 1640 | loss 3.0062 | lr 1.0e-03 | norm 2.9985 | dt 0.115 | |
| type train | step 1650 | loss 2.9527 | lr 1.0e-03 | norm 2.7376 | dt 0.112 | |
| type train | step 1660 | loss 2.9800 | lr 1.0e-03 | norm 2.2764 | dt 0.112 | |
| type train | step 1670 | loss 2.9875 | lr 1.0e-03 | norm 2.8126 | dt 0.112 | |
| type train | step 1680 | loss 2.9827 | lr 1.0e-03 | norm 4.0496 | dt 0.113 | |
| type train | step 1690 | loss 2.9564 | lr 1.0e-03 | norm 3.7922 | dt 0.113 | |
| type train | step 1700 | loss 2.9789 | lr 1.0e-03 | norm 3.6865 | dt 0.113 | |
| type train | step 1710 | loss 2.9598 | lr 1.0e-03 | norm 2.8699 | dt 0.116 | |
| type train | step 1720 | loss 2.9697 | lr 1.0e-03 | norm 2.7418 | dt 0.113 | |
| type train | step 1730 | loss 2.9698 | lr 1.0e-03 | norm 4.6787 | dt 0.114 | |
| type train | step 1740 | loss 2.9466 | lr 1.0e-03 | norm 2.6091 | dt 0.114 | |
| type train | step 1750 | loss 2.9490 | lr 1.0e-03 | norm 2.7180 | dt 0.113 | |
| type train | step 1760 | loss 2.9675 | lr 1.0e-03 | norm 3.4603 | dt 0.112 | |
| type train | step 1770 | loss 2.9694 | lr 1.0e-03 | norm 3.8823 | dt 0.112 | |
| type train | step 1780 | loss 2.9294 | lr 1.0e-03 | norm 2.1583 | dt 0.113 | |
| type train | step 1790 | loss 2.9758 | lr 1.0e-03 | norm 3.1254 | dt 0.114 | |
| type train | step 1800 | loss 2.9418 | lr 1.0e-03 | norm 2.3019 | dt 0.113 | |
| type train | step 1810 | loss 2.9595 | lr 1.0e-03 | norm 2.3203 | dt 0.114 | |
| type train | step 1820 | loss 2.9645 | lr 1.0e-03 | norm 2.7078 | dt 0.112 | |
| type train | step 1830 | loss 2.9791 | lr 1.0e-03 | norm 4.0801 | dt 0.112 | |
| type train | step 1840 | loss 2.9322 | lr 1.0e-03 | norm 2.5803 | dt 0.112 | |
| type train | step 1850 | loss 2.9238 | lr 1.0e-03 | norm 3.4429 | dt 0.113 | |
| type train | step 1860 | loss 2.9231 | lr 1.0e-03 | norm 2.9536 | dt 0.113 | |
| type train | step 1870 | loss 2.9300 | lr 1.0e-03 | norm 2.7063 | dt 0.112 | |
| type train | step 1880 | loss 2.9261 | lr 1.0e-03 | norm 3.2127 | dt 0.114 | |
| type train | step 1890 | loss 2.8983 | lr 1.0e-03 | norm 3.4601 | dt 0.113 | |
| type train | step 1900 | loss 2.9265 | lr 1.0e-03 | norm 2.7227 | dt 1.548 | |
| type train | step 1910 | loss 2.8953 | lr 1.0e-03 | norm 2.7074 | dt 0.112 | |
| type train | step 1920 | loss 2.9190 | lr 1.0e-03 | norm 2.8995 | dt 0.113 | |
| type train | step 1930 | loss 2.9167 | lr 1.0e-03 | norm 3.8481 | dt 0.113 | |
| type train | step 1940 | loss 2.9281 | lr 1.0e-03 | norm 2.8103 | dt 0.114 | |
| type train | step 1950 | loss 2.9268 | lr 1.0e-03 | norm 2.5927 | dt 0.114 | |
| type train | step 1960 | loss 2.9142 | lr 1.0e-03 | norm 3.0433 | dt 0.113 | |
| type train | step 1970 | loss 2.8983 | lr 1.0e-03 | norm 3.2740 | dt 0.112 | |
| type train | step 1980 | loss 2.9161 | lr 1.0e-03 | norm 3.9185 | dt 0.112 | |
| type train | step 1990 | loss 2.8895 | lr 1.0e-03 | norm 3.0155 | dt 0.112 | |
| type train | step 2000 | loss 2.9092 | lr 1.0e-03 | norm 3.4781 | dt 0.112 | |
| type train | step 2010 | loss 2.9025 | lr 1.0e-03 | norm 3.0084 | dt 0.112 | |
| type train | step 2020 | loss 2.9196 | lr 1.0e-03 | norm 3.2020 | dt 0.114 | |
| type train | step 2030 | loss 2.8721 | lr 1.0e-03 | norm 2.9734 | dt 0.113 | |
| type train | step 2040 | loss 2.9046 | lr 1.0e-03 | norm 3.6180 | dt 0.112 | |
| type train | step 2050 | loss 2.9086 | lr 1.0e-03 | norm 2.7358 | dt 0.112 | |
| type train | step 2060 | loss 2.8910 | lr 1.0e-03 | norm 4.0219 | dt 0.112 | |
| type train | step 2070 | loss 2.8788 | lr 1.0e-03 | norm 3.4502 | dt 0.112 | |
| type train | step 2080 | loss 2.8967 | lr 1.0e-03 | norm 3.7852 | dt 0.112 | |
| type train | step 2090 | loss 2.8824 | lr 1.0e-03 | norm 2.7029 | dt 0.112 | |
| type train | step 2100 | loss 2.8959 | lr 1.0e-03 | norm 2.6283 | dt 0.112 | |
| type train | step 2110 | loss 2.8875 | lr 1.0e-03 | norm 3.4177 | dt 0.112 | |
| type train | step 2120 | loss 2.8799 | lr 1.0e-03 | norm 3.5287 | dt 0.112 | |
| type train | step 2130 | loss 2.8750 | lr 1.0e-03 | norm 2.8981 | dt 0.113 | |
| type train | step 2140 | loss 2.8971 | lr 1.0e-03 | norm 4.3269 | dt 0.115 | |
| type train | step 2150 | loss 2.8941 | lr 1.0e-03 | norm 3.3644 | dt 0.112 | |
| type train | step 2160 | loss 2.8595 | lr 1.0e-03 | norm 3.3858 | dt 0.112 | |
| type train | step 2170 | loss 2.9046 | lr 1.0e-03 | norm 3.0400 | dt 0.113 | |
| type train | step 2180 | loss 2.8755 | lr 1.0e-03 | norm 2.7293 | dt 0.114 | |
| type train | step 2190 | loss 2.8909 | lr 1.0e-03 | norm 2.9052 | dt 0.113 | |
| type train | step 2200 | loss 2.8960 | lr 1.0e-03 | norm 3.4143 | dt 0.114 | |
| type train | step 2210 | loss 2.9062 | lr 1.0e-03 | norm 3.6502 | dt 0.113 | |
| type train | step 2220 | loss 2.8667 | lr 1.0e-03 | norm 4.0320 | dt 0.113 | |
| type train | step 2230 | loss 2.8529 | lr 1.0e-03 | norm 2.5836 | dt 0.114 | |
| type train | step 2240 | loss 2.8551 | lr 1.0e-03 | norm 2.9113 | dt 0.113 | |
| type train | step 2250 | loss 2.8683 | lr 1.0e-03 | norm 3.0616 | dt 0.114 | |
| type train | step 2260 | loss 2.8626 | lr 1.0e-03 | norm 3.6118 | dt 0.113 | |
| type train | step 2270 | loss 2.8310 | lr 1.0e-03 | norm 2.9135 | dt 0.113 | |
| type train | step 2280 | loss 2.8622 | lr 1.0e-03 | norm 2.8750 | dt 0.121 | |
| type train | step 2290 | loss 2.8356 | lr 1.0e-03 | norm 3.8494 | dt 0.113 | |
| type train | step 2300 | loss 2.8605 | lr 1.0e-03 | norm 3.9284 | dt 0.113 | |
| type train | step 2310 | loss 2.8499 | lr 1.0e-03 | norm 3.0800 | dt 0.112 | |
| type train | step 2320 | loss 2.8685 | lr 1.0e-03 | norm 3.8296 | dt 0.113 | |
| type train | step 2330 | loss 2.8656 | lr 1.0e-03 | norm 3.0852 | dt 0.113 | |
| type train | step 2340 | loss 2.8557 | lr 1.0e-03 | norm 3.1466 | dt 0.114 | |
| type train | step 2350 | loss 2.8373 | lr 1.0e-03 | norm 2.8554 | dt 0.112 | |
| type train | step 2360 | loss 2.8624 | lr 1.0e-03 | norm 4.2768 | dt 0.113 | |
| type train | step 2370 | loss 2.8275 | lr 1.0e-03 | norm 2.8592 | dt 0.112 | |
| type train | step 2380 | loss 2.8514 | lr 1.0e-03 | norm 3.2689 | dt 0.112 | |
| type train | step 2390 | loss 2.8451 | lr 1.0e-03 | norm 2.7075 | dt 0.112 | |
| type train | step 2400 | loss 2.8721 | lr 1.0e-03 | norm 4.6623 | dt 0.113 | |
| type train | step 2410 | loss 2.8171 | lr 1.0e-03 | norm 2.7415 | dt 0.114 | |
| type train | step 2420 | loss 2.8505 | lr 1.0e-03 | norm 3.7496 | dt 0.114 | |
| type train | step 2430 | loss 2.8556 | lr 1.0e-03 | norm 3.3305 | dt 0.113 | |
| type train | step 2440 | loss 2.8453 | lr 1.0e-03 | norm 3.7107 | dt 0.114 | |
| type train | step 2450 | loss 2.8160 | lr 1.0e-03 | norm 2.5289 | dt 0.113 | |
| type train | step 2460 | loss 2.8412 | lr 1.0e-03 | norm 3.0190 | dt 0.113 | |
| type train | step 2470 | loss 2.8300 | lr 1.0e-03 | norm 3.0272 | dt 0.114 | |
| type train | step 2480 | loss 2.8461 | lr 1.0e-03 | norm 4.2926 | dt 0.113 | |
| type train | step 2490 | loss 2.8300 | lr 1.0e-03 | norm 3.4930 | dt 0.113 | |
| type train | step 2500 | loss 2.8174 | lr 1.0e-03 | norm 3.0242 | dt 0.112 | |
| type train | step 2510 | loss 2.8407 | lr 1.0e-03 | norm 6.4129 | dt 0.112 | |
| type train | step 2520 | loss 2.8393 | lr 1.0e-03 | norm 3.7974 | dt 0.112 | |
| type train | step 2530 | loss 2.8411 | lr 1.0e-03 | norm 2.9628 | dt 0.113 | |
| type train | step 2540 | loss 2.8201 | lr 1.0e-03 | norm 3.6036 | dt 0.112 | |
| type train | step 2550 | loss 2.8549 | lr 1.0e-03 | norm 4.0269 | dt 0.113 | |
| type train | step 2560 | loss 2.8308 | lr 1.0e-03 | norm 3.6912 | dt 0.114 | |
| type train | step 2570 | loss 2.8413 | lr 1.0e-03 | norm 2.8608 | dt 0.114 | |
| type train | step 2580 | loss 2.8525 | lr 1.0e-03 | norm 3.1250 | dt 0.113 | |
| type train | step 2590 | loss 2.8555 | lr 1.0e-03 | norm 2.9232 | dt 0.112 | |
| type train | step 2600 | loss 2.8209 | lr 1.0e-03 | norm 3.4581 | dt 0.112 | |
| type train | step 2610 | loss 2.8085 | lr 1.0e-03 | norm 3.6581 | dt 0.113 | |
| type train | step 2620 | loss 2.8089 | lr 1.0e-03 | norm 3.5684 | dt 0.113 | |
| type train | step 2630 | loss 2.8268 | lr 1.0e-03 | norm 3.6094 | dt 0.113 | |
| type train | step 2640 | loss 2.8174 | lr 1.0e-03 | norm 3.3872 | dt 0.113 | |
| type train | step 2650 | loss 2.7831 | lr 1.0e-03 | norm 2.9816 | dt 0.113 | |
| type train | step 2660 | loss 2.8178 | lr 1.0e-03 | norm 2.8023 | dt 0.119 | |
| type train | step 2670 | loss 2.7875 | lr 1.0e-03 | norm 3.4217 | dt 0.115 | |
| type train | step 2680 | loss 2.8154 | lr 1.0e-03 | norm 3.6919 | dt 0.111 | |
| type train | step 2690 | loss 2.8023 | lr 1.0e-03 | norm 3.2433 | dt 0.114 | |
| type train | step 2700 | loss 2.8197 | lr 1.0e-03 | norm 2.8322 | dt 0.115 | |
| type train | step 2710 | loss 2.8159 | lr 1.0e-03 | norm 3.1906 | dt 0.113 | |
| type train | step 2720 | loss 2.8026 | lr 1.0e-03 | norm 2.5616 | dt 0.113 | |
| type train | step 2730 | loss 2.7919 | lr 1.0e-03 | norm 3.0122 | dt 0.114 | |
| type train | step 2740 | loss 2.8215 | lr 1.0e-03 | norm 4.5926 | dt 0.114 | |
| type train | step 2750 | loss 2.7926 | lr 1.0e-03 | norm 4.4416 | dt 0.113 | |
| type train | step 2760 | loss 2.8127 | lr 1.0e-03 | norm 4.3600 | dt 0.113 | |
| type train | step 2770 | loss 2.8033 | lr 1.0e-03 | norm 3.2910 | dt 0.113 | |
| type train | step 2780 | loss 2.8248 | lr 1.0e-03 | norm 4.1132 | dt 0.113 | |
| type train | step 2790 | loss 2.7778 | lr 1.0e-03 | norm 3.6617 | dt 0.113 | |
| type train | step 2800 | loss 2.8080 | lr 1.0e-03 | norm 3.5671 | dt 0.113 | |
| type train | step 2810 | loss 2.8120 | lr 1.0e-03 | norm 2.8360 | dt 0.112 | |
| type train | step 2820 | loss 2.7959 | lr 1.0e-03 | norm 3.4689 | dt 0.113 | |
| type train | step 2830 | loss 2.7751 | lr 1.0e-03 | norm 3.1263 | dt 0.113 | |
| type train | step 2840 | loss 2.8040 | lr 1.0e-03 | norm 4.7723 | dt 0.113 | |
| type train | step 2850 | loss 2.7878 | lr 1.0e-03 | norm 2.8504 | dt 0.113 | |
| type train | step 2860 | loss 2.8069 | lr 1.0e-03 | norm 3.9886 | dt 0.112 | |
| type train | step 2870 | loss 2.7915 | lr 1.0e-03 | norm 3.3547 | dt 0.113 | |
| type train | step 2880 | loss 2.7795 | lr 1.0e-03 | norm 3.8351 | dt 0.112 | |
| type train | step 2890 | loss 2.7911 | lr 1.0e-03 | norm 3.7898 | dt 0.111 | |
| type train | step 2900 | loss 2.7968 | lr 1.0e-03 | norm 3.3697 | dt 0.112 | |
| type train | step 2910 | loss 2.7973 | lr 1.0e-03 | norm 2.7783 | dt 0.113 | |
| type train | step 2920 | loss 2.7721 | lr 1.0e-03 | norm 4.2394 | dt 0.112 | |
| type train | step 2930 | loss 2.8144 | lr 1.0e-03 | norm 3.7734 | dt 0.114 | |
| type train | step 2940 | loss 2.7893 | lr 1.0e-03 | norm 3.3477 | dt 0.113 | |
| type train | step 2950 | loss 2.8055 | lr 1.0e-03 | norm 4.1091 | dt 0.113 | |
| type train | step 2960 | loss 2.8119 | lr 1.0e-03 | norm 3.3889 | dt 0.114 | |
| type train | step 2970 | loss 2.8116 | lr 1.0e-03 | norm 2.3539 | dt 0.112 | |
| type train | step 2980 | loss 2.7820 | lr 1.0e-03 | norm 3.1976 | dt 0.114 | |
| type train | step 2990 | loss 2.7685 | lr 1.0e-03 | norm 3.1358 | dt 0.112 | |
| type train | step 3000 | loss 2.7698 | lr 1.0e-03 | norm 3.3466 | dt 0.114 | |
| type train | step 3010 | loss 2.7901 | lr 1.0e-03 | norm 4.0134 | dt 0.112 | |
| type train | step 3020 | loss 2.7774 | lr 1.0e-03 | norm 3.3120 | dt 0.112 | |
| type train | step 3030 | loss 2.7468 | lr 1.0e-03 | norm 3.5089 | dt 0.112 | |
| type train | step 3040 | loss 2.7845 | lr 1.0e-03 | norm 3.5996 | dt 0.683 | |
| type train | step 3050 | loss 2.7484 | lr 1.0e-03 | norm 3.3160 | dt 0.113 | |
| type train | step 3060 | loss 2.7802 | lr 1.0e-03 | norm 3.5098 | dt 0.112 | |
| type train | step 3070 | loss 2.7648 | lr 1.0e-03 | norm 2.9626 | dt 0.113 | |
| type train | step 3080 | loss 2.7928 | lr 1.0e-03 | norm 4.9214 | dt 0.112 | |
| type train | step 3090 | loss 2.7792 | lr 1.0e-03 | norm 3.0415 | dt 0.114 | |
| type train | step 3100 | loss 2.7703 | lr 1.0e-03 | norm 3.8194 | dt 0.112 | |
| type train | step 3110 | loss 2.7536 | lr 1.0e-03 | norm 2.6179 | dt 0.113 | |
| type train | step 3120 | loss 2.7777 | lr 1.0e-03 | norm 3.8597 | dt 0.111 | |
| type train | step 3130 | loss 2.7586 | lr 1.0e-03 | norm 3.5598 | dt 0.113 | |
| type train | step 3140 | loss 2.7790 | lr 1.0e-03 | norm 3.6057 | dt 0.112 | |
| type train | step 3150 | loss 2.7643 | lr 1.0e-03 | norm 2.9176 | dt 0.113 | |
| type train | step 3160 | loss 2.7912 | lr 1.0e-03 | norm 3.6541 | dt 0.113 | |
| type train | step 3170 | loss 2.7408 | lr 1.0e-03 | norm 3.4311 | dt 0.113 | |
| type train | step 3180 | loss 2.7720 | lr 1.0e-03 | norm 3.8333 | dt 0.112 | |
| type train | step 3190 | loss 2.7741 | lr 1.0e-03 | norm 2.7587 | dt 0.113 | |
| type train | step 3200 | loss 2.7577 | lr 1.0e-03 | norm 3.2884 | dt 0.112 | |
| type train | step 3210 | loss 2.7489 | lr 1.0e-03 | norm 4.7644 | dt 0.113 | |
| type train | step 3220 | loss 2.7650 | lr 1.0e-03 | norm 4.1145 | dt 0.111 | |
| type train | step 3230 | loss 2.7511 | lr 1.0e-03 | norm 3.1436 | dt 0.112 | |
| type train | step 3240 | loss 2.7716 | lr 1.0e-03 | norm 4.0496 | dt 0.112 | |
| type train | step 3250 | loss 2.7592 | lr 1.0e-03 | norm 3.4712 | dt 0.112 | |
| type train | step 3260 | loss 2.7435 | lr 1.0e-03 | norm 3.2489 | dt 0.112 | |
| type train | step 3270 | loss 2.7635 | lr 1.0e-03 | norm 5.3592 | dt 0.111 | |
| type train | step 3280 | loss 2.7653 | lr 1.0e-03 | norm 3.9390 | dt 0.115 | |
| type train | step 3290 | loss 2.7688 | lr 1.0e-03 | norm 3.1874 | dt 0.113 | |
| type train | step 3300 | loss 2.7344 | lr 1.0e-03 | norm 2.8381 | dt 0.114 | |
| type train | step 3310 | loss 2.7841 | lr 1.0e-03 | norm 4.4371 | dt 0.113 | |
| type train | step 3320 | loss 2.7544 | lr 1.0e-03 | norm 3.4362 | dt 0.113 | |
| type train | step 3330 | loss 2.7671 | lr 1.0e-03 | norm 2.6081 | dt 0.114 | |
| type train | step 3340 | loss 2.7799 | lr 1.0e-03 | norm 2.9720 | dt 0.113 | |
| type train | step 3350 | loss 2.7861 | lr 1.0e-03 | norm 4.1959 | dt 0.112 | |
| type train | step 3360 | loss 2.7476 | lr 1.0e-03 | norm 3.6431 | dt 0.112 | |
| type train | step 3370 | loss 2.7421 | lr 1.0e-03 | norm 3.7062 | dt 0.113 | |
| type train | step 3380 | loss 2.7359 | lr 1.0e-03 | norm 3.2727 | dt 0.112 | |
| type train | step 3390 | loss 2.7536 | lr 1.0e-03 | norm 4.0304 | dt 0.112 | |
| type train | step 3400 | loss 2.7492 | lr 1.0e-03 | norm 4.5609 | dt 0.112 | |
| type train | step 3410 | loss 2.7132 | lr 1.0e-03 | norm 2.9332 | dt 0.111 | |
| type train | step 3420 | loss 2.7632 | lr 1.0e-03 | norm 3.8717 | dt 0.453 | |
| type train | step 3430 | loss 2.7204 | lr 1.0e-03 | norm 3.7906 | dt 0.114 | |
| type train | step 3440 | loss 2.7436 | lr 1.0e-03 | norm 3.1095 | dt 0.112 | |
| type train | step 3450 | loss 2.7374 | lr 1.0e-03 | norm 3.4169 | dt 0.112 | |
| type train | step 3460 | loss 2.7563 | lr 1.0e-03 | norm 3.8623 | dt 0.113 | |
| type train | step 3470 | loss 2.7534 | lr 1.0e-03 | norm 4.0683 | dt 0.112 | |
| type train | step 3480 | loss 2.7455 | lr 1.0e-03 | norm 4.5492 | dt 0.112 | |
| type train | step 3490 | loss 2.7274 | lr 1.0e-03 | norm 3.2147 | dt 0.112 | |
| type train | step 3500 | loss 2.7516 | lr 1.0e-03 | norm 4.0523 | dt 0.113 | |
| type train | step 3510 | loss 2.7194 | lr 1.0e-03 | norm 3.0837 | dt 0.112 | |
| type train | step 3520 | loss 2.7470 | lr 1.0e-03 | norm 4.2621 | dt 0.113 | |
| type train | step 3530 | loss 2.7400 | lr 1.0e-03 | norm 3.6261 | dt 0.112 | |
| type train | step 3540 | loss 2.7652 | lr 1.0e-03 | norm 4.2756 | dt 0.112 | |
| type train | step 3550 | loss 2.7168 | lr 1.0e-03 | norm 4.0585 | dt 0.112 | |
| type train | step 3560 | loss 2.7429 | lr 1.0e-03 | norm 3.2045 | dt 0.112 | |
| type train | step 3570 | loss 2.7464 | lr 1.0e-03 | norm 2.7687 | dt 0.113 | |
| type train | step 3580 | loss 2.7373 | lr 1.0e-03 | norm 5.4374 | dt 0.115 | |
| type train | step 3590 | loss 2.7237 | lr 1.0e-03 | norm 4.9567 | dt 0.112 | |
| type train | step 3600 | loss 2.7365 | lr 1.0e-03 | norm 3.8587 | dt 0.113 | |
| type train | step 3610 | loss 2.7276 | lr 1.0e-03 | norm 3.8897 | dt 0.112 | |
| type train | step 3620 | loss 2.7446 | lr 1.0e-03 | norm 3.1910 | dt 0.113 | |
| type train | step 3630 | loss 2.7278 | lr 1.0e-03 | norm 2.7554 | dt 0.112 | |
| type train | step 3640 | loss 2.7135 | lr 1.0e-03 | norm 2.9128 | dt 0.112 | |
| type train | step 3650 | loss 2.7271 | lr 1.0e-03 | norm 3.9133 | dt 0.111 | |
| type train | step 3660 | loss 2.7397 | lr 1.0e-03 | norm 4.2219 | dt 0.117 | |
| type train | step 3670 | loss 2.7378 | lr 1.0e-03 | norm 2.5150 | dt 0.112 | |
| type train | step 3680 | loss 2.7068 | lr 1.0e-03 | norm 2.8761 | dt 0.112 | |
| type train | step 3690 | loss 2.7525 | lr 1.0e-03 | norm 3.2987 | dt 0.112 | |
| type train | step 3700 | loss 2.7258 | lr 1.0e-03 | norm 3.2104 | dt 0.112 | |
| type train | step 3710 | loss 2.7404 | lr 1.0e-03 | norm 3.2426 | dt 0.114 | |
| type train | step 3720 | loss 2.7449 | lr 1.0e-03 | norm 2.9995 | dt 0.113 | |
| type train | step 3730 | loss 2.7604 | lr 1.0e-03 | norm 4.3234 | dt 0.112 | |
| type train | step 3740 | loss 2.7211 | lr 1.0e-03 | norm 2.9728 | dt 0.112 | |
| type train | step 3750 | loss 2.7178 | lr 1.0e-03 | norm 4.3165 | dt 0.113 | |
| type train | step 3760 | loss 2.7066 | lr 1.0e-03 | norm 2.5637 | dt 0.114 | |
| type train | step 3770 | loss 2.7282 | lr 1.0e-03 | norm 3.4858 | dt 0.114 | |
| type train | step 3780 | loss 2.7243 | lr 1.0e-03 | norm 4.1968 | dt 0.112 | |
| type train | step 3790 | loss 2.6911 | lr 1.0e-03 | norm 3.7193 | dt 0.114 | |
| type train | step 3800 | loss 2.7315 | lr 1.0e-03 | norm 3.7593 | dt 0.274 | |
| type train | step 3810 | loss 2.6951 | lr 1.0e-03 | norm 3.5749 | dt 0.113 | |
| type train | step 3820 | loss 2.7177 | lr 1.0e-03 | norm 2.8342 | dt 0.112 | |
| type train | step 3830 | loss 2.7106 | lr 1.0e-03 | norm 3.6803 | dt 0.113 | |
| type train | step 3840 | loss 2.7366 | lr 1.0e-03 | norm 3.8993 | dt 0.112 | |
| type train | step 3850 | loss 2.7305 | lr 1.0e-03 | norm 3.5593 | dt 0.114 | |
| type train | step 3860 | loss 2.7142 | lr 1.0e-03 | norm 3.3496 | dt 0.112 | |
| type train | step 3870 | loss 2.7017 | lr 1.0e-03 | norm 3.7996 | dt 0.113 | |
| type train | step 3880 | loss 2.7246 | lr 1.0e-03 | norm 2.9687 | dt 0.115 | |
| type train | step 3890 | loss 2.6996 | lr 1.0e-03 | norm 4.3324 | dt 0.112 | |
| type train | step 3900 | loss 2.7299 | lr 1.0e-03 | norm 4.7540 | dt 0.112 | |
| type train | step 3910 | loss 2.7175 | lr 1.0e-03 | norm 4.5622 | dt 0.112 | |
| type train | step 3920 | loss 2.7445 | lr 1.0e-03 | norm 4.5210 | dt 0.112 | |
| type train | step 3930 | loss 2.6917 | lr 1.0e-03 | norm 4.2401 | dt 0.113 | |
| type train | step 3940 | loss 2.7196 | lr 1.0e-03 | norm 3.9838 | dt 0.111 | |
| type train | step 3950 | loss 2.7259 | lr 1.0e-03 | norm 3.5923 | dt 0.112 | |
| type train | step 3960 | loss 2.7024 | lr 1.0e-03 | norm 2.9689 | dt 0.112 | |
| type train | step 3970 | loss 2.6920 | lr 1.0e-03 | norm 5.0040 | dt 0.112 | |
| type train | step 3980 | loss 2.7100 | lr 1.0e-03 | norm 3.7985 | dt 0.113 | |
| type train | step 3990 | loss 2.7032 | lr 1.0e-03 | norm 3.4739 | dt 0.113 | |
| type train | step 4000 | loss 2.7203 | lr 1.0e-03 | norm 3.7162 | dt 0.112 | |
| type train | step 4010 | loss 2.7059 | lr 1.0e-03 | norm 3.3635 | dt 0.112 | |
| type train | step 4020 | loss 2.6927 | lr 1.0e-03 | norm 3.5211 | dt 0.112 | |
| type train | step 4030 | loss 2.6982 | lr 1.0e-03 | norm 3.4169 | dt 0.114 | |
| type train | step 4040 | loss 2.7185 | lr 1.0e-03 | norm 3.5633 | dt 0.112 | |
| type train | step 4050 | loss 2.7151 | lr 1.0e-03 | norm 3.2189 | dt 0.112 | |
| type train | step 4060 | loss 2.6830 | lr 1.0e-03 | norm 3.1005 | dt 0.113 | |
| type train | step 4070 | loss 2.7358 | lr 1.0e-03 | norm 4.2720 | dt 0.112 | |
| type train | step 4080 | loss 2.7034 | lr 1.0e-03 | norm 2.6812 | dt 0.112 | |
| type train | step 4090 | loss 2.7200 | lr 1.0e-03 | norm 3.8335 | dt 0.112 | |
| type train | step 4100 | loss 2.7263 | lr 1.0e-03 | norm 3.0014 | dt 0.113 | |
| type train | step 4110 | loss 2.7304 | lr 1.0e-03 | norm 3.1252 | dt 0.112 | |
| type train | step 4120 | loss 2.6995 | lr 1.0e-03 | norm 3.2771 | dt 0.112 | |
| type train | step 4130 | loss 2.6983 | lr 1.0e-03 | norm 4.1201 | dt 0.112 | |
| type train | step 4140 | loss 2.6936 | lr 1.0e-03 | norm 3.9117 | dt 0.113 | |
| type train | step 4150 | loss 2.7068 | lr 1.0e-03 | norm 3.6974 | dt 0.113 | |
| type train | step 4160 | loss 2.6984 | lr 1.0e-03 | norm 3.1602 | dt 0.113 | |
| type train | step 4170 | loss 2.6660 | lr 1.0e-03 | norm 3.7614 | dt 0.114 | |
| type train | step 4180 | loss 2.7068 | lr 1.0e-03 | norm 3.5035 | dt 0.122 | |
| type train | step 4190 | loss 2.6706 | lr 1.0e-03 | norm 3.4399 | dt 0.113 | |
| type train | step 4200 | loss 2.7023 | lr 1.0e-03 | norm 3.7420 | dt 0.112 | |
| type train | step 4210 | loss 2.6893 | lr 1.0e-03 | norm 3.5744 | dt 0.113 | |
| type train | step 4220 | loss 2.7063 | lr 1.0e-03 | norm 2.9496 | dt 0.114 | |
| type train | step 4230 | loss 2.7079 | lr 1.0e-03 | norm 4.1317 | dt 0.112 | |
| type train | step 4240 | loss 2.7058 | lr 1.0e-03 | norm 5.3741 | dt 0.113 | |
| type train | step 4250 | loss 2.6881 | lr 1.0e-03 | norm 3.9840 | dt 0.112 | |
| type train | step 4260 | loss 2.7151 | lr 1.0e-03 | norm 5.0553 | dt 0.111 | |
| type train | step 4270 | loss 2.6837 | lr 1.0e-03 | norm 5.0900 | dt 0.112 | |
| type train | step 4280 | loss 2.7110 | lr 1.0e-03 | norm 3.9984 | dt 0.112 | |
| type train | step 4290 | loss 2.6954 | lr 1.0e-03 | norm 3.2105 | dt 0.112 | |
| type train | step 4300 | loss 2.7243 | lr 1.0e-03 | norm 4.0479 | dt 0.112 | |
| type train | step 4310 | loss 2.6692 | lr 1.0e-03 | norm 3.1193 | dt 0.112 | |
| type train | step 4320 | loss 2.7000 | lr 1.0e-03 | norm 4.1347 | dt 0.112 | |
| type train | step 4330 | loss 2.7062 | lr 1.0e-03 | norm 3.1431 | dt 0.112 | |
| type train | step 4340 | loss 2.6915 | lr 1.0e-03 | norm 3.9019 | dt 0.113 | |
| type train | step 4350 | loss 2.6757 | lr 1.0e-03 | norm 3.2795 | dt 0.113 | |
| type train | step 4360 | loss 2.6951 | lr 1.0e-03 | norm 3.9674 | dt 0.113 | |
| type train | step 4370 | loss 2.6855 | lr 1.0e-03 | norm 4.0404 | dt 0.112 | |
| type train | step 4380 | loss 2.7024 | lr 1.0e-03 | norm 4.5539 | dt 0.113 | |
| type train | step 4390 | loss 2.6917 | lr 1.0e-03 | norm 4.2215 | dt 0.113 | |
| type train | step 4400 | loss 2.6675 | lr 1.0e-03 | norm 2.7754 | dt 0.112 | |
| type train | step 4410 | loss 2.6809 | lr 1.0e-03 | norm 3.2600 | dt 0.112 | |
| type train | step 4420 | loss 2.6956 | lr 1.0e-03 | norm 3.2976 | dt 0.112 | |
| type train | step 4430 | loss 2.6984 | lr 1.0e-03 | norm 3.1306 | dt 0.113 | |
| type train | step 4440 | loss 2.6770 | lr 1.0e-03 | norm 4.3158 | dt 0.113 | |
| type train | step 4450 | loss 2.7173 | lr 1.0e-03 | norm 3.8809 | dt 0.113 | |
| type train | step 4460 | loss 2.6844 | lr 1.0e-03 | norm 3.6960 | dt 0.112 | |
| type train | step 4470 | loss 2.6986 | lr 1.0e-03 | norm 3.2030 | dt 0.113 | |
| type train | step 4480 | loss 2.7112 | lr 1.0e-03 | norm 3.9093 | dt 0.113 | |
| type train | step 4490 | loss 2.7174 | lr 1.0e-03 | norm 4.0407 | dt 0.113 | |
| type train | step 4500 | loss 2.6825 | lr 1.0e-03 | norm 4.1231 | dt 0.112 | |
| type train | step 4510 | loss 2.6809 | lr 1.0e-03 | norm 4.3415 | dt 0.112 | |
| type train | step 4520 | loss 2.6655 | lr 1.0e-03 | norm 2.8773 | dt 0.115 | |
| type train | step 4530 | loss 2.6912 | lr 1.0e-03 | norm 4.1305 | dt 0.115 | |
| type train | step 4540 | loss 2.6796 | lr 1.0e-03 | norm 3.3092 | dt 0.114 | |
| type train | step 4550 | loss 2.6493 | lr 1.0e-03 | norm 3.4483 | dt 0.113 | |
| type train | step 4560 | loss 2.6926 | lr 1.0e-03 | norm 3.4501 | dt 0.294 | |
| type train | step 4570 | loss 2.6629 | lr 1.0e-03 | norm 4.3711 | dt 0.112 | |
| type train | step 4580 | loss 2.6822 | lr 1.0e-03 | norm 4.2044 | dt 0.112 | |
| type train | step 4590 | loss 2.6727 | lr 1.0e-03 | norm 3.6775 | dt 0.113 | |
| type train | step 4600 | loss 2.6843 | lr 1.0e-03 | norm 2.6381 | dt 0.113 | |
| type train | step 4610 | loss 2.6898 | lr 1.0e-03 | norm 4.4385 | dt 0.113 | |
| type train | step 4620 | loss 2.6811 | lr 1.0e-03 | norm 4.5569 | dt 0.113 | |
| type train | step 4630 | loss 2.6692 | lr 1.0e-03 | norm 4.3784 | dt 0.114 | |
| type train | step 4640 | loss 2.6894 | lr 1.0e-03 | norm 4.3346 | dt 0.112 | |
| type train | step 4650 | loss 2.6616 | lr 1.0e-03 | norm 4.1270 | dt 0.114 | |
| type train | step 4660 | loss 2.6857 | lr 1.0e-03 | norm 3.7124 | dt 0.114 | |
| type train | step 4670 | loss 2.6729 | lr 1.0e-03 | norm 2.9445 | dt 0.114 | |
| type train | step 4680 | loss 2.7061 | lr 1.0e-03 | norm 5.1870 | dt 0.113 | |
| type train | step 4690 | loss 2.6547 | lr 1.0e-03 | norm 4.0145 | dt 0.112 | |
| type train | step 4700 | loss 2.6832 | lr 1.0e-03 | norm 3.6089 | dt 0.113 | |
| type train | step 4710 | loss 2.6875 | lr 1.0e-03 | norm 3.4779 | dt 0.113 | |
| type train | step 4720 | loss 2.6775 | lr 1.0e-03 | norm 4.6578 | dt 0.113 | |
| type train | step 4730 | loss 2.6581 | lr 1.0e-03 | norm 3.5623 | dt 0.114 | |
| type train | step 4740 | loss 2.6719 | lr 1.0e-03 | norm 3.3082 | dt 0.113 | |
| type train | step 4750 | loss 2.6682 | lr 1.0e-03 | norm 3.8716 | dt 0.113 | |
| type train | step 4760 | loss 2.6786 | lr 1.0e-03 | norm 3.2089 | dt 0.114 | |
| type train | step 4770 | loss 2.6733 | lr 1.0e-03 | norm 3.6031 | dt 0.114 | |
| type train | step 4780 | loss 2.6551 | lr 1.0e-03 | norm 3.8391 | dt 0.114 | |
| type train | step 4790 | loss 2.6726 | lr 1.0e-03 | norm 5.0322 | dt 0.112 | |
| type train | step 4800 | loss 2.6790 | lr 1.0e-03 | norm 3.8612 | dt 0.113 | |
| type train | step 4810 | loss 2.6857 | lr 1.0e-03 | norm 3.5971 | dt 0.113 | |
| type train | step 4820 | loss 2.6557 | lr 1.0e-03 | norm 3.6249 | dt 0.113 | |
| type train | step 4830 | loss 2.6996 | lr 1.0e-03 | norm 4.0842 | dt 0.112 | |
| type train | step 4840 | loss 2.6734 | lr 1.0e-03 | norm 3.9741 | dt 0.112 | |
| type train | step 4850 | loss 2.6854 | lr 1.0e-03 | norm 3.1812 | dt 0.112 | |
| type train | step 4860 | loss 2.6908 | lr 1.0e-03 | norm 3.2802 | dt 0.112 | |
| type train | step 4870 | loss 2.6986 | lr 1.0e-03 | norm 4.2077 | dt 0.112 | |
| type train | step 4880 | loss 2.6561 | lr 1.0e-03 | norm 2.2364 | dt 0.112 | |
| type train | step 4890 | loss 2.6601 | lr 1.0e-03 | norm 4.0356 | dt 0.112 | |
| type train | step 4900 | loss 2.6486 | lr 1.0e-03 | norm 3.2067 | dt 0.112 | |
| type train | step 4910 | loss 2.6725 | lr 1.0e-03 | norm 3.2824 | dt 0.113 | |
| type train | step 4920 | loss 2.6629 | lr 1.0e-03 | norm 3.1649 | dt 0.112 | |
| type train | step 4930 | loss 2.6344 | lr 1.0e-03 | norm 3.8950 | dt 0.112 | |
| type train | step 4940 | loss 2.6770 | lr 1.0e-03 | norm 3.6144 | dt 0.121 | |
| type train | step 4950 | loss 2.6364 | lr 1.0e-03 | norm 3.4820 | dt 0.113 | |
| type train | step 4960 | loss 2.6633 | lr 1.0e-03 | norm 2.8684 | dt 0.112 | |
| type train | step 4970 | loss 2.6570 | lr 1.0e-03 | norm 3.7896 | dt 0.113 | |
| type train | step 4980 | loss 2.6794 | lr 1.0e-03 | norm 5.2916 | dt 0.111 | |
| type train | step 4990 | loss 2.6667 | lr 1.0e-03 | norm 3.1418 | dt 0.112 | |
| type train | step 5000 | loss 2.6611 | lr 1.0e-03 | norm 3.3038 | dt 0.112 | |