diff --git "a/train.log" "b/train.log" new file mode 100644--- /dev/null +++ "b/train.log" @@ -0,0 +1,2000 @@ +type train | step 10 | loss 4.8309 | lr 1.0e-05 | norm 4.0009 | dt 0.006 +type train | step 20 | loss 4.7680 | lr 2.0e-05 | norm 4.0526 | dt 0.006 +type train | step 30 | loss 4.6570 | lr 3.0e-05 | norm 2.9620 | dt 0.006 +type train | step 40 | loss 4.5865 | lr 4.0e-05 | norm 1.7101 | dt 0.006 +type train | step 50 | loss 4.5008 | lr 5.0e-05 | norm 1.6243 | dt 0.006 +type train | step 60 | loss 4.4437 | lr 6.0e-05 | norm 1.6355 | dt 0.006 +type train | step 70 | loss 4.3919 | lr 7.0e-05 | norm 1.6051 | dt 0.006 +type train | step 80 | loss 4.3534 | lr 8.0e-05 | norm 1.4969 | dt 0.006 +type train | step 90 | loss 4.2577 | lr 9.0e-05 | norm 1.5033 | dt 0.006 +type train | step 100 | loss 4.1658 | lr 1.0e-04 | norm 1.5262 | dt 0.006 +type train | step 110 | loss 4.0947 | lr 1.1e-04 | norm 1.4606 | dt 0.006 +type train | step 120 | loss 4.0375 | lr 1.2e-04 | norm 1.3758 | dt 0.005 +type train | step 130 | loss 3.9384 | lr 1.3e-04 | norm 1.3661 | dt 0.006 +type train | step 140 | loss 3.9065 | lr 1.4e-04 | norm 1.3231 | dt 0.006 +type train | step 150 | loss 3.7817 | lr 1.5e-04 | norm 1.3106 | dt 0.006 +type train | step 160 | loss 3.6835 | lr 1.6e-04 | norm 1.2825 | dt 0.005 +type train | step 170 | loss 3.5912 | lr 1.7e-04 | norm 1.2545 | dt 0.005 +type train | step 180 | loss 3.5723 | lr 1.8e-04 | norm 1.1794 | dt 0.005 +type train | step 190 | loss 3.4632 | lr 1.9e-04 | norm 1.3593 | dt 0.005 +type train | step 200 | loss 3.4484 | lr 2.0e-04 | norm 1.0811 | dt 0.006 +type train | step 210 | loss 3.3530 | lr 2.1e-04 | norm 1.0967 | dt 0.006 +type train | step 220 | loss 3.2461 | lr 2.2e-04 | norm 1.0648 | dt 0.006 +type train | step 230 | loss 3.1835 | lr 2.3e-04 | norm 0.8570 | dt 0.006 +type train | step 240 | loss 3.1844 | lr 2.4e-04 | norm 0.8710 | dt 0.006 +type train | step 250 | loss 3.0983 | lr 2.5e-04 | norm 0.9418 | dt 0.006 +type train | step 260 | loss 3.0448 | lr 2.6e-04 | norm 1.0380 | dt 0.005 +type train | step 270 | loss 3.0137 | lr 2.7e-04 | norm 0.7408 | dt 0.005 +type train | step 280 | loss 2.9744 | lr 2.8e-04 | norm 1.1018 | dt 0.006 +type train | step 290 | loss 2.9073 | lr 2.9e-04 | norm 0.7634 | dt 0.005 +type train | step 300 | loss 2.9146 | lr 3.0e-04 | norm 0.9888 | dt 0.006 +type train | step 310 | loss 2.8528 | lr 3.1e-04 | norm 0.7189 | dt 0.006 +type train | step 320 | loss 2.8050 | lr 3.2e-04 | norm 0.9847 | dt 0.006 +type train | step 330 | loss 2.7807 | lr 3.3e-04 | norm 0.9837 | dt 0.007 +type train | step 340 | loss 2.7291 | lr 3.4e-04 | norm 0.6222 | dt 0.006 +type train | step 350 | loss 2.7345 | lr 3.5e-04 | norm 1.1165 | dt 0.006 +type train | step 360 | loss 2.6843 | lr 3.6e-04 | norm 0.7136 | dt 0.006 +type train | step 370 | loss 2.6563 | lr 3.7e-04 | norm 0.7871 | dt 0.006 +type train | step 380 | loss 2.6742 | lr 3.8e-04 | norm 1.0987 | dt 0.006 +type train | step 390 | loss 2.6187 | lr 3.9e-04 | norm 1.0026 | dt 0.006 +type train | step 400 | loss 2.6024 | lr 4.0e-04 | norm 0.8218 | dt 0.006 +type train | step 410 | loss 2.6043 | lr 4.1e-04 | norm 1.1103 | dt 0.006 +type train | step 420 | loss 2.5862 | lr 4.2e-04 | norm 1.0664 | dt 0.006 +type train | step 430 | loss 2.5743 | lr 4.3e-04 | norm 1.0234 | dt 0.006 +type train | step 440 | loss 2.5464 | lr 4.4e-04 | norm 1.5495 | dt 0.006 +type train | step 450 | loss 2.5382 | lr 4.5e-04 | norm 1.1610 | dt 0.006 +type train | step 460 | loss 2.5061 | lr 4.6e-04 | norm 1.8627 | dt 0.006 +type train | step 470 | loss 2.5257 | lr 4.7e-04 | norm 2.3599 | dt 0.006 +type train | step 480 | loss 2.5216 | lr 4.8e-04 | norm 1.5766 | dt 0.006 +type train | step 490 | loss 2.5057 | lr 4.9e-04 | norm 1.5181 | dt 0.006 +type train | step 500 | loss 2.4930 | lr 5.0e-04 | norm 1.0661 | dt 0.006 +type train | step 510 | loss 2.4979 | lr 5.1e-04 | norm 1.0381 | dt 0.005 +type train | step 520 | loss 2.5275 | lr 5.2e-04 | norm 1.5448 | dt 0.006 +type train | step 530 | loss 2.4824 | lr 5.3e-04 | norm 1.1008 | dt 0.006 +type train | step 540 | loss 2.4692 | lr 5.4e-04 | norm 1.5237 | dt 0.006 +type train | step 550 | loss 2.4709 | lr 5.5e-04 | norm 1.0866 | dt 0.006 +type train | step 560 | loss 2.4789 | lr 5.6e-04 | norm 2.2736 | dt 0.006 +type train | step 570 | loss 2.4728 | lr 5.7e-04 | norm 1.0876 | dt 0.006 +type train | step 580 | loss 2.4605 | lr 5.8e-04 | norm 1.2144 | dt 0.006 +type train | step 590 | loss 2.4287 | lr 5.9e-04 | norm 1.2082 | dt 0.006 +type train | step 600 | loss 2.4317 | lr 6.0e-04 | norm 0.9409 | dt 0.006 +type train | step 610 | loss 2.4500 | lr 6.1e-04 | norm 1.5193 | dt 0.007 +type train | step 620 | loss 2.4432 | lr 6.2e-04 | norm 1.5774 | dt 0.006 +type train | step 630 | loss 2.4417 | lr 6.3e-04 | norm 1.1207 | dt 0.006 +type train | step 640 | loss 2.4098 | lr 6.4e-04 | norm 1.2874 | dt 0.006 +type train | step 650 | loss 2.4189 | lr 6.5e-04 | norm 1.5379 | dt 0.006 +type train | step 660 | loss 2.4032 | lr 6.6e-04 | norm 0.9878 | dt 0.006 +type train | step 670 | loss 2.4409 | lr 6.7e-04 | norm 1.6552 | dt 0.006 +type train | step 680 | loss 2.3919 | lr 6.8e-04 | norm 1.1341 | dt 0.006 +type train | step 690 | loss 2.4541 | lr 6.9e-04 | norm 1.5144 | dt 0.006 +type train | step 700 | loss 2.4118 | lr 7.0e-04 | norm 1.3966 | dt 0.006 +type train | step 710 | loss 2.3339 | lr 7.1e-04 | norm 1.4972 | dt 0.006 +type train | step 720 | loss 2.3998 | lr 7.2e-04 | norm 3.2408 | dt 0.006 +type train | step 730 | loss 2.3676 | lr 7.3e-04 | norm 1.6927 | dt 0.006 +type train | step 740 | loss 2.3409 | lr 7.4e-04 | norm 1.1834 | dt 0.006 +type train | step 750 | loss 2.3632 | lr 7.5e-04 | norm 1.5752 | dt 0.006 +type train | step 760 | loss 2.3656 | lr 7.6e-04 | norm 1.3305 | dt 0.005 +type train | step 770 | loss 2.3188 | lr 7.7e-04 | norm 1.3809 | dt 0.006 +type train | step 780 | loss 2.3938 | lr 7.8e-04 | norm 1.9301 | dt 0.006 +type train | step 790 | loss 2.3464 | lr 7.9e-04 | norm 2.3654 | dt 0.006 +type train | step 800 | loss 2.3214 | lr 8.0e-04 | norm 1.4018 | dt 0.006 +type train | step 810 | loss 2.3682 | lr 8.1e-04 | norm 2.0394 | dt 0.006 +type train | step 820 | loss 2.2901 | lr 8.2e-04 | norm 1.6638 | dt 0.006 +type train | step 830 | loss 2.3005 | lr 8.3e-04 | norm 1.2414 | dt 0.006 +type train | step 840 | loss 2.3358 | lr 8.4e-04 | norm 1.5463 | dt 0.006 +type train | step 850 | loss 2.3436 | lr 8.5e-04 | norm 2.2538 | dt 0.006 +type train | step 860 | loss 2.2528 | lr 8.6e-04 | norm 1.4287 | dt 0.006 +type train | step 870 | loss 2.2619 | lr 8.7e-04 | norm 1.3034 | dt 0.006 +type train | step 880 | loss 2.2317 | lr 8.8e-04 | norm 0.8974 | dt 0.006 +type train | step 890 | loss 2.2509 | lr 8.9e-04 | norm 1.3397 | dt 0.006 +type train | step 900 | loss 2.2899 | lr 9.0e-04 | norm 1.4124 | dt 0.006 +type train | step 910 | loss 2.3197 | lr 9.1e-04 | norm 1.3067 | dt 0.006 +type train | step 920 | loss 2.2373 | lr 9.2e-04 | norm 1.0454 | dt 0.005 +type train | step 930 | loss 2.2257 | lr 9.3e-04 | norm 1.5683 | dt 0.005 +type train | step 940 | loss 2.2533 | lr 9.4e-04 | norm 2.4597 | dt 0.005 +type train | step 950 | loss 2.2576 | lr 9.5e-04 | norm 1.3404 | dt 0.005 +type train | step 960 | loss 2.2831 | lr 9.6e-04 | norm 1.4621 | dt 0.005 +type train | step 970 | loss 2.2055 | lr 9.7e-04 | norm 1.2653 | dt 0.005 +type train | step 980 | loss 2.1981 | lr 9.8e-04 | norm 1.0926 | dt 0.005 +type train | step 990 | loss 2.2064 | lr 9.9e-04 | norm 1.2123 | dt 0.005 +type train | step 1000 | loss 2.2089 | lr 1.0e-03 | norm 0.9849 | dt 0.006 +type train | step 1010 | loss 2.1902 | lr 1.0e-03 | norm 1.0946 | dt 0.005 +type train | step 1020 | loss 2.2237 | lr 1.0e-03 | norm 1.2810 | dt 0.006 +type train | step 1030 | loss 2.1790 | lr 1.0e-03 | norm 1.3467 | dt 0.006 +type train | step 1040 | loss 2.2038 | lr 1.0e-03 | norm 1.5802 | dt 0.006 +type train | step 1050 | loss 2.1567 | lr 1.0e-03 | norm 1.6174 | dt 0.006 +type train | step 1060 | loss 2.1583 | lr 1.0e-03 | norm 1.3230 | dt 0.006 +type train | step 1070 | loss 2.1280 | lr 1.0e-03 | norm 1.2174 | dt 0.006 +type train | step 1080 | loss 2.0970 | lr 1.0e-03 | norm 1.2499 | dt 0.006 +type train | step 1090 | loss 2.1542 | lr 1.0e-03 | norm 1.4954 | dt 0.006 +type train | step 1100 | loss 2.1388 | lr 1.0e-03 | norm 1.3708 | dt 0.006 +type train | step 1110 | loss 2.1379 | lr 1.0e-03 | norm 1.4237 | dt 0.006 +type train | step 1120 | loss 2.1437 | lr 1.0e-03 | norm 2.6375 | dt 0.006 +type train | step 1130 | loss 2.1941 | lr 1.0e-03 | norm 1.7433 | dt 0.006 +type train | step 1140 | loss 2.0763 | lr 1.0e-03 | norm 2.0836 | dt 0.006 +type train | step 1150 | loss 2.0915 | lr 1.0e-03 | norm 1.5835 | dt 0.007 +type train | step 1160 | loss 2.1158 | lr 1.0e-03 | norm 1.6225 | dt 0.007 +type train | step 1170 | loss 2.1176 | lr 1.0e-03 | norm 1.7542 | dt 0.006 +type train | step 1180 | loss 2.1085 | lr 1.0e-03 | norm 1.3864 | dt 0.006 +type train | step 1190 | loss 2.0864 | lr 1.0e-03 | norm 1.0832 | dt 0.006 +type train | step 1200 | loss 2.0178 | lr 1.0e-03 | norm 1.2647 | dt 0.007 +type train | step 1210 | loss 2.0655 | lr 1.0e-03 | norm 1.4422 | dt 0.006 +type train | step 1220 | loss 2.0898 | lr 1.0e-03 | norm 1.7953 | dt 0.008 +type train | step 1230 | loss 2.0745 | lr 1.0e-03 | norm 2.1479 | dt 0.006 +type train | step 1240 | loss 2.0639 | lr 1.0e-03 | norm 1.2215 | dt 0.005 +type train | step 1250 | loss 2.0284 | lr 1.0e-03 | norm 1.3596 | dt 0.006 +type train | step 1260 | loss 1.9921 | lr 1.0e-03 | norm 1.7923 | dt 0.006 +type train | step 1270 | loss 2.0117 | lr 1.0e-03 | norm 1.4242 | dt 0.006 +type train | step 1280 | loss 2.0693 | lr 1.0e-03 | norm 2.3749 | dt 0.005 +type train | step 1290 | loss 1.9980 | lr 1.0e-03 | norm 1.8051 | dt 0.006 +type train | step 1300 | loss 2.0587 | lr 1.0e-03 | norm 1.5633 | dt 0.006 +type train | step 1310 | loss 2.0336 | lr 1.0e-03 | norm 1.1776 | dt 0.006 +type train | step 1320 | loss 1.9506 | lr 1.0e-03 | norm 1.5441 | dt 0.005 +type train | step 1330 | loss 2.0163 | lr 1.0e-03 | norm 1.6453 | dt 0.005 +type train | step 1340 | loss 1.9224 | lr 1.0e-03 | norm 1.5125 | dt 0.005 +type train | step 1350 | loss 1.9410 | lr 1.0e-03 | norm 2.1029 | dt 0.006 +type train | step 1360 | loss 1.9012 | lr 1.0e-03 | norm 1.9314 | dt 0.006 +type train | step 1370 | loss 2.0056 | lr 1.0e-03 | norm 1.7261 | dt 0.006 +type train | step 1380 | loss 1.9309 | lr 1.0e-03 | norm 1.5264 | dt 0.006 +type train | step 1390 | loss 2.0265 | lr 1.0e-03 | norm 1.5729 | dt 0.006 +type train | step 1400 | loss 1.9219 | lr 1.0e-03 | norm 1.8232 | dt 0.006 +type train | step 1410 | loss 1.9162 | lr 9.9e-04 | norm 2.0310 | dt 0.006 +type train | step 1420 | loss 1.9581 | lr 9.9e-04 | norm 2.5452 | dt 0.006 +type train | step 1430 | loss 1.8876 | lr 9.9e-04 | norm 1.4725 | dt 0.006 +type train | step 1440 | loss 1.9317 | lr 9.9e-04 | norm 1.3685 | dt 0.006 +type train | step 1450 | loss 1.9836 | lr 9.9e-04 | norm 2.0041 | dt 0.006 +type train | step 1460 | loss 1.9127 | lr 9.9e-04 | norm 1.6845 | dt 0.005 +type train | step 1470 | loss 1.8885 | lr 9.9e-04 | norm 1.5867 | dt 0.005 +type train | step 1480 | loss 1.8761 | lr 9.9e-04 | norm 1.7703 | dt 0.005 +type train | step 1490 | loss 1.8460 | lr 9.9e-04 | norm 1.5615 | dt 0.005 +type train | step 1500 | loss 1.8543 | lr 9.9e-04 | norm 2.4919 | dt 0.006 +type train | step 1510 | loss 1.9269 | lr 9.9e-04 | norm 1.8614 | dt 0.005 +type train | step 1520 | loss 1.8896 | lr 9.9e-04 | norm 1.4824 | dt 0.005 +type train | step 1530 | loss 1.8979 | lr 9.9e-04 | norm 1.4218 | dt 0.005 +type train | step 1540 | loss 1.8593 | lr 9.9e-04 | norm 1.4099 | dt 0.005 +type train | step 1550 | loss 1.8838 | lr 9.9e-04 | norm 1.3439 | dt 0.005 +type train | step 1560 | loss 1.9055 | lr 9.9e-04 | norm 1.8307 | dt 0.005 +type train | step 1570 | loss 1.9361 | lr 9.9e-04 | norm 1.8276 | dt 0.006 +type train | step 1580 | loss 1.8339 | lr 9.9e-04 | norm 1.3923 | dt 0.006 +type train | step 1590 | loss 1.8446 | lr 9.9e-04 | norm 1.3827 | dt 0.006 +type train | step 1600 | loss 1.8264 | lr 9.9e-04 | norm 1.8389 | dt 0.006 +type train | step 1610 | loss 1.8453 | lr 9.9e-04 | norm 1.6979 | dt 0.006 +type train | step 1620 | loss 1.8461 | lr 9.9e-04 | norm 1.4858 | dt 0.006 +type train | step 1630 | loss 1.8863 | lr 9.9e-04 | norm 1.9001 | dt 0.006 +type train | step 1640 | loss 1.8206 | lr 9.9e-04 | norm 1.8193 | dt 0.006 +type train | step 1650 | loss 1.8849 | lr 9.9e-04 | norm 1.8157 | dt 0.006 +type train | step 1660 | loss 1.7959 | lr 9.9e-04 | norm 1.7712 | dt 0.006 +type train | step 1670 | loss 1.8215 | lr 9.9e-04 | norm 1.2602 | dt 0.006 +type train | step 1680 | loss 1.8029 | lr 9.9e-04 | norm 1.3981 | dt 0.006 +type train | step 1690 | loss 1.7521 | lr 9.9e-04 | norm 1.5445 | dt 0.006 +type train | step 1700 | loss 1.8305 | lr 9.9e-04 | norm 1.4391 | dt 0.006 +type train | step 1710 | loss 1.8693 | lr 9.8e-04 | norm 1.5840 | dt 0.006 +type train | step 1720 | loss 1.8145 | lr 9.8e-04 | norm 1.6259 | dt 0.006 +type train | step 1730 | loss 1.8332 | lr 9.8e-04 | norm 1.6102 | dt 0.006 +type train | step 1740 | loss 1.8579 | lr 9.8e-04 | norm 1.5121 | dt 0.006 +type train | step 1750 | loss 1.7465 | lr 9.8e-04 | norm 1.9891 | dt 0.006 +type train | step 1760 | loss 1.7520 | lr 9.8e-04 | norm 1.3789 | dt 0.005 +type train | step 1770 | loss 1.8390 | lr 9.8e-04 | norm 2.3547 | dt 0.005 +type train | step 1780 | loss 1.7852 | lr 9.8e-04 | norm 1.7437 | dt 0.005 +type train | step 1790 | loss 1.8230 | lr 9.8e-04 | norm 1.8772 | dt 0.006 +type train | step 1800 | loss 1.8212 | lr 9.8e-04 | norm 1.4434 | dt 0.006 +type train | step 1810 | loss 1.6940 | lr 9.8e-04 | norm 1.4157 | dt 0.006 +type train | step 1820 | loss 1.7639 | lr 9.8e-04 | norm 1.5168 | dt 0.006 +type train | step 1830 | loss 1.8748 | lr 9.8e-04 | norm 1.3788 | dt 0.007 +type train | step 1840 | loss 1.7878 | lr 9.8e-04 | norm 1.2912 | dt 0.006 +type train | step 1850 | loss 1.8110 | lr 9.8e-04 | norm 1.4117 | dt 0.006 +type train | step 1860 | loss 1.8015 | lr 9.8e-04 | norm 1.8619 | dt 0.005 +type train | step 1870 | loss 1.6858 | lr 9.8e-04 | norm 1.7533 | dt 0.006 +type train | step 1880 | loss 1.7649 | lr 9.8e-04 | norm 1.4285 | dt 0.006 +type train | step 1890 | loss 1.8456 | lr 9.8e-04 | norm 1.4743 | dt 0.005 +type train | step 1900 | loss 1.7485 | lr 9.8e-04 | norm 1.4819 | dt 0.006 +type train | step 1910 | loss 1.8044 | lr 9.8e-04 | norm 1.5007 | dt 0.006 +type train | step 1920 | loss 1.8265 | lr 9.7e-04 | norm 1.5327 | dt 0.006 +type train | step 1930 | loss 1.7172 | lr 9.7e-04 | norm 1.6238 | dt 0.007 +type train | step 1940 | loss 1.8250 | lr 9.7e-04 | norm 1.4092 | dt 0.006 +type train | step 1950 | loss 1.6575 | lr 9.7e-04 | norm 1.6824 | dt 0.006 +type train | step 1960 | loss 1.6851 | lr 9.7e-04 | norm 1.7229 | dt 0.006 +type train | step 1970 | loss 1.5848 | lr 9.7e-04 | norm 1.4158 | dt 0.008 +type train | step 1980 | loss 1.8259 | lr 9.7e-04 | norm 1.9717 | dt 0.006 +type train | step 1990 | loss 1.7003 | lr 9.7e-04 | norm 1.3627 | dt 0.006 +type train | step 2000 | loss 1.8334 | lr 9.7e-04 | norm 1.5790 | dt 0.005 +type train | step 2010 | loss 1.6695 | lr 9.7e-04 | norm 1.9557 | dt 0.005 +type train | step 2020 | loss 1.6553 | lr 9.7e-04 | norm 1.6256 | dt 0.005 +type train | step 2030 | loss 1.7093 | lr 9.7e-04 | norm 1.5781 | dt 0.006 +type train | step 2040 | loss 1.6706 | lr 9.7e-04 | norm 1.7342 | dt 0.006 +type train | step 2050 | loss 1.7295 | lr 9.7e-04 | norm 1.2843 | dt 0.006 +type train | step 2060 | loss 1.7919 | lr 9.7e-04 | norm 1.4993 | dt 0.006 +type train | step 2070 | loss 1.7027 | lr 9.7e-04 | norm 1.6776 | dt 0.006 +type train | step 2080 | loss 1.6608 | lr 9.7e-04 | norm 1.3721 | dt 0.006 +type train | step 2090 | loss 1.6689 | lr 9.6e-04 | norm 1.4462 | dt 0.006 +type train | step 2100 | loss 1.6337 | lr 9.6e-04 | norm 1.2409 | dt 0.006 +type train | step 2110 | loss 1.6307 | lr 9.6e-04 | norm 1.4356 | dt 0.006 +type train | step 2120 | loss 1.7393 | lr 9.6e-04 | norm 1.7613 | dt 0.006 +type train | step 2130 | loss 1.6870 | lr 9.6e-04 | norm 1.4301 | dt 0.006 +type train | step 2140 | loss 1.6575 | lr 9.6e-04 | norm 1.3791 | dt 0.006 +type train | step 2150 | loss 1.6515 | lr 9.6e-04 | norm 1.3729 | dt 0.006 +type train | step 2160 | loss 1.7007 | lr 9.6e-04 | norm 1.9115 | dt 0.006 +type train | step 2170 | loss 1.7313 | lr 9.6e-04 | norm 1.4283 | dt 0.006 +type train | step 2180 | loss 1.7700 | lr 9.6e-04 | norm 1.6046 | dt 0.006 +type train | step 2190 | loss 1.6586 | lr 9.6e-04 | norm 1.6915 | dt 0.006 +type train | step 2200 | loss 1.6344 | lr 9.6e-04 | norm 1.2653 | dt 0.006 +type train | step 2210 | loss 1.6195 | lr 9.6e-04 | norm 1.9298 | dt 0.006 +type train | step 2220 | loss 1.6742 | lr 9.6e-04 | norm 1.6144 | dt 0.006 +type train | step 2230 | loss 1.6879 | lr 9.6e-04 | norm 1.5395 | dt 0.006 +type train | step 2240 | loss 1.7422 | lr 9.5e-04 | norm 1.8999 | dt 0.006 +type train | step 2250 | loss 1.6366 | lr 9.5e-04 | norm 1.4305 | dt 0.006 +type train | step 2260 | loss 1.7020 | lr 9.5e-04 | norm 1.3660 | dt 0.005 +type train | step 2270 | loss 1.6226 | lr 9.5e-04 | norm 1.6556 | dt 0.005 +type train | step 2280 | loss 1.6573 | lr 9.5e-04 | norm 1.6097 | dt 0.006 +type train | step 2290 | loss 1.6401 | lr 9.5e-04 | norm 1.3018 | dt 0.006 +type train | step 2300 | loss 1.5887 | lr 9.5e-04 | norm 1.5656 | dt 0.006 +type train | step 2310 | loss 1.6821 | lr 9.5e-04 | norm 1.3651 | dt 0.006 +type train | step 2320 | loss 1.7227 | lr 9.5e-04 | norm 1.3481 | dt 0.006 +type train | step 2330 | loss 1.6534 | lr 9.5e-04 | norm 1.5967 | dt 0.006 +type train | step 2340 | loss 1.7018 | lr 9.5e-04 | norm 1.4876 | dt 0.006 +type train | step 2350 | loss 1.7018 | lr 9.5e-04 | norm 1.6788 | dt 0.006 +type train | step 2360 | loss 1.5739 | lr 9.5e-04 | norm 1.3017 | dt 0.006 +type train | step 2370 | loss 1.6153 | lr 9.4e-04 | norm 1.5675 | dt 0.006 +type train | step 2380 | loss 1.6834 | lr 9.4e-04 | norm 1.7088 | dt 0.006 +type train | step 2390 | loss 1.6150 | lr 9.4e-04 | norm 2.3370 | dt 0.006 +type train | step 2400 | loss 1.6700 | lr 9.4e-04 | norm 1.6322 | dt 0.006 +type train | step 2410 | loss 1.6838 | lr 9.4e-04 | norm 2.1107 | dt 0.006 +type train | step 2420 | loss 1.5397 | lr 9.4e-04 | norm 1.4998 | dt 0.006 +type train | step 2430 | loss 1.6390 | lr 9.4e-04 | norm 1.5672 | dt 0.006 +type train | step 2440 | loss 1.7671 | lr 9.4e-04 | norm 1.5584 | dt 0.008 +type train | step 2450 | loss 1.6386 | lr 9.4e-04 | norm 1.1854 | dt 0.005 +type train | step 2460 | loss 1.6737 | lr 9.4e-04 | norm 1.8161 | dt 0.006 +type train | step 2470 | loss 1.6584 | lr 9.4e-04 | norm 1.2011 | dt 0.006 +type train | step 2480 | loss 1.5414 | lr 9.4e-04 | norm 1.4604 | dt 0.006 +type train | step 2490 | loss 1.6392 | lr 9.3e-04 | norm 1.4475 | dt 0.007 +type train | step 2500 | loss 1.7510 | lr 9.3e-04 | norm 1.3929 | dt 0.006 +type train | step 2510 | loss 1.6199 | lr 9.3e-04 | norm 1.3680 | dt 0.005 +type train | step 2520 | loss 1.6782 | lr 9.3e-04 | norm 1.8072 | dt 0.005 +type train | step 2530 | loss 1.7031 | lr 9.3e-04 | norm 1.6041 | dt 0.006 +type train | step 2540 | loss 1.5968 | lr 9.3e-04 | norm 1.1549 | dt 0.005 +type train | step 2550 | loss 1.7064 | lr 9.3e-04 | norm 1.2198 | dt 0.006 +type train | step 2560 | loss 1.5467 | lr 9.3e-04 | norm 1.5981 | dt 0.007 +type train | step 2570 | loss 1.5708 | lr 9.3e-04 | norm 1.2685 | dt 0.006 +type train | step 2580 | loss 1.4822 | lr 9.3e-04 | norm 1.5153 | dt 0.006 +type train | step 2590 | loss 1.6737 | lr 9.3e-04 | norm 1.4000 | dt 0.006 +type train | step 2600 | loss 1.5812 | lr 9.2e-04 | norm 1.0656 | dt 0.006 +type train | step 2610 | loss 1.7350 | lr 9.2e-04 | norm 1.5733 | dt 0.006 +type train | step 2620 | loss 1.5572 | lr 9.2e-04 | norm 1.4509 | dt 0.005 +type train | step 2630 | loss 1.5506 | lr 9.2e-04 | norm 1.4138 | dt 0.006 +type train | step 2640 | loss 1.6039 | lr 9.2e-04 | norm 1.5019 | dt 0.005 +type train | step 2650 | loss 1.5528 | lr 9.2e-04 | norm 1.3705 | dt 0.006 +type train | step 2660 | loss 1.6269 | lr 9.2e-04 | norm 1.4651 | dt 0.006 +type train | step 2670 | loss 1.6908 | lr 9.2e-04 | norm 1.2603 | dt 0.006 +type train | step 2680 | loss 1.5964 | lr 9.2e-04 | norm 1.7778 | dt 0.006 +type train | step 2690 | loss 1.5653 | lr 9.2e-04 | norm 1.2080 | dt 0.006 +type train | step 2700 | loss 1.5692 | lr 9.2e-04 | norm 1.3091 | dt 0.006 +type train | step 2710 | loss 1.5315 | lr 9.1e-04 | norm 1.1103 | dt 0.006 +type train | step 2720 | loss 1.5252 | lr 9.1e-04 | norm 1.4206 | dt 0.006 +type train | step 2730 | loss 1.6371 | lr 9.1e-04 | norm 1.4454 | dt 0.005 +type train | step 2740 | loss 1.5945 | lr 9.1e-04 | norm 1.6470 | dt 0.007 +type train | step 2750 | loss 1.5694 | lr 9.1e-04 | norm 1.5556 | dt 0.006 +type train | step 2760 | loss 1.5418 | lr 9.1e-04 | norm 1.6154 | dt 0.005 +type train | step 2770 | loss 1.5879 | lr 9.1e-04 | norm 1.1584 | dt 0.006 +type train | step 2780 | loss 1.6229 | lr 9.1e-04 | norm 1.3245 | dt 0.006 +type train | step 2790 | loss 1.6766 | lr 9.1e-04 | norm 1.4213 | dt 0.006 +type train | step 2800 | loss 1.5685 | lr 9.1e-04 | norm 1.3179 | dt 0.006 +type train | step 2810 | loss 1.5577 | lr 9.0e-04 | norm 1.4844 | dt 0.006 +type train | step 2820 | loss 1.5090 | lr 9.0e-04 | norm 1.3831 | dt 0.006 +type train | step 2830 | loss 1.5781 | lr 9.0e-04 | norm 1.3834 | dt 0.006 +type train | step 2840 | loss 1.5882 | lr 9.0e-04 | norm 1.3128 | dt 0.006 +type train | step 2850 | loss 1.6414 | lr 9.0e-04 | norm 1.2852 | dt 0.006 +type train | step 2860 | loss 1.5377 | lr 9.0e-04 | norm 1.0609 | dt 0.006 +type train | step 2870 | loss 1.6189 | lr 9.0e-04 | norm 1.0193 | dt 0.006 +type train | step 2880 | loss 1.5276 | lr 9.0e-04 | norm 1.2621 | dt 0.006 +type train | step 2890 | loss 1.5518 | lr 9.0e-04 | norm 1.2777 | dt 0.006 +type train | step 2900 | loss 1.5444 | lr 9.0e-04 | norm 1.0650 | dt 0.006 +type train | step 2910 | loss 1.5010 | lr 8.9e-04 | norm 1.3497 | dt 0.006 +type train | step 2920 | loss 1.5976 | lr 8.9e-04 | norm 1.4629 | dt 0.006 +type train | step 2930 | loss 1.6342 | lr 8.9e-04 | norm 1.4438 | dt 0.006 +type train | step 2940 | loss 1.5578 | lr 8.9e-04 | norm 1.4909 | dt 0.006 +type train | step 2950 | loss 1.5941 | lr 8.9e-04 | norm 1.0958 | dt 0.006 +type train | step 2960 | loss 1.6193 | lr 8.9e-04 | norm 1.7928 | dt 0.006 +type train | step 2970 | loss 1.4867 | lr 8.9e-04 | norm 1.3395 | dt 0.006 +type train | step 2980 | loss 1.5459 | lr 8.9e-04 | norm 1.5551 | dt 0.006 +type train | step 2990 | loss 1.6223 | lr 8.9e-04 | norm 2.2543 | dt 0.006 +type train | step 3000 | loss 1.5271 | lr 8.8e-04 | norm 1.6176 | dt 0.006 +type train | step 3010 | loss 1.5702 | lr 8.8e-04 | norm 1.3747 | dt 0.005 +type train | step 3020 | loss 1.6159 | lr 8.8e-04 | norm 1.7208 | dt 0.006 +type train | step 3030 | loss 1.4618 | lr 8.8e-04 | norm 1.4158 | dt 0.006 +type train | step 3040 | loss 1.5690 | lr 8.8e-04 | norm 1.4258 | dt 0.006 +type train | step 3050 | loss 1.6798 | lr 8.8e-04 | norm 1.5247 | dt 0.007 +type train | step 3060 | loss 1.5665 | lr 8.8e-04 | norm 1.4443 | dt 0.006 +type train | step 3070 | loss 1.5774 | lr 8.8e-04 | norm 1.4031 | dt 0.006 +type train | step 3080 | loss 1.5950 | lr 8.8e-04 | norm 1.2476 | dt 0.006 +type train | step 3090 | loss 1.4530 | lr 8.7e-04 | norm 1.1745 | dt 0.006 +type train | step 3100 | loss 1.5667 | lr 8.7e-04 | norm 1.1666 | dt 0.006 +type train | step 3110 | loss 1.6769 | lr 8.7e-04 | norm 1.3661 | dt 0.006 +type train | step 3120 | loss 1.5635 | lr 8.7e-04 | norm 1.3114 | dt 0.008 +type train | step 3130 | loss 1.5920 | lr 8.7e-04 | norm 1.3823 | dt 0.006 +type train | step 3140 | loss 1.6380 | lr 8.7e-04 | norm 1.3750 | dt 0.006 +type train | step 3150 | loss 1.5367 | lr 8.7e-04 | norm 1.1804 | dt 0.006 +type train | step 3160 | loss 1.6494 | lr 8.7e-04 | norm 1.3129 | dt 0.006 +type train | step 3170 | loss 1.4826 | lr 8.6e-04 | norm 1.2535 | dt 0.006 +type train | step 3180 | loss 1.5093 | lr 8.6e-04 | norm 1.2184 | dt 0.006 +type train | step 3190 | loss 1.4181 | lr 8.6e-04 | norm 1.3492 | dt 0.006 +type train | step 3200 | loss 1.5934 | lr 8.6e-04 | norm 1.2813 | dt 0.006 +type train | step 3210 | loss 1.5171 | lr 8.6e-04 | norm 1.2003 | dt 0.006 +type train | step 3220 | loss 1.6720 | lr 8.6e-04 | norm 1.4778 | dt 0.006 +type train | step 3230 | loss 1.4973 | lr 8.6e-04 | norm 1.6875 | dt 0.006 +type train | step 3240 | loss 1.4892 | lr 8.6e-04 | norm 1.5539 | dt 0.006 +type train | step 3250 | loss 1.5283 | lr 8.6e-04 | norm 1.3351 | dt 0.006 +type train | step 3260 | loss 1.4832 | lr 8.5e-04 | norm 1.1365 | dt 0.005 +type train | step 3270 | loss 1.5647 | lr 8.5e-04 | norm 1.2316 | dt 0.005 +type train | step 3280 | loss 1.6191 | lr 8.5e-04 | norm 1.1580 | dt 0.006 +type train | step 3290 | loss 1.5302 | lr 8.5e-04 | norm 1.5492 | dt 0.006 +type train | step 3300 | loss 1.5201 | lr 8.5e-04 | norm 1.3119 | dt 0.006 +type train | step 3310 | loss 1.4879 | lr 8.5e-04 | norm 1.0891 | dt 0.006 +type train | step 3320 | loss 1.4695 | lr 8.5e-04 | norm 1.1499 | dt 0.006 +type train | step 3330 | loss 1.4562 | lr 8.5e-04 | norm 1.4023 | dt 0.007 +type train | step 3340 | loss 1.5826 | lr 8.4e-04 | norm 1.4195 | dt 0.006 +type train | step 3350 | loss 1.5259 | lr 8.4e-04 | norm 1.3150 | dt 0.006 +type train | step 3360 | loss 1.5157 | lr 8.4e-04 | norm 1.3857 | dt 0.006 +type train | step 3370 | loss 1.4626 | lr 8.4e-04 | norm 1.1633 | dt 0.006 +type train | step 3380 | loss 1.5127 | lr 8.4e-04 | norm 1.3524 | dt 0.006 +type train | step 3390 | loss 1.5674 | lr 8.4e-04 | norm 1.1716 | dt 0.006 +type train | step 3400 | loss 1.6125 | lr 8.4e-04 | norm 1.3464 | dt 0.006 +type train | step 3410 | loss 1.5216 | lr 8.3e-04 | norm 1.4730 | dt 0.006 +type train | step 3420 | loss 1.5118 | lr 8.3e-04 | norm 1.4814 | dt 0.006 +type train | step 3430 | loss 1.4374 | lr 8.3e-04 | norm 1.2638 | dt 0.006 +type train | step 3440 | loss 1.5144 | lr 8.3e-04 | norm 1.3183 | dt 0.007 +type train | step 3450 | loss 1.5380 | lr 8.3e-04 | norm 1.1979 | dt 0.007 +type train | step 3460 | loss 1.5828 | lr 8.3e-04 | norm 1.3419 | dt 0.006 +type train | step 3470 | loss 1.4882 | lr 8.3e-04 | norm 1.3149 | dt 0.006 +type train | step 3480 | loss 1.5848 | lr 8.3e-04 | norm 1.6926 | dt 0.006 +type train | step 3490 | loss 1.4791 | lr 8.2e-04 | norm 1.3879 | dt 0.006 +type train | step 3500 | loss 1.4885 | lr 8.2e-04 | norm 1.3997 | dt 0.005 +type train | step 3510 | loss 1.4969 | lr 8.2e-04 | norm 1.1718 | dt 0.006 +type train | step 3520 | loss 1.4413 | lr 8.2e-04 | norm 1.0949 | dt 0.005 +type train | step 3530 | loss 1.5257 | lr 8.2e-04 | norm 1.1388 | dt 0.005 +type train | step 3540 | loss 1.5759 | lr 8.2e-04 | norm 1.3318 | dt 0.005 +type train | step 3550 | loss 1.5014 | lr 8.2e-04 | norm 1.4091 | dt 0.006 +type train | step 3560 | loss 1.5420 | lr 8.2e-04 | norm 1.3658 | dt 0.006 +type train | step 3570 | loss 1.5569 | lr 8.1e-04 | norm 1.2986 | dt 0.006 +type train | step 3580 | loss 1.4251 | lr 8.1e-04 | norm 0.9862 | dt 0.006 +type train | step 3590 | loss 1.4826 | lr 8.1e-04 | norm 1.0810 | dt 0.006 +type train | step 3600 | loss 1.5549 | lr 8.1e-04 | norm 1.4726 | dt 0.006 +type train | step 3610 | loss 1.4629 | lr 8.1e-04 | norm 1.1842 | dt 0.006 +type train | step 3620 | loss 1.5116 | lr 8.1e-04 | norm 1.2252 | dt 0.006 +type train | step 3630 | loss 1.5677 | lr 8.1e-04 | norm 1.1319 | dt 0.006 +type train | step 3640 | loss 1.3984 | lr 8.0e-04 | norm 1.1225 | dt 0.006 +type train | step 3650 | loss 1.5132 | lr 8.0e-04 | norm 1.2812 | dt 0.006 +type train | step 3660 | loss 1.6134 | lr 8.0e-04 | norm 1.3701 | dt 0.008 +type train | step 3670 | loss 1.5067 | lr 8.0e-04 | norm 1.3716 | dt 0.005 +type train | step 3680 | loss 1.5148 | lr 8.0e-04 | norm 1.5488 | dt 0.006 +type train | step 3690 | loss 1.5507 | lr 8.0e-04 | norm 1.2639 | dt 0.006 +type train | step 3700 | loss 1.3975 | lr 8.0e-04 | norm 1.0871 | dt 0.006 +type train | step 3710 | loss 1.5181 | lr 7.9e-04 | norm 1.0876 | dt 0.006 +type train | step 3720 | loss 1.6182 | lr 7.9e-04 | norm 1.2233 | dt 0.006 +type train | step 3730 | loss 1.5133 | lr 7.9e-04 | norm 1.0884 | dt 0.006 +type train | step 3740 | loss 1.5105 | lr 7.9e-04 | norm 1.1934 | dt 0.006 +type train | step 3750 | loss 1.5846 | lr 7.9e-04 | norm 1.0252 | dt 0.006 +type train | step 3760 | loss 1.4854 | lr 7.9e-04 | norm 1.1010 | dt 0.005 +type train | step 3770 | loss 1.5971 | lr 7.9e-04 | norm 1.2593 | dt 0.005 +type train | step 3780 | loss 1.4336 | lr 7.8e-04 | norm 1.2825 | dt 0.005 +type train | step 3790 | loss 1.4613 | lr 7.8e-04 | norm 1.3988 | dt 0.005 +type train | step 3800 | loss 1.3618 | lr 7.8e-04 | norm 1.1313 | dt 0.005 +type train | step 3810 | loss 1.5359 | lr 7.8e-04 | norm 1.1232 | dt 0.006 +type train | step 3820 | loss 1.4715 | lr 7.8e-04 | norm 1.2941 | dt 0.005 +type train | step 3830 | loss 1.6335 | lr 7.8e-04 | norm 1.5089 | dt 0.005 +type train | step 3840 | loss 1.4473 | lr 7.8e-04 | norm 1.4405 | dt 0.005 +type train | step 3850 | loss 1.4415 | lr 7.7e-04 | norm 1.1755 | dt 0.006 +type train | step 3860 | loss 1.4757 | lr 7.7e-04 | norm 1.2344 | dt 0.006 +type train | step 3870 | loss 1.4404 | lr 7.7e-04 | norm 1.0891 | dt 0.006 +type train | step 3880 | loss 1.5193 | lr 7.7e-04 | norm 1.1839 | dt 0.006 +type train | step 3890 | loss 1.5630 | lr 7.7e-04 | norm 1.1607 | dt 0.006 +type train | step 3900 | loss 1.4704 | lr 7.7e-04 | norm 1.2213 | dt 0.006 +type train | step 3910 | loss 1.4858 | lr 7.7e-04 | norm 1.4090 | dt 0.006 +type train | step 3920 | loss 1.4437 | lr 7.6e-04 | norm 1.1037 | dt 0.007 +type train | step 3930 | loss 1.4255 | lr 7.6e-04 | norm 0.9756 | dt 0.006 +type train | step 3940 | loss 1.4149 | lr 7.6e-04 | norm 1.5758 | dt 0.007 +type train | step 3950 | loss 1.5238 | lr 7.6e-04 | norm 1.2195 | dt 0.006 +type train | step 3960 | loss 1.4961 | lr 7.6e-04 | norm 1.7128 | dt 0.008 +type train | step 3970 | loss 1.4708 | lr 7.6e-04 | norm 1.3492 | dt 0.006 +type train | step 3980 | loss 1.4231 | lr 7.6e-04 | norm 1.4069 | dt 0.007 +type train | step 3990 | loss 1.4688 | lr 7.5e-04 | norm 1.1631 | dt 0.006 +type train | step 4000 | loss 1.5251 | lr 7.5e-04 | norm 1.2440 | dt 0.007 +type train | step 4010 | loss 1.5629 | lr 7.5e-04 | norm 1.1855 | dt 0.006 +type train | step 4020 | loss 1.4720 | lr 7.5e-04 | norm 1.0433 | dt 0.006 +type train | step 4030 | loss 1.4793 | lr 7.5e-04 | norm 1.6722 | dt 0.005 +type train | step 4040 | loss 1.3956 | lr 7.5e-04 | norm 1.2270 | dt 0.005 +type train | step 4050 | loss 1.4756 | lr 7.4e-04 | norm 1.1365 | dt 0.006 +type train | step 4060 | loss 1.4969 | lr 7.4e-04 | norm 1.2066 | dt 0.006 +type train | step 4070 | loss 1.5467 | lr 7.4e-04 | norm 1.3750 | dt 0.006 +type train | step 4080 | loss 1.4433 | lr 7.4e-04 | norm 1.2013 | dt 0.006 +type train | step 4090 | loss 1.5516 | lr 7.4e-04 | norm 1.4217 | dt 0.005 +type train | step 4100 | loss 1.4404 | lr 7.4e-04 | norm 1.4177 | dt 0.005 +type train | step 4110 | loss 1.4503 | lr 7.4e-04 | norm 1.1964 | dt 0.005 +type train | step 4120 | loss 1.4595 | lr 7.3e-04 | norm 1.1842 | dt 0.006 +type train | step 4130 | loss 1.4045 | lr 7.3e-04 | norm 1.1382 | dt 0.007 +type train | step 4140 | loss 1.4864 | lr 7.3e-04 | norm 1.4103 | dt 0.005 +type train | step 4150 | loss 1.5419 | lr 7.3e-04 | norm 1.6528 | dt 0.006 +type train | step 4160 | loss 1.4530 | lr 7.3e-04 | norm 1.3138 | dt 0.005 +type train | step 4170 | loss 1.5030 | lr 7.3e-04 | norm 1.3739 | dt 0.005 +type train | step 4180 | loss 1.5205 | lr 7.3e-04 | norm 1.2523 | dt 0.005 +type train | step 4190 | loss 1.3874 | lr 7.2e-04 | norm 1.2364 | dt 0.005 +type train | step 4200 | loss 1.4471 | lr 7.2e-04 | norm 1.2232 | dt 0.006 +type train | step 4210 | loss 1.5343 | lr 7.2e-04 | norm 1.9640 | dt 0.005 +type train | step 4220 | loss 1.4264 | lr 7.2e-04 | norm 1.3163 | dt 0.006 +type train | step 4230 | loss 1.4679 | lr 7.2e-04 | norm 1.1740 | dt 0.006 +type train | step 4240 | loss 1.5373 | lr 7.2e-04 | norm 1.1343 | dt 0.006 +type train | step 4250 | loss 1.3627 | lr 7.1e-04 | norm 1.1590 | dt 0.005 +type train | step 4260 | loss 1.4863 | lr 7.1e-04 | norm 1.1233 | dt 0.006 +type train | step 4270 | loss 1.5626 | lr 7.1e-04 | norm 1.3303 | dt 0.011 +type train | step 4280 | loss 1.4603 | lr 7.1e-04 | norm 1.1019 | dt 0.008 +type train | step 4290 | loss 1.4637 | lr 7.1e-04 | norm 1.1016 | dt 0.006 +type train | step 4300 | loss 1.5109 | lr 7.1e-04 | norm 1.1193 | dt 0.005 +type train | step 4310 | loss 1.3631 | lr 7.0e-04 | norm 1.1177 | dt 0.006 +type train | step 4320 | loss 1.4873 | lr 7.0e-04 | norm 1.1171 | dt 0.006 +type train | step 4330 | loss 1.5791 | lr 7.0e-04 | norm 1.3208 | dt 0.006 +type train | step 4340 | loss 1.4764 | lr 7.0e-04 | norm 1.0195 | dt 0.006 +type train | step 4350 | loss 1.4649 | lr 7.0e-04 | norm 1.2664 | dt 0.006 +type train | step 4360 | loss 1.5441 | lr 7.0e-04 | norm 1.1035 | dt 0.006 +type train | step 4370 | loss 1.4596 | lr 7.0e-04 | norm 1.2904 | dt 0.006 +type train | step 4380 | loss 1.5758 | lr 6.9e-04 | norm 1.3818 | dt 0.006 +type train | step 4390 | loss 1.3954 | lr 6.9e-04 | norm 1.1791 | dt 0.006 +type train | step 4400 | loss 1.4291 | lr 6.9e-04 | norm 1.1598 | dt 0.006 +type train | step 4410 | loss 1.3316 | lr 6.9e-04 | norm 1.1477 | dt 0.006 +type train | step 4420 | loss 1.5029 | lr 6.9e-04 | norm 1.1812 | dt 0.006 +type train | step 4430 | loss 1.4343 | lr 6.9e-04 | norm 1.1625 | dt 0.006 +type train | step 4440 | loss 1.6001 | lr 6.8e-04 | norm 1.1924 | dt 0.006 +type train | step 4450 | loss 1.4043 | lr 6.8e-04 | norm 1.1880 | dt 0.006 +type train | step 4460 | loss 1.4088 | lr 6.8e-04 | norm 1.2550 | dt 0.005 +type train | step 4470 | loss 1.4401 | lr 6.8e-04 | norm 1.2046 | dt 0.006 +type train | step 4480 | loss 1.4029 | lr 6.8e-04 | norm 1.0340 | dt 0.006 +type train | step 4490 | loss 1.4882 | lr 6.8e-04 | norm 1.4230 | dt 0.006 +type train | step 4500 | loss 1.5361 | lr 6.7e-04 | norm 1.2578 | dt 0.006 +type train | step 4510 | loss 1.4262 | lr 6.7e-04 | norm 1.2628 | dt 0.006 +type train | step 4520 | loss 1.4421 | lr 6.7e-04 | norm 1.0814 | dt 0.005 +type train | step 4530 | loss 1.4046 | lr 6.7e-04 | norm 1.0262 | dt 0.005 +type train | step 4540 | loss 1.3868 | lr 6.7e-04 | norm 0.9839 | dt 0.005 +type train | step 4550 | loss 1.3708 | lr 6.7e-04 | norm 1.3726 | dt 0.006 +type train | step 4560 | loss 1.4899 | lr 6.6e-04 | norm 1.0936 | dt 0.006 +type train | step 4570 | loss 1.4584 | lr 6.6e-04 | norm 1.5556 | dt 0.006 +type train | step 4580 | loss 1.4386 | lr 6.6e-04 | norm 1.3411 | dt 0.006 +type train | step 4590 | loss 1.3822 | lr 6.6e-04 | norm 1.3427 | dt 0.006 +type train | step 4600 | loss 1.4287 | lr 6.6e-04 | norm 1.1009 | dt 0.006 +type train | step 4610 | loss 1.4806 | lr 6.6e-04 | norm 1.1608 | dt 0.005 +type train | step 4620 | loss 1.5314 | lr 6.5e-04 | norm 1.1523 | dt 0.006 +type train | step 4630 | loss 1.4548 | lr 6.5e-04 | norm 1.4246 | dt 0.006 +type train | step 4640 | loss 1.4569 | lr 6.5e-04 | norm 1.8054 | dt 0.008 +type train | step 4650 | loss 1.3503 | lr 6.5e-04 | norm 1.0366 | dt 0.006 +type train | step 4660 | loss 1.4277 | lr 6.5e-04 | norm 1.0500 | dt 0.006 +type train | step 4670 | loss 1.4676 | lr 6.5e-04 | norm 1.2806 | dt 0.006 +type train | step 4680 | loss 1.5175 | lr 6.4e-04 | norm 1.4452 | dt 0.006 +type train | step 4690 | loss 1.4091 | lr 6.4e-04 | norm 1.1484 | dt 0.006 +type train | step 4700 | loss 1.5133 | lr 6.4e-04 | norm 1.3283 | dt 0.006 +type train | step 4710 | loss 1.3869 | lr 6.4e-04 | norm 1.1004 | dt 0.006 +type train | step 4720 | loss 1.4157 | lr 6.4e-04 | norm 1.2776 | dt 0.006 +type train | step 4730 | loss 1.4260 | lr 6.4e-04 | norm 1.0884 | dt 0.006 +type train | step 4740 | loss 1.3713 | lr 6.3e-04 | norm 1.0747 | dt 0.006 +type train | step 4750 | loss 1.4517 | lr 6.3e-04 | norm 1.0905 | dt 0.006 +type train | step 4760 | loss 1.5093 | lr 6.3e-04 | norm 1.5858 | dt 0.005 +type train | step 4770 | loss 1.4131 | lr 6.3e-04 | norm 1.2156 | dt 0.005 +type train | step 4780 | loss 1.4651 | lr 6.3e-04 | norm 1.3938 | dt 0.006 +type train | step 4790 | loss 1.4897 | lr 6.3e-04 | norm 1.2476 | dt 0.006 +type train | step 4800 | loss 1.3492 | lr 6.2e-04 | norm 1.0693 | dt 0.006 +type train | step 4810 | loss 1.4172 | lr 6.2e-04 | norm 1.1373 | dt 0.006 +type train | step 4820 | loss 1.5002 | lr 6.2e-04 | norm 1.4512 | dt 0.006 +type train | step 4830 | loss 1.3923 | lr 6.2e-04 | norm 1.3984 | dt 0.006 +type train | step 4840 | loss 1.4225 | lr 6.2e-04 | norm 1.1176 | dt 0.006 +type train | step 4850 | loss 1.5069 | lr 6.2e-04 | norm 1.2688 | dt 0.006 +type train | step 4860 | loss 1.3267 | lr 6.1e-04 | norm 1.1136 | dt 0.006 +type train | step 4870 | loss 1.4566 | lr 6.1e-04 | norm 1.2504 | dt 0.006 +type train | step 4880 | loss 1.5316 | lr 6.1e-04 | norm 1.5322 | dt 0.009 +type train | step 4890 | loss 1.4331 | lr 6.1e-04 | norm 1.2502 | dt 0.006 +type train | step 4900 | loss 1.4423 | lr 6.1e-04 | norm 1.4861 | dt 0.006 +type train | step 4910 | loss 1.4825 | lr 6.1e-04 | norm 1.1574 | dt 0.006 +type train | step 4920 | loss 1.3248 | lr 6.0e-04 | norm 1.1510 | dt 0.006 +type train | step 4930 | loss 1.4578 | lr 6.0e-04 | norm 1.1489 | dt 0.006 +type train | step 4940 | loss 1.5498 | lr 6.0e-04 | norm 1.3975 | dt 0.006 +type train | step 4950 | loss 1.4562 | lr 6.0e-04 | norm 1.2726 | dt 0.006 +type train | step 4960 | loss 1.4407 | lr 6.0e-04 | norm 1.6194 | dt 0.006 +type train | step 4970 | loss 1.5185 | lr 6.0e-04 | norm 1.3220 | dt 0.006 +type train | step 4980 | loss 1.4223 | lr 5.9e-04 | norm 1.1087 | dt 0.006 +type train | step 4990 | loss 1.5521 | lr 5.9e-04 | norm 1.3542 | dt 0.006 +type train | step 5000 | loss 1.3658 | lr 5.9e-04 | norm 1.1458 | dt 0.006 +type train | step 5010 | loss 1.4073 | lr 5.9e-04 | norm 1.3775 | dt 0.005 +type train | step 5020 | loss 1.3084 | lr 5.9e-04 | norm 1.2864 | dt 0.006 +type train | step 5030 | loss 1.4787 | lr 5.9e-04 | norm 1.3186 | dt 0.006 +type train | step 5040 | loss 1.4095 | lr 5.8e-04 | norm 1.1402 | dt 0.006 +type train | step 5050 | loss 1.5712 | lr 5.8e-04 | norm 1.3407 | dt 0.006 +type train | step 5060 | loss 1.3722 | lr 5.8e-04 | norm 1.0570 | dt 0.006 +type train | step 5070 | loss 1.3824 | lr 5.8e-04 | norm 1.1683 | dt 0.006 +type train | step 5080 | loss 1.4119 | lr 5.8e-04 | norm 1.1174 | dt 0.006 +type train | step 5090 | loss 1.3725 | lr 5.8e-04 | norm 1.0115 | dt 0.006 +type train | step 5100 | loss 1.4582 | lr 5.7e-04 | norm 1.2529 | dt 0.006 +type train | step 5110 | loss 1.5093 | lr 5.7e-04 | norm 1.1673 | dt 0.006 +type train | step 5120 | loss 1.3993 | lr 5.7e-04 | norm 1.2864 | dt 0.006 +type train | step 5130 | loss 1.4149 | lr 5.7e-04 | norm 1.0748 | dt 0.006 +type train | step 5140 | loss 1.3831 | lr 5.7e-04 | norm 1.1737 | dt 0.005 +type train | step 5150 | loss 1.3645 | lr 5.7e-04 | norm 0.9676 | dt 0.006 +type train | step 5160 | loss 1.3335 | lr 5.6e-04 | norm 1.2814 | dt 0.006 +type train | step 5170 | loss 1.4585 | lr 5.6e-04 | norm 1.0679 | dt 0.006 +type train | step 5180 | loss 1.4257 | lr 5.6e-04 | norm 1.3515 | dt 0.006 +type train | step 5190 | loss 1.4114 | lr 5.6e-04 | norm 1.1837 | dt 0.006 +type train | step 5200 | loss 1.3499 | lr 5.6e-04 | norm 1.2274 | dt 0.006 +type train | step 5210 | loss 1.4054 | lr 5.6e-04 | norm 1.5139 | dt 0.006 +type train | step 5220 | loss 1.4602 | lr 5.5e-04 | norm 1.3641 | dt 0.006 +type train | step 5230 | loss 1.5060 | lr 5.5e-04 | norm 1.2453 | dt 0.006 +type train | step 5240 | loss 1.4204 | lr 5.5e-04 | norm 1.2072 | dt 0.006 +type train | step 5250 | loss 1.4193 | lr 5.5e-04 | norm 1.3524 | dt 0.006 +type train | step 5260 | loss 1.3282 | lr 5.5e-04 | norm 1.1440 | dt 0.005 +type train | step 5270 | loss 1.4067 | lr 5.4e-04 | norm 1.2596 | dt 0.005 +type train | step 5280 | loss 1.4483 | lr 5.4e-04 | norm 1.4345 | dt 0.006 +type train | step 5290 | loss 1.4818 | lr 5.4e-04 | norm 1.1596 | dt 0.006 +type train | step 5300 | loss 1.3905 | lr 5.4e-04 | norm 1.3146 | dt 0.006 +type train | step 5310 | loss 1.4836 | lr 5.4e-04 | norm 1.1338 | dt 0.006 +type train | step 5320 | loss 1.3732 | lr 5.4e-04 | norm 1.4177 | dt 0.006 +type train | step 5330 | loss 1.3935 | lr 5.3e-04 | norm 1.7972 | dt 0.006 +type train | step 5340 | loss 1.4061 | lr 5.3e-04 | norm 1.1761 | dt 0.006 +type train | step 5350 | loss 1.3435 | lr 5.3e-04 | norm 1.0485 | dt 0.006 +type train | step 5360 | loss 1.4307 | lr 5.3e-04 | norm 1.2195 | dt 0.006 +type train | step 5370 | loss 1.4752 | lr 5.3e-04 | norm 1.1927 | dt 0.006 +type train | step 5380 | loss 1.3885 | lr 5.3e-04 | norm 1.1842 | dt 0.007 +type train | step 5390 | loss 1.4365 | lr 5.2e-04 | norm 1.4121 | dt 0.006 +type train | step 5400 | loss 1.4652 | lr 5.2e-04 | norm 1.2834 | dt 0.006 +type train | step 5410 | loss 1.3259 | lr 5.2e-04 | norm 1.0909 | dt 0.006 +type train | step 5420 | loss 1.4016 | lr 5.2e-04 | norm 1.2915 | dt 0.006 +type train | step 5430 | loss 1.4625 | lr 5.2e-04 | norm 1.3270 | dt 0.006 +type train | step 5440 | loss 1.3658 | lr 5.2e-04 | norm 1.1706 | dt 0.006 +type train | step 5450 | loss 1.4007 | lr 5.1e-04 | norm 1.1443 | dt 0.006 +type train | step 5460 | loss 1.4822 | lr 5.1e-04 | norm 1.1348 | dt 0.006 +type train | step 5470 | loss 1.3030 | lr 5.1e-04 | norm 1.0849 | dt 0.006 +type train | step 5480 | loss 1.4298 | lr 5.1e-04 | norm 1.1169 | dt 0.006 +type train | step 5490 | loss 1.4979 | lr 5.1e-04 | norm 1.2893 | dt 0.007 +type train | step 5500 | loss 1.4011 | lr 5.1e-04 | norm 1.1234 | dt 0.006 +type train | step 5510 | loss 1.4189 | lr 5.0e-04 | norm 1.4352 | dt 0.007 +type train | step 5520 | loss 1.4651 | lr 5.0e-04 | norm 1.4369 | dt 0.006 +type train | step 5530 | loss 1.2926 | lr 5.0e-04 | norm 1.0577 | dt 0.006 +type train | step 5540 | loss 1.4439 | lr 5.0e-04 | norm 1.4722 | dt 0.006 +type train | step 5550 | loss 1.5158 | lr 5.0e-04 | norm 1.2663 | dt 0.006 +type train | step 5560 | loss 1.4249 | lr 4.9e-04 | norm 1.1379 | dt 0.006 +type train | step 5570 | loss 1.4182 | lr 4.9e-04 | norm 1.7736 | dt 0.006 +type train | step 5580 | loss 1.5048 | lr 4.9e-04 | norm 1.2781 | dt 0.006 +type train | step 5590 | loss 1.4015 | lr 4.9e-04 | norm 1.1439 | dt 0.006 +type train | step 5600 | loss 1.5315 | lr 4.9e-04 | norm 1.3531 | dt 0.006 +type train | step 5610 | loss 1.3429 | lr 4.9e-04 | norm 1.1510 | dt 0.006 +type train | step 5620 | loss 1.3799 | lr 4.8e-04 | norm 1.2747 | dt 0.006 +type train | step 5630 | loss 1.2834 | lr 4.8e-04 | norm 1.1381 | dt 0.006 +type train | step 5640 | loss 1.4548 | lr 4.8e-04 | norm 1.4611 | dt 0.006 +type train | step 5650 | loss 1.3876 | lr 4.8e-04 | norm 1.3399 | dt 0.006 +type train | step 5660 | loss 1.5489 | lr 4.8e-04 | norm 1.3822 | dt 0.006 +type train | step 5670 | loss 1.3524 | lr 4.8e-04 | norm 1.2558 | dt 0.006 +type train | step 5680 | loss 1.3594 | lr 4.7e-04 | norm 1.2413 | dt 0.006 +type train | step 5690 | loss 1.3908 | lr 4.7e-04 | norm 1.3995 | dt 0.006 +type train | step 5700 | loss 1.3680 | lr 4.7e-04 | norm 1.3912 | dt 0.006 +type train | step 5710 | loss 1.4391 | lr 4.7e-04 | norm 1.2764 | dt 0.006 +type train | step 5720 | loss 1.4825 | lr 4.7e-04 | norm 1.1704 | dt 0.006 +type train | step 5730 | loss 1.3772 | lr 4.7e-04 | norm 1.4872 | dt 0.006 +type train | step 5740 | loss 1.3943 | lr 4.6e-04 | norm 1.1692 | dt 0.006 +type train | step 5750 | loss 1.3719 | lr 4.6e-04 | norm 1.3081 | dt 0.006 +type train | step 5760 | loss 1.3559 | lr 4.6e-04 | norm 1.2076 | dt 0.005 +type train | step 5770 | loss 1.3219 | lr 4.6e-04 | norm 1.4468 | dt 0.006 +type train | step 5780 | loss 1.4418 | lr 4.6e-04 | norm 1.3076 | dt 0.006 +type train | step 5790 | loss 1.4051 | lr 4.5e-04 | norm 1.3934 | dt 0.006 +type train | step 5800 | loss 1.3909 | lr 4.5e-04 | norm 1.1776 | dt 0.006 +type train | step 5810 | loss 1.3308 | lr 4.5e-04 | norm 1.1509 | dt 0.006 +type train | step 5820 | loss 1.3897 | lr 4.5e-04 | norm 1.2424 | dt 0.006 +type train | step 5830 | loss 1.4327 | lr 4.5e-04 | norm 1.2826 | dt 0.006 +type train | step 5840 | loss 1.4858 | lr 4.5e-04 | norm 1.2111 | dt 0.006 +type train | step 5850 | loss 1.3933 | lr 4.4e-04 | norm 1.1720 | dt 0.006 +type train | step 5860 | loss 1.3934 | lr 4.4e-04 | norm 1.1184 | dt 0.006 +type train | step 5870 | loss 1.3145 | lr 4.4e-04 | norm 1.2699 | dt 0.006 +type train | step 5880 | loss 1.3989 | lr 4.4e-04 | norm 1.7616 | dt 0.006 +type train | step 5890 | loss 1.4201 | lr 4.4e-04 | norm 1.2169 | dt 0.006 +type train | step 5900 | loss 1.4622 | lr 4.4e-04 | norm 1.2906 | dt 0.006 +type train | step 5910 | loss 1.3718 | lr 4.3e-04 | norm 1.4738 | dt 0.006 +type train | step 5920 | loss 1.4575 | lr 4.3e-04 | norm 1.1344 | dt 0.006 +type train | step 5930 | loss 1.3579 | lr 4.3e-04 | norm 1.4434 | dt 0.006 +type train | step 5940 | loss 1.3710 | lr 4.3e-04 | norm 1.2766 | dt 0.006 +type train | step 5950 | loss 1.3842 | lr 4.3e-04 | norm 1.0542 | dt 0.006 +type train | step 5960 | loss 1.3305 | lr 4.3e-04 | norm 1.2088 | dt 0.006 +type train | step 5970 | loss 1.4129 | lr 4.2e-04 | norm 1.2024 | dt 0.006 +type train | step 5980 | loss 1.4505 | lr 4.2e-04 | norm 1.2330 | dt 0.006 +type train | step 5990 | loss 1.3698 | lr 4.2e-04 | norm 1.2680 | dt 0.006 +type train | step 6000 | loss 1.4165 | lr 4.2e-04 | norm 1.3425 | dt 0.006 +type train | step 6010 | loss 1.4494 | lr 4.2e-04 | norm 1.4167 | dt 0.005 +type train | step 6020 | loss 1.3076 | lr 4.2e-04 | norm 1.0905 | dt 0.005 +type train | step 6030 | loss 1.3838 | lr 4.1e-04 | norm 1.1491 | dt 0.005 +type train | step 6040 | loss 1.4399 | lr 4.1e-04 | norm 1.3658 | dt 0.005 +type train | step 6050 | loss 1.3507 | lr 4.1e-04 | norm 1.3194 | dt 0.005 +type train | step 6060 | loss 1.4033 | lr 4.1e-04 | norm 1.9490 | dt 0.005 +type train | step 6070 | loss 1.4650 | lr 4.1e-04 | norm 1.3818 | dt 0.005 +type train | step 6080 | loss 1.2830 | lr 4.1e-04 | norm 1.0503 | dt 0.005 +type train | step 6090 | loss 1.4139 | lr 4.0e-04 | norm 1.1583 | dt 0.005 +type train | step 6100 | loss 1.4773 | lr 4.0e-04 | norm 1.3269 | dt 0.007 +type train | step 6110 | loss 1.3855 | lr 4.0e-04 | norm 1.3260 | dt 0.006 +type train | step 6120 | loss 1.4093 | lr 4.0e-04 | norm 1.5901 | dt 0.006 +type train | step 6130 | loss 1.4447 | lr 4.0e-04 | norm 1.1014 | dt 0.006 +type train | step 6140 | loss 1.2862 | lr 4.0e-04 | norm 1.2533 | dt 0.006 +type train | step 6150 | loss 1.4241 | lr 3.9e-04 | norm 1.2893 | dt 0.006 +type train | step 6160 | loss 1.4926 | lr 3.9e-04 | norm 1.3684 | dt 0.006 +type train | step 6170 | loss 1.4114 | lr 3.9e-04 | norm 1.3962 | dt 0.006 +type train | step 6180 | loss 1.4016 | lr 3.9e-04 | norm 1.2975 | dt 0.006 +type train | step 6190 | loss 1.4849 | lr 3.9e-04 | norm 1.4270 | dt 0.006 +type train | step 6200 | loss 1.3881 | lr 3.9e-04 | norm 1.1177 | dt 0.006 +type train | step 6210 | loss 1.5091 | lr 3.8e-04 | norm 1.3004 | dt 0.006 +type train | step 6220 | loss 1.3256 | lr 3.8e-04 | norm 1.2616 | dt 0.006 +type train | step 6230 | loss 1.3655 | lr 3.8e-04 | norm 1.3581 | dt 0.006 +type train | step 6240 | loss 1.2871 | lr 3.8e-04 | norm 1.8082 | dt 0.006 +type train | step 6250 | loss 1.4414 | lr 3.8e-04 | norm 1.2613 | dt 0.006 +type train | step 6260 | loss 1.3719 | lr 3.8e-04 | norm 1.1612 | dt 0.005 +type train | step 6270 | loss 1.5292 | lr 3.7e-04 | norm 1.5722 | dt 0.006 +type train | step 6280 | loss 1.3233 | lr 3.7e-04 | norm 1.0434 | dt 0.006 +type train | step 6290 | loss 1.3471 | lr 3.7e-04 | norm 1.3396 | dt 0.006 +type train | step 6300 | loss 1.3784 | lr 3.7e-04 | norm 1.3784 | dt 0.006 +type train | step 6310 | loss 1.3447 | lr 3.7e-04 | norm 1.0226 | dt 0.006 +type train | step 6320 | loss 1.4255 | lr 3.7e-04 | norm 1.4351 | dt 0.006 +type train | step 6330 | loss 1.4665 | lr 3.6e-04 | norm 1.2710 | dt 0.006 +type train | step 6340 | loss 1.3572 | lr 3.6e-04 | norm 1.4295 | dt 0.006 +type train | step 6350 | loss 1.3718 | lr 3.6e-04 | norm 1.1524 | dt 0.006 +type train | step 6360 | loss 1.3554 | lr 3.6e-04 | norm 1.2629 | dt 0.006 +type train | step 6370 | loss 1.3346 | lr 3.6e-04 | norm 1.0546 | dt 0.006 +type train | step 6380 | loss 1.3003 | lr 3.6e-04 | norm 1.3817 | dt 0.005 +type train | step 6390 | loss 1.4229 | lr 3.5e-04 | norm 1.2486 | dt 0.006 +type train | step 6400 | loss 1.3826 | lr 3.5e-04 | norm 1.2484 | dt 0.007 +type train | step 6410 | loss 1.3712 | lr 3.5e-04 | norm 1.2217 | dt 0.007 +type train | step 6420 | loss 1.3194 | lr 3.5e-04 | norm 1.4159 | dt 0.008 +type train | step 6430 | loss 1.3625 | lr 3.5e-04 | norm 1.0778 | dt 0.006 +type train | step 6440 | loss 1.4113 | lr 3.5e-04 | norm 1.1170 | dt 0.006 +type train | step 6450 | loss 1.4628 | lr 3.4e-04 | norm 1.3326 | dt 0.008 +type train | step 6460 | loss 1.3795 | lr 3.4e-04 | norm 1.2144 | dt 0.006 +type train | step 6470 | loss 1.3801 | lr 3.4e-04 | norm 1.2747 | dt 0.007 +type train | step 6480 | loss 1.3037 | lr 3.4e-04 | norm 1.5298 | dt 0.006 +type train | step 6490 | loss 1.3736 | lr 3.4e-04 | norm 1.1716 | dt 0.006 +type train | step 6500 | loss 1.4007 | lr 3.4e-04 | norm 1.2123 | dt 0.006 +type train | step 6510 | loss 1.4405 | lr 3.3e-04 | norm 1.2931 | dt 0.005 +type train | step 6520 | loss 1.3512 | lr 3.3e-04 | norm 1.2106 | dt 0.005 +type train | step 6530 | loss 1.4442 | lr 3.3e-04 | norm 1.3668 | dt 0.006 +type train | step 6540 | loss 1.3416 | lr 3.3e-04 | norm 1.4654 | dt 0.006 +type train | step 6550 | loss 1.3577 | lr 3.3e-04 | norm 1.5893 | dt 0.006 +type train | step 6560 | loss 1.3762 | lr 3.3e-04 | norm 1.1275 | dt 0.005 +type train | step 6570 | loss 1.3095 | lr 3.2e-04 | norm 1.1761 | dt 0.005 +type train | step 6580 | loss 1.3887 | lr 3.2e-04 | norm 1.1609 | dt 0.005 +type train | step 6590 | loss 1.4262 | lr 3.2e-04 | norm 1.2162 | dt 0.006 +type train | step 6600 | loss 1.3490 | lr 3.2e-04 | norm 1.1730 | dt 0.006 +type train | step 6610 | loss 1.4112 | lr 3.2e-04 | norm 1.6423 | dt 0.006 +type train | step 6620 | loss 1.4242 | lr 3.2e-04 | norm 1.0960 | dt 0.007 +type train | step 6630 | loss 1.2974 | lr 3.1e-04 | norm 1.1652 | dt 0.006 +type train | step 6640 | loss 1.3692 | lr 3.1e-04 | norm 1.2189 | dt 0.006 +type train | step 6650 | loss 1.4151 | lr 3.1e-04 | norm 1.2559 | dt 0.006 +type train | step 6660 | loss 1.3371 | lr 3.1e-04 | norm 1.4284 | dt 0.005 +type train | step 6670 | loss 1.3886 | lr 3.1e-04 | norm 1.5917 | dt 0.006 +type train | step 6680 | loss 1.4441 | lr 3.1e-04 | norm 1.3419 | dt 0.006 +type train | step 6690 | loss 1.2701 | lr 3.1e-04 | norm 1.0525 | dt 0.006 +type train | step 6700 | loss 1.3983 | lr 3.0e-04 | norm 1.2271 | dt 0.006 +type train | step 6710 | loss 1.4560 | lr 3.0e-04 | norm 1.4333 | dt 0.013 +type train | step 6720 | loss 1.3701 | lr 3.0e-04 | norm 1.4087 | dt 0.007 +type train | step 6730 | loss 1.3810 | lr 3.0e-04 | norm 1.2246 | dt 0.007 +type train | step 6740 | loss 1.4287 | lr 3.0e-04 | norm 1.2283 | dt 0.008 +type train | step 6750 | loss 1.2735 | lr 3.0e-04 | norm 1.1754 | dt 0.006 +type train | step 6760 | loss 1.4110 | lr 2.9e-04 | norm 1.2458 | dt 0.005 +type train | step 6770 | loss 1.4755 | lr 2.9e-04 | norm 1.4882 | dt 0.006 +type train | step 6780 | loss 1.3983 | lr 2.9e-04 | norm 1.2906 | dt 0.006 +type train | step 6790 | loss 1.3826 | lr 2.9e-04 | norm 1.6432 | dt 0.005 +type train | step 6800 | loss 1.4688 | lr 2.9e-04 | norm 1.2201 | dt 0.006 +type train | step 6810 | loss 1.3725 | lr 2.9e-04 | norm 1.0873 | dt 0.006 +type train | step 6820 | loss 1.4919 | lr 2.8e-04 | norm 1.1778 | dt 0.006 +type train | step 6830 | loss 1.3094 | lr 2.8e-04 | norm 1.1814 | dt 0.006 +type train | step 6840 | loss 1.3472 | lr 2.8e-04 | norm 1.1081 | dt 0.005 +type train | step 6850 | loss 1.2787 | lr 2.8e-04 | norm 1.6212 | dt 0.005 +type train | step 6860 | loss 1.4166 | lr 2.8e-04 | norm 1.2651 | dt 0.005 +type train | step 6870 | loss 1.3551 | lr 2.8e-04 | norm 1.1171 | dt 0.005 +type train | step 6880 | loss 1.5137 | lr 2.8e-04 | norm 1.4785 | dt 0.005 +type train | step 6890 | loss 1.3040 | lr 2.7e-04 | norm 1.0241 | dt 0.005 +type train | step 6900 | loss 1.3320 | lr 2.7e-04 | norm 1.3209 | dt 0.005 +type train | step 6910 | loss 1.3649 | lr 2.7e-04 | norm 1.3332 | dt 0.006 +type train | step 6920 | loss 1.3296 | lr 2.7e-04 | norm 1.1585 | dt 0.007 +type train | step 6930 | loss 1.3984 | lr 2.7e-04 | norm 1.1472 | dt 0.006 +type train | step 6940 | loss 1.4492 | lr 2.7e-04 | norm 1.2820 | dt 0.006 +type train | step 6950 | loss 1.3426 | lr 2.7e-04 | norm 1.2017 | dt 0.006 +type train | step 6960 | loss 1.3637 | lr 2.6e-04 | norm 1.2621 | dt 0.006 +type train | step 6970 | loss 1.3415 | lr 2.6e-04 | norm 1.2491 | dt 0.006 +type train | step 6980 | loss 1.3223 | lr 2.6e-04 | norm 1.0902 | dt 0.006 +type train | step 6990 | loss 1.2802 | lr 2.6e-04 | norm 1.3134 | dt 0.006 +type train | step 7000 | loss 1.4042 | lr 2.6e-04 | norm 1.2262 | dt 0.006 +type train | step 7010 | loss 1.3650 | lr 2.6e-04 | norm 1.1284 | dt 0.005 +type train | step 7020 | loss 1.3627 | lr 2.5e-04 | norm 1.2534 | dt 0.005 +type train | step 7030 | loss 1.3092 | lr 2.5e-04 | norm 1.5164 | dt 0.005 +type train | step 7040 | loss 1.3503 | lr 2.5e-04 | norm 1.1861 | dt 0.005 +type train | step 7050 | loss 1.4011 | lr 2.5e-04 | norm 1.2625 | dt 0.006 +type train | step 7060 | loss 1.4470 | lr 2.5e-04 | norm 1.3078 | dt 0.005 +type train | step 7070 | loss 1.3654 | lr 2.5e-04 | norm 1.1757 | dt 0.005 +type train | step 7080 | loss 1.3652 | lr 2.5e-04 | norm 1.2197 | dt 0.005 +type train | step 7090 | loss 1.2856 | lr 2.4e-04 | norm 1.3784 | dt 0.005 +type train | step 7100 | loss 1.3558 | lr 2.4e-04 | norm 1.1020 | dt 0.005 +type train | step 7110 | loss 1.3829 | lr 2.4e-04 | norm 1.0673 | dt 0.006 +type train | step 7120 | loss 1.4234 | lr 2.4e-04 | norm 1.2057 | dt 0.006 +type train | step 7130 | loss 1.3398 | lr 2.4e-04 | norm 1.2740 | dt 0.005 +type train | step 7140 | loss 1.4285 | lr 2.4e-04 | norm 1.3570 | dt 0.005 +type train | step 7150 | loss 1.3230 | lr 2.4e-04 | norm 1.2472 | dt 0.006 +type train | step 7160 | loss 1.3374 | lr 2.3e-04 | norm 1.2442 | dt 0.005 +type train | step 7170 | loss 1.3555 | lr 2.3e-04 | norm 1.0096 | dt 0.006 +type train | step 7180 | loss 1.2974 | lr 2.3e-04 | norm 1.0679 | dt 0.005 +type train | step 7190 | loss 1.3732 | lr 2.3e-04 | norm 1.1747 | dt 0.006 +type train | step 7200 | loss 1.4166 | lr 2.3e-04 | norm 1.5789 | dt 0.006 +type train | step 7210 | loss 1.3377 | lr 2.3e-04 | norm 1.5114 | dt 0.006 +type train | step 7220 | loss 1.3871 | lr 2.3e-04 | norm 1.1815 | dt 0.006 +type train | step 7230 | loss 1.4119 | lr 2.2e-04 | norm 1.1809 | dt 0.006 +type train | step 7240 | loss 1.2852 | lr 2.2e-04 | norm 1.0991 | dt 0.007 +type train | step 7250 | loss 1.3543 | lr 2.2e-04 | norm 1.1531 | dt 0.006 +type train | step 7260 | loss 1.4031 | lr 2.2e-04 | norm 1.4247 | dt 0.005 +type train | step 7270 | loss 1.3244 | lr 2.2e-04 | norm 1.5206 | dt 0.005 +type train | step 7280 | loss 1.3595 | lr 2.2e-04 | norm 1.1807 | dt 0.006 +type train | step 7290 | loss 1.4294 | lr 2.2e-04 | norm 1.2243 | dt 0.006 +type train | step 7300 | loss 1.2594 | lr 2.1e-04 | norm 1.1083 | dt 0.006 +type train | step 7310 | loss 1.3890 | lr 2.1e-04 | norm 1.2515 | dt 0.006 +type train | step 7320 | loss 1.4401 | lr 2.1e-04 | norm 1.4496 | dt 0.009 +type train | step 7330 | loss 1.3566 | lr 2.1e-04 | norm 1.2372 | dt 0.006 +type train | step 7340 | loss 1.3608 | lr 2.1e-04 | norm 1.1318 | dt 0.005 +type train | step 7350 | loss 1.4081 | lr 2.1e-04 | norm 1.0140 | dt 0.005 +type train | step 7360 | loss 1.2576 | lr 2.1e-04 | norm 1.1364 | dt 0.005 +type train | step 7370 | loss 1.3990 | lr 2.0e-04 | norm 1.2775 | dt 0.005 +type train | step 7380 | loss 1.4598 | lr 2.0e-04 | norm 1.5926 | dt 0.005 +type train | step 7390 | loss 1.3834 | lr 2.0e-04 | norm 1.2742 | dt 0.005 +type train | step 7400 | loss 1.3582 | lr 2.0e-04 | norm 1.2565 | dt 0.005 +type train | step 7410 | loss 1.4504 | lr 2.0e-04 | norm 1.1484 | dt 0.006 +type train | step 7420 | loss 1.3554 | lr 2.0e-04 | norm 1.0654 | dt 0.006 +type train | step 7430 | loss 1.4770 | lr 2.0e-04 | norm 1.2725 | dt 0.006 +type train | step 7440 | loss 1.2926 | lr 1.9e-04 | norm 1.1422 | dt 0.006 +type train | step 7450 | loss 1.3352 | lr 1.9e-04 | norm 1.2590 | dt 0.006 +type train | step 7460 | loss 1.2490 | lr 1.9e-04 | norm 1.2765 | dt 0.006 +type train | step 7470 | loss 1.3997 | lr 1.9e-04 | norm 1.0891 | dt 0.006 +type train | step 7480 | loss 1.3380 | lr 1.9e-04 | norm 1.1666 | dt 0.006 +type train | step 7490 | loss 1.4940 | lr 1.9e-04 | norm 1.3588 | dt 0.006 +type train | step 7500 | loss 1.2979 | lr 1.9e-04 | norm 1.2110 | dt 0.006 +type train | step 7510 | loss 1.3218 | lr 1.9e-04 | norm 1.3311 | dt 0.006 +type train | step 7520 | loss 1.3332 | lr 1.8e-04 | norm 1.0837 | dt 0.005 +type train | step 7530 | loss 1.3146 | lr 1.8e-04 | norm 1.0475 | dt 0.005 +type train | step 7540 | loss 1.3831 | lr 1.8e-04 | norm 1.2708 | dt 0.005 +type train | step 7550 | loss 1.4335 | lr 1.8e-04 | norm 1.2644 | dt 0.005 +type train | step 7560 | loss 1.3281 | lr 1.8e-04 | norm 1.1704 | dt 0.006 +type train | step 7570 | loss 1.3491 | lr 1.8e-04 | norm 1.0371 | dt 0.006 +type train | step 7580 | loss 1.3172 | lr 1.8e-04 | norm 1.1323 | dt 0.005 +type train | step 7590 | loss 1.3105 | lr 1.8e-04 | norm 1.0380 | dt 0.006 +type train | step 7600 | loss 1.2666 | lr 1.7e-04 | norm 1.4110 | dt 0.006 +type train | step 7610 | loss 1.3892 | lr 1.7e-04 | norm 1.2902 | dt 0.006 +type train | step 7620 | loss 1.3630 | lr 1.7e-04 | norm 1.2630 | dt 0.006 +type train | step 7630 | loss 1.3452 | lr 1.7e-04 | norm 1.2143 | dt 0.006 +type train | step 7640 | loss 1.2803 | lr 1.7e-04 | norm 1.1161 | dt 0.005 +type train | step 7650 | loss 1.3348 | lr 1.7e-04 | norm 1.0485 | dt 0.005 +type train | step 7660 | loss 1.3877 | lr 1.7e-04 | norm 1.2372 | dt 0.005 +type train | step 7670 | loss 1.4357 | lr 1.6e-04 | norm 1.3771 | dt 0.005 +type train | step 7680 | loss 1.3509 | lr 1.6e-04 | norm 1.1387 | dt 0.005 +type train | step 7690 | loss 1.3542 | lr 1.6e-04 | norm 1.3624 | dt 0.005 +type train | step 7700 | loss 1.2621 | lr 1.6e-04 | norm 1.0881 | dt 0.006 +type train | step 7710 | loss 1.3397 | lr 1.6e-04 | norm 1.0953 | dt 0.006 +type train | step 7720 | loss 1.3750 | lr 1.6e-04 | norm 1.1574 | dt 0.006 +type train | step 7730 | loss 1.4126 | lr 1.6e-04 | norm 1.2373 | dt 0.006 +type train | step 7740 | loss 1.3274 | lr 1.6e-04 | norm 1.2435 | dt 0.006 +type train | step 7750 | loss 1.4131 | lr 1.5e-04 | norm 1.3858 | dt 0.006 +type train | step 7760 | loss 1.3036 | lr 1.5e-04 | norm 1.1857 | dt 0.005 +type train | step 7770 | loss 1.3175 | lr 1.5e-04 | norm 1.0852 | dt 0.005 +type train | step 7780 | loss 1.3453 | lr 1.5e-04 | norm 1.0810 | dt 0.005 +type train | step 7790 | loss 1.2834 | lr 1.5e-04 | norm 0.9135 | dt 0.005 +type train | step 7800 | loss 1.3614 | lr 1.5e-04 | norm 1.1728 | dt 0.005 +type train | step 7810 | loss 1.3981 | lr 1.5e-04 | norm 1.2970 | dt 0.005 +type train | step 7820 | loss 1.3172 | lr 1.5e-04 | norm 1.2399 | dt 0.005 +type train | step 7830 | loss 1.3664 | lr 1.5e-04 | norm 1.1251 | dt 0.005 +type train | step 7840 | loss 1.3979 | lr 1.4e-04 | norm 1.1620 | dt 0.006 +type train | step 7850 | loss 1.2686 | lr 1.4e-04 | norm 1.0959 | dt 0.005 +type train | step 7860 | loss 1.3436 | lr 1.4e-04 | norm 1.1940 | dt 0.005 +type train | step 7870 | loss 1.3862 | lr 1.4e-04 | norm 1.2641 | dt 0.005 +type train | step 7880 | loss 1.3014 | lr 1.4e-04 | norm 1.2416 | dt 0.006 +type train | step 7890 | loss 1.3445 | lr 1.4e-04 | norm 1.2406 | dt 0.006 +type train | step 7900 | loss 1.4158 | lr 1.4e-04 | norm 1.2841 | dt 0.006 +type train | step 7910 | loss 1.2467 | lr 1.4e-04 | norm 1.2009 | dt 0.006 +type train | step 7920 | loss 1.3727 | lr 1.3e-04 | norm 1.1167 | dt 0.006 +type train | step 7930 | loss 1.4241 | lr 1.3e-04 | norm 1.3612 | dt 0.007 +type train | step 7940 | loss 1.3382 | lr 1.3e-04 | norm 1.1902 | dt 0.006 +type train | step 7950 | loss 1.3447 | lr 1.3e-04 | norm 1.0531 | dt 0.007 +type train | step 7960 | loss 1.3985 | lr 1.3e-04 | norm 1.1422 | dt 0.006 +type train | step 7970 | loss 1.2495 | lr 1.3e-04 | norm 1.3984 | dt 0.006 +type train | step 7980 | loss 1.3790 | lr 1.3e-04 | norm 1.1588 | dt 0.006 +type train | step 7990 | loss 1.4377 | lr 1.3e-04 | norm 1.3525 | dt 0.006 +type train | step 8000 | loss 1.3654 | lr 1.3e-04 | norm 1.1280 | dt 0.006 +type train | step 8010 | loss 1.3398 | lr 1.2e-04 | norm 1.2220 | dt 0.005 +type train | step 8020 | loss 1.4364 | lr 1.2e-04 | norm 1.1798 | dt 0.005 +type train | step 8030 | loss 1.3466 | lr 1.2e-04 | norm 1.1575 | dt 0.005 +type train | step 8040 | loss 1.4671 | lr 1.2e-04 | norm 1.3834 | dt 0.005 +type train | step 8050 | loss 1.2869 | lr 1.2e-04 | norm 1.1421 | dt 0.007 +type train | step 8060 | loss 1.3188 | lr 1.2e-04 | norm 1.0315 | dt 0.006 +type train | step 8070 | loss 1.2333 | lr 1.2e-04 | norm 1.2213 | dt 0.010 +type train | step 8080 | loss 1.3886 | lr 1.2e-04 | norm 1.1832 | dt 0.005 +type train | step 8090 | loss 1.3249 | lr 1.2e-04 | norm 1.2711 | dt 0.006 +type train | step 8100 | loss 1.4752 | lr 1.1e-04 | norm 1.3635 | dt 0.005 +type train | step 8110 | loss 1.2876 | lr 1.1e-04 | norm 1.2020 | dt 0.006 +type train | step 8120 | loss 1.3013 | lr 1.1e-04 | norm 1.0655 | dt 0.006 +type train | step 8130 | loss 1.3198 | lr 1.1e-04 | norm 1.0882 | dt 0.005 +type train | step 8140 | loss 1.3027 | lr 1.1e-04 | norm 1.0957 | dt 0.006 +type train | step 8150 | loss 1.3679 | lr 1.1e-04 | norm 1.2019 | dt 0.006 +type train | step 8160 | loss 1.4190 | lr 1.1e-04 | norm 1.3251 | dt 0.006 +type train | step 8170 | loss 1.3189 | lr 1.1e-04 | norm 1.2610 | dt 0.006 +type train | step 8180 | loss 1.3343 | lr 1.1e-04 | norm 1.1293 | dt 0.005 +type train | step 8190 | loss 1.3043 | lr 1.1e-04 | norm 1.1030 | dt 0.006 +type train | step 8200 | loss 1.3001 | lr 1.0e-04 | norm 1.0386 | dt 0.005 +type train | step 8210 | loss 1.2531 | lr 1.0e-04 | norm 1.3452 | dt 0.006 +type train | step 8220 | loss 1.3725 | lr 1.0e-04 | norm 1.2517 | dt 0.006 +type train | step 8230 | loss 1.3546 | lr 1.0e-04 | norm 1.3299 | dt 0.006 +type train | step 8240 | loss 1.3356 | lr 1.0e-04 | norm 1.3896 | dt 0.005 +type train | step 8250 | loss 1.2689 | lr 1.0e-04 | norm 1.2182 | dt 0.006 +type train | step 8260 | loss 1.3225 | lr 9.9e-05 | norm 1.0581 | dt 0.006 +type train | step 8270 | loss 1.3791 | lr 9.8e-05 | norm 1.2499 | dt 0.005 +type train | step 8280 | loss 1.4197 | lr 9.7e-05 | norm 1.3402 | dt 0.006 +type train | step 8290 | loss 1.3316 | lr 9.6e-05 | norm 0.9901 | dt 0.005 +type train | step 8300 | loss 1.3442 | lr 9.5e-05 | norm 1.3400 | dt 0.005 +type train | step 8310 | loss 1.2499 | lr 9.4e-05 | norm 1.1912 | dt 0.005 +type train | step 8320 | loss 1.3290 | lr 9.3e-05 | norm 1.1069 | dt 0.005 +type train | step 8330 | loss 1.3703 | lr 9.2e-05 | norm 1.2049 | dt 0.005 +type train | step 8340 | loss 1.3982 | lr 9.1e-05 | norm 1.1888 | dt 0.006 +type train | step 8350 | loss 1.3110 | lr 9.0e-05 | norm 1.2142 | dt 0.006 +type train | step 8360 | loss 1.3976 | lr 8.9e-05 | norm 1.1309 | dt 0.005 +type train | step 8370 | loss 1.2939 | lr 8.8e-05 | norm 1.3377 | dt 0.005 +type train | step 8380 | loss 1.3041 | lr 8.7e-05 | norm 1.0649 | dt 0.006 +type train | step 8390 | loss 1.3396 | lr 8.6e-05 | norm 1.1426 | dt 0.006 +type train | step 8400 | loss 1.2768 | lr 8.5e-05 | norm 1.1338 | dt 0.005 +type train | step 8410 | loss 1.3477 | lr 8.4e-05 | norm 1.1862 | dt 0.005 +type train | step 8420 | loss 1.3780 | lr 8.3e-05 | norm 1.1194 | dt 0.008 +type train | step 8430 | loss 1.3041 | lr 8.2e-05 | norm 1.1395 | dt 0.005 +type train | step 8440 | loss 1.3544 | lr 8.2e-05 | norm 1.1370 | dt 0.005 +type train | step 8450 | loss 1.3937 | lr 8.1e-05 | norm 1.3184 | dt 0.005 +type train | step 8460 | loss 1.2607 | lr 8.0e-05 | norm 1.1083 | dt 0.005 +type train | step 8470 | loss 1.3296 | lr 7.9e-05 | norm 1.0898 | dt 0.005 +type train | step 8480 | loss 1.3772 | lr 7.8e-05 | norm 1.4070 | dt 0.006 +type train | step 8490 | loss 1.2819 | lr 7.7e-05 | norm 1.1318 | dt 0.005 +type train | step 8500 | loss 1.3318 | lr 7.6e-05 | norm 1.1717 | dt 0.005 +type train | step 8510 | loss 1.4044 | lr 7.5e-05 | norm 1.3239 | dt 0.006 +type train | step 8520 | loss 1.2385 | lr 7.5e-05 | norm 1.1516 | dt 0.006 +type train | step 8530 | loss 1.3644 | lr 7.4e-05 | norm 1.2023 | dt 0.006 +type train | step 8540 | loss 1.4152 | lr 7.3e-05 | norm 1.4601 | dt 0.008 +type train | step 8550 | loss 1.3211 | lr 7.2e-05 | norm 1.0376 | dt 0.005 +type train | step 8560 | loss 1.3375 | lr 7.1e-05 | norm 1.1274 | dt 0.005 +type train | step 8570 | loss 1.3916 | lr 7.0e-05 | norm 1.2448 | dt 0.006 +type train | step 8580 | loss 1.2349 | lr 7.0e-05 | norm 1.1084 | dt 0.005 +type train | step 8590 | loss 1.3662 | lr 6.9e-05 | norm 1.1659 | dt 0.005 +type train | step 8600 | loss 1.4249 | lr 6.8e-05 | norm 1.3870 | dt 0.005 +type train | step 8610 | loss 1.3537 | lr 6.7e-05 | norm 1.0507 | dt 0.005 +type train | step 8620 | loss 1.3308 | lr 6.6e-05 | norm 1.2512 | dt 0.005 +type train | step 8630 | loss 1.4290 | lr 6.6e-05 | norm 1.2938 | dt 0.006 +type train | step 8640 | loss 1.3348 | lr 6.5e-05 | norm 1.0488 | dt 0.006 +type train | step 8650 | loss 1.4547 | lr 6.4e-05 | norm 1.2944 | dt 0.006 +type train | step 8660 | loss 1.2810 | lr 6.3e-05 | norm 1.0973 | dt 0.006 +type train | step 8670 | loss 1.3114 | lr 6.2e-05 | norm 1.1023 | dt 0.006 +type train | step 8680 | loss 1.2231 | lr 6.2e-05 | norm 1.2175 | dt 0.006 +type train | step 8690 | loss 1.3799 | lr 6.1e-05 | norm 1.2256 | dt 0.006 +type train | step 8700 | loss 1.3159 | lr 6.0e-05 | norm 1.2194 | dt 0.006 +type train | step 8710 | loss 1.4587 | lr 5.9e-05 | norm 1.2586 | dt 0.006 +type train | step 8720 | loss 1.2767 | lr 5.9e-05 | norm 1.0220 | dt 0.006 +type train | step 8730 | loss 1.2938 | lr 5.8e-05 | norm 1.1240 | dt 0.006 +type train | step 8740 | loss 1.3103 | lr 5.7e-05 | norm 1.0910 | dt 0.006 +type train | step 8750 | loss 1.2915 | lr 5.6e-05 | norm 0.9982 | dt 0.006 +type train | step 8760 | loss 1.3584 | lr 5.6e-05 | norm 1.1165 | dt 0.006 +type train | step 8770 | loss 1.4051 | lr 5.5e-05 | norm 1.2121 | dt 0.006 +type train | step 8780 | loss 1.3127 | lr 5.4e-05 | norm 1.2398 | dt 0.006 +type train | step 8790 | loss 1.3291 | lr 5.4e-05 | norm 1.1533 | dt 0.006 +type train | step 8800 | loss 1.2940 | lr 5.3e-05 | norm 1.0708 | dt 0.006 +type train | step 8810 | loss 1.2893 | lr 5.2e-05 | norm 0.9671 | dt 0.006 +type train | step 8820 | loss 1.2459 | lr 5.1e-05 | norm 1.2195 | dt 0.006 +type train | step 8830 | loss 1.3624 | lr 5.1e-05 | norm 1.1730 | dt 0.006 +type train | step 8840 | loss 1.3475 | lr 5.0e-05 | norm 1.3957 | dt 0.006 +type train | step 8850 | loss 1.3275 | lr 4.9e-05 | norm 1.2548 | dt 0.005 +type train | step 8860 | loss 1.2573 | lr 4.9e-05 | norm 1.1281 | dt 0.006 +type train | step 8870 | loss 1.3111 | lr 4.8e-05 | norm 0.9791 | dt 0.005 +type train | step 8880 | loss 1.3724 | lr 4.7e-05 | norm 1.1361 | dt 0.006 +type train | step 8890 | loss 1.4033 | lr 4.7e-05 | norm 1.1677 | dt 0.006 +type train | step 8900 | loss 1.3276 | lr 4.6e-05 | norm 1.1671 | dt 0.006 +type train | step 8910 | loss 1.3369 | lr 4.5e-05 | norm 1.1915 | dt 0.005 +type train | step 8920 | loss 1.2336 | lr 4.5e-05 | norm 0.9712 | dt 0.006 +type train | step 8930 | loss 1.3221 | lr 4.4e-05 | norm 1.1074 | dt 0.006 +type train | step 8940 | loss 1.3640 | lr 4.3e-05 | norm 1.1401 | dt 0.006 +type train | step 8950 | loss 1.3844 | lr 4.3e-05 | norm 1.0481 | dt 0.006 +type train | step 8960 | loss 1.3087 | lr 4.2e-05 | norm 1.3144 | dt 0.006 +type train | step 8970 | loss 1.3899 | lr 4.2e-05 | norm 1.1235 | dt 0.005 +type train | step 8980 | loss 1.2807 | lr 4.1e-05 | norm 1.2081 | dt 0.006 +type train | step 8990 | loss 1.2939 | lr 4.0e-05 | norm 1.0952 | dt 0.006 +type train | step 9000 | loss 1.3331 | lr 4.0e-05 | norm 1.1363 | dt 0.006 +type train | step 9010 | loss 1.2700 | lr 3.9e-05 | norm 1.0058 | dt 0.005 +type train | step 9020 | loss 1.3419 | lr 3.9e-05 | norm 1.1704 | dt 0.006 +type train | step 9030 | loss 1.3717 | lr 3.8e-05 | norm 1.2572 | dt 0.006 +type train | step 9040 | loss 1.2912 | lr 3.8e-05 | norm 1.2333 | dt 0.006 +type train | step 9050 | loss 1.3445 | lr 3.7e-05 | norm 1.0945 | dt 0.006 +type train | step 9060 | loss 1.3857 | lr 3.6e-05 | norm 1.2092 | dt 0.006 +type train | step 9070 | loss 1.2543 | lr 3.6e-05 | norm 1.0544 | dt 0.006 +type train | step 9080 | loss 1.3194 | lr 3.5e-05 | norm 1.0113 | dt 0.006 +type train | step 9090 | loss 1.3720 | lr 3.5e-05 | norm 1.3531 | dt 0.006 +type train | step 9100 | loss 1.2732 | lr 3.4e-05 | norm 1.2640 | dt 0.006 +type train | step 9110 | loss 1.3229 | lr 3.4e-05 | norm 1.1439 | dt 0.006 +type train | step 9120 | loss 1.3974 | lr 3.3e-05 | norm 1.3224 | dt 0.006 +type train | step 9130 | loss 1.2313 | lr 3.3e-05 | norm 1.0688 | dt 0.006 +type train | step 9140 | loss 1.3541 | lr 3.2e-05 | norm 1.0703 | dt 0.006 +type train | step 9150 | loss 1.4073 | lr 3.2e-05 | norm 1.2783 | dt 0.007 +type train | step 9160 | loss 1.3146 | lr 3.1e-05 | norm 1.1209 | dt 0.006 +type train | step 9170 | loss 1.3286 | lr 3.1e-05 | norm 1.0272 | dt 0.006 +type train | step 9180 | loss 1.3821 | lr 3.0e-05 | norm 1.1229 | dt 0.006 +type train | step 9190 | loss 1.2301 | lr 3.0e-05 | norm 1.1079 | dt 0.006 +type train | step 9200 | loss 1.3563 | lr 2.9e-05 | norm 1.1220 | dt 0.006 +type train | step 9210 | loss 1.4125 | lr 2.9e-05 | norm 1.1128 | dt 0.006 +type train | step 9220 | loss 1.3505 | lr 2.8e-05 | norm 1.1023 | dt 0.006 +type train | step 9230 | loss 1.3198 | lr 2.8e-05 | norm 1.2024 | dt 0.006 +type train | step 9240 | loss 1.4183 | lr 2.7e-05 | norm 1.1722 | dt 0.006 +type train | step 9250 | loss 1.3288 | lr 2.7e-05 | norm 0.9913 | dt 0.006 +type train | step 9260 | loss 1.4440 | lr 2.6e-05 | norm 1.2402 | dt 0.006 +type train | step 9270 | loss 1.2742 | lr 2.6e-05 | norm 1.0849 | dt 0.006 +type train | step 9280 | loss 1.3081 | lr 2.6e-05 | norm 1.0909 | dt 0.005 +type train | step 9290 | loss 1.2160 | lr 2.5e-05 | norm 1.2487 | dt 0.005 +type train | step 9300 | loss 1.3690 | lr 2.5e-05 | norm 1.1335 | dt 0.006 +type train | step 9310 | loss 1.3088 | lr 2.4e-05 | norm 1.1671 | dt 0.006 +type train | step 9320 | loss 1.4493 | lr 2.4e-05 | norm 1.2496 | dt 0.006 +type train | step 9330 | loss 1.2707 | lr 2.3e-05 | norm 1.0538 | dt 0.006 +type train | step 9340 | loss 1.2885 | lr 2.3e-05 | norm 1.1079 | dt 0.006 +type train | step 9350 | loss 1.3033 | lr 2.3e-05 | norm 1.1474 | dt 0.006 +type train | step 9360 | loss 1.2863 | lr 2.2e-05 | norm 1.0398 | dt 0.006 +type train | step 9370 | loss 1.3486 | lr 2.2e-05 | norm 1.0088 | dt 0.006 +type train | step 9380 | loss 1.3969 | lr 2.2e-05 | norm 1.1630 | dt 0.006 +type train | step 9390 | loss 1.3104 | lr 2.1e-05 | norm 1.2994 | dt 0.006 +type train | step 9400 | loss 1.3217 | lr 2.1e-05 | norm 1.0762 | dt 0.006 +type train | step 9410 | loss 1.2850 | lr 2.0e-05 | norm 1.0002 | dt 0.006 +type train | step 9420 | loss 1.2855 | lr 2.0e-05 | norm 1.0228 | dt 0.006 +type train | step 9430 | loss 1.2346 | lr 2.0e-05 | norm 1.0930 | dt 0.006 +type train | step 9440 | loss 1.3525 | lr 1.9e-05 | norm 1.0874 | dt 0.006 +type train | step 9450 | loss 1.3429 | lr 1.9e-05 | norm 1.3344 | dt 0.006 +type train | step 9460 | loss 1.3200 | lr 1.9e-05 | norm 1.2000 | dt 0.005 +type train | step 9470 | loss 1.2476 | lr 1.8e-05 | norm 1.0420 | dt 0.006 +type train | step 9480 | loss 1.3070 | lr 1.8e-05 | norm 1.0189 | dt 0.006 +type train | step 9490 | loss 1.3657 | lr 1.8e-05 | norm 1.0911 | dt 0.006 +type train | step 9500 | loss 1.3942 | lr 1.8e-05 | norm 1.1059 | dt 0.006 +type train | step 9510 | loss 1.3233 | lr 1.7e-05 | norm 1.0929 | dt 0.005 +type train | step 9520 | loss 1.3305 | lr 1.7e-05 | norm 1.1361 | dt 0.006 +type train | step 9530 | loss 1.2257 | lr 1.7e-05 | norm 0.9570 | dt 0.006 +type train | step 9540 | loss 1.3166 | lr 1.6e-05 | norm 1.0592 | dt 0.006 +type train | step 9550 | loss 1.3578 | lr 1.6e-05 | norm 1.1693 | dt 0.006 +type train | step 9560 | loss 1.3770 | lr 1.6e-05 | norm 1.0546 | dt 0.006 +type train | step 9570 | loss 1.3018 | lr 1.6e-05 | norm 1.1734 | dt 0.006 +type train | step 9580 | loss 1.3833 | lr 1.5e-05 | norm 1.0599 | dt 0.006 +type train | step 9590 | loss 1.2726 | lr 1.5e-05 | norm 1.1058 | dt 0.006 +type train | step 9600 | loss 1.2874 | lr 1.5e-05 | norm 0.9856 | dt 0.006 +type train | step 9610 | loss 1.3289 | lr 1.5e-05 | norm 1.1997 | dt 0.006 +type train | step 9620 | loss 1.2674 | lr 1.4e-05 | norm 1.0796 | dt 0.006 +type train | step 9630 | loss 1.3350 | lr 1.4e-05 | norm 1.0760 | dt 0.006 +type train | step 9640 | loss 1.3639 | lr 1.4e-05 | norm 1.1015 | dt 0.006 +type train | step 9650 | loss 1.2835 | lr 1.4e-05 | norm 1.0716 | dt 0.006 +type train | step 9660 | loss 1.3389 | lr 1.3e-05 | norm 1.0971 | dt 0.006 +type train | step 9670 | loss 1.3808 | lr 1.3e-05 | norm 1.2415 | dt 0.005 +type train | step 9680 | loss 1.2495 | lr 1.3e-05 | norm 1.0506 | dt 0.006 +type train | step 9690 | loss 1.3138 | lr 1.3e-05 | norm 1.0013 | dt 0.006 +type train | step 9700 | loss 1.3650 | lr 1.3e-05 | norm 1.2282 | dt 0.006 +type train | step 9710 | loss 1.2642 | lr 1.3e-05 | norm 1.0923 | dt 0.006 +type train | step 9720 | loss 1.3166 | lr 1.2e-05 | norm 1.1333 | dt 0.006 +type train | step 9730 | loss 1.3906 | lr 1.2e-05 | norm 1.4164 | dt 0.007 +type train | step 9740 | loss 1.2256 | lr 1.2e-05 | norm 1.0167 | dt 0.006 +type train | step 9750 | loss 1.3484 | lr 1.2e-05 | norm 1.0826 | dt 0.006 +type train | step 9760 | loss 1.4007 | lr 1.2e-05 | norm 1.2330 | dt 0.007 +type train | step 9770 | loss 1.3079 | lr 1.2e-05 | norm 1.0479 | dt 0.008 +type train | step 9780 | loss 1.3236 | lr 1.1e-05 | norm 1.1187 | dt 0.009 +type train | step 9790 | loss 1.3796 | lr 1.1e-05 | norm 1.1654 | dt 0.009 +type train | step 9800 | loss 1.2236 | lr 1.1e-05 | norm 1.0258 | dt 0.007 +type train | step 9810 | loss 1.3498 | lr 1.1e-05 | norm 1.0425 | dt 0.009 +type train | step 9820 | loss 1.4064 | lr 1.1e-05 | norm 1.0744 | dt 0.009 +type train | step 9830 | loss 1.3471 | lr 1.1e-05 | norm 1.1142 | dt 0.009 +type train | step 9840 | loss 1.3141 | lr 1.1e-05 | norm 1.2162 | dt 0.010 +type train | step 9850 | loss 1.4129 | lr 1.1e-05 | norm 1.1115 | dt 0.009 +type train | step 9860 | loss 1.3250 | lr 1.1e-05 | norm 0.9743 | dt 0.009 +type train | step 9870 | loss 1.4372 | lr 1.1e-05 | norm 1.1451 | dt 0.008 +type train | step 9880 | loss 1.2705 | lr 1.0e-05 | norm 1.0792 | dt 0.009 +type train | step 9890 | loss 1.3066 | lr 1.0e-05 | norm 1.1156 | dt 0.007 +type train | step 9900 | loss 1.2108 | lr 1.0e-05 | norm 1.2529 | dt 0.009 +type train | step 9910 | loss 1.3643 | lr 1.0e-05 | norm 1.0495 | dt 0.009 +type train | step 9920 | loss 1.3042 | lr 1.0e-05 | norm 1.0168 | dt 0.009 +type train | step 9930 | loss 1.4422 | lr 1.0e-05 | norm 1.1105 | dt 0.009 +type train | step 9940 | loss 1.2680 | lr 1.0e-05 | norm 1.0738 | dt 0.009 +type train | step 9950 | loss 1.2861 | lr 1.0e-05 | norm 1.1093 | dt 0.006 +type train | step 9960 | loss 1.2977 | lr 1.0e-05 | norm 1.1067 | dt 0.006 +type train | step 9970 | loss 1.2839 | lr 1.0e-05 | norm 0.9874 | dt 0.006 +type train | step 9980 | loss 1.3455 | lr 1.0e-05 | norm 1.1134 | dt 0.007 +type train | step 9990 | loss 1.3917 | lr 1.0e-05 | norm 1.0859 | dt 0.007 +type train | step 10000 | loss 1.3084 | lr 1.0e-05 | norm 1.3428 | dt 0.009 +type train | step 10 | loss 4.8393 | lr 1.0e-05 | norm 3.6649 | dt 0.006 +type train | step 20 | loss 4.7731 | lr 2.0e-05 | norm 3.7735 | dt 0.006 +type train | step 30 | loss 4.6646 | lr 3.0e-05 | norm 2.7793 | dt 0.006 +type train | step 40 | loss 4.5917 | lr 4.0e-05 | norm 1.7059 | dt 0.005 +type train | step 50 | loss 4.5093 | lr 5.0e-05 | norm 1.6290 | dt 0.006 +type train | step 60 | loss 4.4555 | lr 6.0e-05 | norm 1.6327 | dt 0.006 +type train | step 70 | loss 4.3992 | lr 7.0e-05 | norm 1.6006 | dt 0.006 +type train | step 80 | loss 4.3477 | lr 8.0e-05 | norm 1.4920 | dt 0.006 +type train | step 90 | loss 4.2586 | lr 9.0e-05 | norm 1.5167 | dt 0.006 +type train | step 100 | loss 4.1610 | lr 1.0e-04 | norm 1.5364 | dt 0.006 +type train | step 110 | loss 4.0970 | lr 1.1e-04 | norm 1.4694 | dt 0.006 +type train | step 120 | loss 4.0326 | lr 1.2e-04 | norm 1.3805 | dt 0.006 +type train | step 130 | loss 3.9329 | lr 1.3e-04 | norm 1.3548 | dt 0.005 +type train | step 140 | loss 3.9007 | lr 1.4e-04 | norm 1.3265 | dt 0.005 +type train | step 150 | loss 3.7808 | lr 1.5e-04 | norm 1.3089 | dt 0.005 +type train | step 160 | loss 3.6781 | lr 1.6e-04 | norm 1.2634 | dt 0.006 +type train | step 170 | loss 3.5807 | lr 1.7e-04 | norm 1.2496 | dt 0.006 +type train | step 180 | loss 3.5500 | lr 1.8e-04 | norm 1.2376 | dt 0.005 +type train | step 190 | loss 3.4446 | lr 1.9e-04 | norm 1.3882 | dt 0.005 +type train | step 200 | loss 3.4344 | lr 2.0e-04 | norm 1.1198 | dt 0.007 +type train | step 210 | loss 3.3366 | lr 2.1e-04 | norm 1.0219 | dt 0.008 +type train | step 220 | loss 3.2332 | lr 2.2e-04 | norm 1.0567 | dt 0.006 +type train | step 230 | loss 3.1678 | lr 2.3e-04 | norm 0.8694 | dt 0.006 +type train | step 240 | loss 3.1653 | lr 2.4e-04 | norm 0.8789 | dt 0.006 +type train | step 250 | loss 3.0781 | lr 2.5e-04 | norm 0.9141 | dt 0.006 +type train | step 260 | loss 3.0306 | lr 2.6e-04 | norm 0.8140 | dt 0.005 +type train | step 270 | loss 2.9980 | lr 2.7e-04 | norm 0.8791 | dt 0.007 +type train | step 280 | loss 2.9570 | lr 2.8e-04 | norm 0.9748 | dt 0.007 +type train | step 290 | loss 2.8917 | lr 2.9e-04 | norm 0.8125 | dt 0.007 +type train | step 300 | loss 2.8938 | lr 3.0e-04 | norm 0.9107 | dt 0.009 +type train | step 310 | loss 2.8357 | lr 3.1e-04 | norm 0.9558 | dt 0.009 +type train | step 320 | loss 2.7950 | lr 3.2e-04 | norm 0.8596 | dt 0.006 +type train | step 330 | loss 2.7739 | lr 3.3e-04 | norm 1.5411 | dt 0.006 +type train | step 340 | loss 2.7240 | lr 3.4e-04 | norm 1.1688 | dt 0.007 +type train | step 350 | loss 2.7273 | lr 3.5e-04 | norm 1.4085 | dt 0.009 +type train | step 360 | loss 2.6759 | lr 3.6e-04 | norm 1.4021 | dt 0.009 +type train | step 370 | loss 2.6503 | lr 3.7e-04 | norm 1.3121 | dt 0.007 +type train | step 380 | loss 2.6752 | lr 3.8e-04 | norm 1.4314 | dt 0.009 +type train | step 390 | loss 2.6205 | lr 3.9e-04 | norm 1.1058 | dt 0.007 +type train | step 400 | loss 2.6084 | lr 4.0e-04 | norm 1.5072 | dt 0.008 +type train | step 410 | loss 2.6036 | lr 4.1e-04 | norm 1.0364 | dt 0.009 +type train | step 420 | loss 2.5803 | lr 4.2e-04 | norm 1.8896 | dt 0.008 +type train | step 430 | loss 2.5751 | lr 4.3e-04 | norm 1.5226 | dt 0.009 +type train | step 440 | loss 2.5491 | lr 4.4e-04 | norm 1.2587 | dt 0.009 +type train | step 450 | loss 2.5462 | lr 4.5e-04 | norm 1.1710 | dt 0.009 +type train | step 460 | loss 2.5126 | lr 4.6e-04 | norm 2.7615 | dt 0.007 +type train | step 470 | loss 2.5308 | lr 4.7e-04 | norm 2.2896 | dt 0.008 +type train | step 480 | loss 2.5277 | lr 4.8e-04 | norm 2.1038 | dt 0.009 +type train | step 490 | loss 2.5050 | lr 4.9e-04 | norm 0.7960 | dt 0.009 +type train | step 500 | loss 2.4927 | lr 5.0e-04 | norm 0.9504 | dt 0.008 +type train | step 510 | loss 2.4984 | lr 5.1e-04 | norm 1.0880 | dt 0.005 +type train | step 520 | loss 2.5207 | lr 5.2e-04 | norm 1.1958 | dt 0.007 +type train | step 530 | loss 2.4749 | lr 5.3e-04 | norm 1.7377 | dt 0.007 +type train | step 540 | loss 2.4670 | lr 5.4e-04 | norm 1.8031 | dt 0.007 +type train | step 550 | loss 2.4749 | lr 5.5e-04 | norm 1.3900 | dt 0.009 +type train | step 560 | loss 2.4747 | lr 5.6e-04 | norm 2.2157 | dt 0.009 +type train | step 570 | loss 2.4655 | lr 5.7e-04 | norm 1.0657 | dt 0.008 +type train | step 580 | loss 2.4481 | lr 5.8e-04 | norm 1.5157 | dt 0.007 +type train | step 590 | loss 2.4195 | lr 5.9e-04 | norm 2.0334 | dt 0.007 +type train | step 600 | loss 2.4169 | lr 6.0e-04 | norm 1.0105 | dt 0.007 +type train | step 610 | loss 2.4297 | lr 6.1e-04 | norm 0.9789 | dt 0.010 +type train | step 620 | loss 2.4214 | lr 6.2e-04 | norm 2.2353 | dt 0.007 +type train | step 630 | loss 2.4109 | lr 6.3e-04 | norm 1.2381 | dt 0.007 +type train | step 640 | loss 2.3700 | lr 6.4e-04 | norm 1.3986 | dt 0.007 +type train | step 650 | loss 2.3918 | lr 6.5e-04 | norm 1.2899 | dt 0.007 +type train | step 660 | loss 2.3789 | lr 6.6e-04 | norm 1.7303 | dt 0.008 +type train | step 670 | loss 2.3937 | lr 6.7e-04 | norm 1.3816 | dt 0.007 +type train | step 680 | loss 2.3634 | lr 6.8e-04 | norm 1.1278 | dt 0.009 +type train | step 690 | loss 2.4122 | lr 6.9e-04 | norm 1.4256 | dt 0.007 +type train | step 700 | loss 2.3741 | lr 7.0e-04 | norm 1.3116 | dt 0.007 +type train | step 710 | loss 2.2908 | lr 7.1e-04 | norm 1.3133 | dt 0.007 +type train | step 720 | loss 2.3482 | lr 7.2e-04 | norm 2.1003 | dt 0.009 +type train | step 730 | loss 2.3187 | lr 7.3e-04 | norm 1.1539 | dt 0.009 +type train | step 740 | loss 2.3080 | lr 7.4e-04 | norm 1.1723 | dt 0.008 +type train | step 750 | loss 2.3251 | lr 7.5e-04 | norm 1.1932 | dt 0.009 +type train | step 760 | loss 2.3255 | lr 7.6e-04 | norm 1.1939 | dt 0.005 +type train | step 770 | loss 2.2907 | lr 7.7e-04 | norm 1.5785 | dt 0.007 +type train | step 780 | loss 2.3443 | lr 7.8e-04 | norm 1.6662 | dt 0.006 +type train | step 790 | loss 2.3008 | lr 7.9e-04 | norm 1.2822 | dt 0.006 +type train | step 800 | loss 2.2821 | lr 8.0e-04 | norm 1.4196 | dt 0.007 +type train | step 810 | loss 2.3194 | lr 8.1e-04 | norm 1.5736 | dt 0.008 +type train | step 820 | loss 2.2506 | lr 8.2e-04 | norm 1.0866 | dt 0.007 +type train | step 830 | loss 2.2633 | lr 8.3e-04 | norm 1.1543 | dt 0.007 +type train | step 840 | loss 2.2960 | lr 8.4e-04 | norm 1.5077 | dt 0.007 +type train | step 850 | loss 2.2897 | lr 8.5e-04 | norm 1.6835 | dt 0.008 +type train | step 860 | loss 2.2097 | lr 8.6e-04 | norm 1.3157 | dt 0.007 +type train | step 870 | loss 2.2165 | lr 8.7e-04 | norm 1.3682 | dt 0.009 +type train | step 880 | loss 2.2004 | lr 8.8e-04 | norm 1.2751 | dt 0.006 +type train | step 890 | loss 2.2030 | lr 8.9e-04 | norm 1.5933 | dt 0.006 +type train | step 900 | loss 2.2417 | lr 9.0e-04 | norm 1.6154 | dt 0.007 +type train | step 910 | loss 2.2447 | lr 9.1e-04 | norm 2.2924 | dt 0.006 +type train | step 920 | loss 2.2047 | lr 9.2e-04 | norm 2.0040 | dt 0.007 +type train | step 930 | loss 2.1698 | lr 9.3e-04 | norm 1.8046 | dt 0.009 +type train | step 940 | loss 2.1891 | lr 9.4e-04 | norm 1.7798 | dt 0.007 +type train | step 950 | loss 2.1835 | lr 9.5e-04 | norm 1.0551 | dt 0.009 +type train | step 960 | loss 2.2292 | lr 9.6e-04 | norm 1.4108 | dt 0.009 +type train | step 970 | loss 2.1523 | lr 9.7e-04 | norm 1.9651 | dt 0.008 +type train | step 980 | loss 2.1466 | lr 9.8e-04 | norm 1.0533 | dt 0.006 +type train | step 990 | loss 2.1278 | lr 9.9e-04 | norm 1.4825 | dt 0.006 +type train | step 1000 | loss 2.1537 | lr 1.0e-03 | norm 1.8683 | dt 0.007 +type train | step 1010 | loss 2.1422 | lr 1.0e-03 | norm 1.4724 | dt 0.005 +type train | step 1020 | loss 2.1608 | lr 1.0e-03 | norm 1.6546 | dt 0.009 +type train | step 1030 | loss 2.1082 | lr 1.0e-03 | norm 1.2588 | dt 0.006 +type train | step 1040 | loss 2.1363 | lr 1.0e-03 | norm 1.3966 | dt 0.009 +type train | step 1050 | loss 2.0844 | lr 1.0e-03 | norm 1.5022 | dt 0.009 +type train | step 1060 | loss 2.1117 | lr 1.0e-03 | norm 1.4749 | dt 0.007 +type train | step 1070 | loss 2.0714 | lr 1.0e-03 | norm 1.5814 | dt 0.009 +type train | step 1080 | loss 2.0569 | lr 1.0e-03 | norm 1.5734 | dt 0.009 +type train | step 1090 | loss 2.1000 | lr 1.0e-03 | norm 1.6028 | dt 0.009 +type train | step 1100 | loss 2.0854 | lr 1.0e-03 | norm 1.7298 | dt 0.007 +type train | step 1110 | loss 2.0774 | lr 1.0e-03 | norm 1.2271 | dt 0.007 +type train | step 1120 | loss 2.0770 | lr 1.0e-03 | norm 1.6423 | dt 0.009 +type train | step 1130 | loss 2.1419 | lr 1.0e-03 | norm 1.3576 | dt 0.008 +type train | step 1140 | loss 2.0247 | lr 1.0e-03 | norm 1.7122 | dt 0.009 +type train | step 1150 | loss 2.0209 | lr 1.0e-03 | norm 1.3680 | dt 0.007 +type train | step 1160 | loss 2.0475 | lr 1.0e-03 | norm 1.2015 | dt 0.007 +type train | step 1170 | loss 2.0736 | lr 1.0e-03 | norm 1.9559 | dt 0.009 +type train | step 1180 | loss 2.0740 | lr 1.0e-03 | norm 1.9486 | dt 0.009 +type train | step 1190 | loss 2.0381 | lr 1.0e-03 | norm 1.3727 | dt 0.007 +type train | step 1200 | loss 1.9911 | lr 1.0e-03 | norm 2.3795 | dt 0.009 +type train | step 1210 | loss 2.0058 | lr 1.0e-03 | norm 1.6194 | dt 0.010 +type train | step 1220 | loss 2.0390 | lr 1.0e-03 | norm 1.3787 | dt 0.011 +type train | step 1230 | loss 2.0218 | lr 1.0e-03 | norm 1.9271 | dt 0.007 +type train | step 1240 | loss 2.0361 | lr 1.0e-03 | norm 1.7903 | dt 0.009 +type train | step 1250 | loss 1.9940 | lr 1.0e-03 | norm 1.8956 | dt 0.007 +type train | step 1260 | loss 1.9549 | lr 1.0e-03 | norm 2.1846 | dt 0.005 +type train | step 1270 | loss 1.9672 | lr 1.0e-03 | norm 1.4962 | dt 0.008 +type train | step 1280 | loss 2.0222 | lr 1.0e-03 | norm 1.6042 | dt 0.007 +type train | step 1290 | loss 1.9560 | lr 1.0e-03 | norm 1.4917 | dt 0.009 +type train | step 1300 | loss 2.0351 | lr 1.0e-03 | norm 2.0543 | dt 0.009 +type train | step 1310 | loss 2.0010 | lr 1.0e-03 | norm 1.5649 | dt 0.009 +type train | step 1320 | loss 1.9057 | lr 1.0e-03 | norm 1.5288 | dt 0.007 +type train | step 1330 | loss 1.9850 | lr 1.0e-03 | norm 1.5646 | dt 0.007 +type train | step 1340 | loss 1.8869 | lr 1.0e-03 | norm 1.7227 | dt 0.007 +type train | step 1350 | loss 1.8909 | lr 1.0e-03 | norm 1.7740 | dt 0.009 +type train | step 1360 | loss 1.8555 | lr 1.0e-03 | norm 2.5084 | dt 0.007 +type train | step 1370 | loss 1.9745 | lr 1.0e-03 | norm 1.7307 | dt 0.008 +type train | step 1380 | loss 1.8959 | lr 1.0e-03 | norm 1.8123 | dt 0.008 +type train | step 1390 | loss 1.9738 | lr 1.0e-03 | norm 1.7084 | dt 0.009 +type train | step 1400 | loss 1.8658 | lr 1.0e-03 | norm 1.6451 | dt 0.009 +type train | step 1410 | loss 1.8715 | lr 9.9e-04 | norm 1.9588 | dt 0.006 +type train | step 1420 | loss 1.9247 | lr 9.9e-04 | norm 1.8852 | dt 0.006 +type train | step 1430 | loss 1.8527 | lr 9.9e-04 | norm 1.7721 | dt 0.006 +type train | step 1440 | loss 1.9101 | lr 9.9e-04 | norm 2.3712 | dt 0.007 +type train | step 1450 | loss 1.9438 | lr 9.9e-04 | norm 1.5649 | dt 0.007 +type train | step 1460 | loss 1.8657 | lr 9.9e-04 | norm 1.6438 | dt 0.008 +type train | step 1470 | loss 1.8343 | lr 9.9e-04 | norm 1.5473 | dt 0.007 +type train | step 1480 | loss 1.8270 | lr 9.9e-04 | norm 1.4705 | dt 0.006 +type train | step 1490 | loss 1.8019 | lr 9.9e-04 | norm 1.3923 | dt 0.009 +type train | step 1500 | loss 1.8067 | lr 9.9e-04 | norm 1.6710 | dt 0.007 +type train | step 1510 | loss 1.8849 | lr 9.9e-04 | norm 1.6745 | dt 0.005 +type train | step 1520 | loss 1.8513 | lr 9.9e-04 | norm 2.1295 | dt 0.006 +type train | step 1530 | loss 1.8329 | lr 9.9e-04 | norm 1.5258 | dt 0.007 +type train | step 1540 | loss 1.8148 | lr 9.9e-04 | norm 1.7174 | dt 0.006 +type train | step 1550 | loss 1.8500 | lr 9.9e-04 | norm 1.8050 | dt 0.007 +type train | step 1560 | loss 1.8691 | lr 9.9e-04 | norm 1.8135 | dt 0.006 +type train | step 1570 | loss 1.8938 | lr 9.9e-04 | norm 1.4277 | dt 0.006 +type train | step 1580 | loss 1.7913 | lr 9.9e-04 | norm 1.4636 | dt 0.006 +type train | step 1590 | loss 1.7945 | lr 9.9e-04 | norm 1.5754 | dt 0.005 +type train | step 1600 | loss 1.7630 | lr 9.9e-04 | norm 1.8048 | dt 0.006 +type train | step 1610 | loss 1.8136 | lr 9.9e-04 | norm 1.5065 | dt 0.006 +type train | step 1620 | loss 1.8117 | lr 9.9e-04 | norm 1.7385 | dt 0.006 +type train | step 1630 | loss 1.8516 | lr 9.9e-04 | norm 1.8811 | dt 0.006 +type train | step 1640 | loss 1.7916 | lr 9.9e-04 | norm 1.8736 | dt 0.006 +type train | step 1650 | loss 1.8430 | lr 9.9e-04 | norm 1.4424 | dt 0.007 +type train | step 1660 | loss 1.7475 | lr 9.9e-04 | norm 1.4353 | dt 0.008 +type train | step 1670 | loss 1.7735 | lr 9.9e-04 | norm 1.3299 | dt 0.007 +type train | step 1680 | loss 1.7594 | lr 9.9e-04 | norm 1.6420 | dt 0.007 +type train | step 1690 | loss 1.7171 | lr 9.9e-04 | norm 1.5386 | dt 0.008 +type train | step 1700 | loss 1.8060 | lr 9.9e-04 | norm 1.8002 | dt 0.009 +type train | step 1710 | loss 1.8254 | lr 9.8e-04 | norm 1.5694 | dt 0.009 +type train | step 1720 | loss 1.7864 | lr 9.8e-04 | norm 1.5306 | dt 0.006 +type train | step 1730 | loss 1.8004 | lr 9.8e-04 | norm 1.6978 | dt 0.006 +type train | step 1740 | loss 1.8179 | lr 9.8e-04 | norm 1.4578 | dt 0.007 +type train | step 1750 | loss 1.7115 | lr 9.8e-04 | norm 1.8357 | dt 0.008 +type train | step 1760 | loss 1.7260 | lr 9.8e-04 | norm 1.9217 | dt 0.006 +type train | step 1770 | loss 1.8057 | lr 9.8e-04 | norm 1.6643 | dt 0.007 +type train | step 1780 | loss 1.7625 | lr 9.8e-04 | norm 2.1619 | dt 0.007 +type train | step 1790 | loss 1.7802 | lr 9.8e-04 | norm 1.5112 | dt 0.009 +type train | step 1800 | loss 1.7815 | lr 9.8e-04 | norm 1.2210 | dt 0.009 +type train | step 1810 | loss 1.6506 | lr 9.8e-04 | norm 1.5688 | dt 0.007 +type train | step 1820 | loss 1.7330 | lr 9.8e-04 | norm 1.6498 | dt 0.007 +type train | step 1830 | loss 1.8357 | lr 9.8e-04 | norm 1.5965 | dt 0.011 +type train | step 1840 | loss 1.7435 | lr 9.8e-04 | norm 1.7068 | dt 0.009 +type train | step 1850 | loss 1.7767 | lr 9.8e-04 | norm 1.6144 | dt 0.009 +type train | step 1860 | loss 1.7456 | lr 9.8e-04 | norm 1.8709 | dt 0.007 +type train | step 1870 | loss 1.6544 | lr 9.8e-04 | norm 2.1250 | dt 0.007 +type train | step 1880 | loss 1.7262 | lr 9.8e-04 | norm 1.4930 | dt 0.007 +type train | step 1890 | loss 1.8226 | lr 9.8e-04 | norm 1.6762 | dt 0.007 +type train | step 1900 | loss 1.7097 | lr 9.8e-04 | norm 1.8627 | dt 0.007 +type train | step 1910 | loss 1.7728 | lr 9.8e-04 | norm 1.9974 | dt 0.009 +type train | step 1920 | loss 1.7743 | lr 9.7e-04 | norm 1.1792 | dt 0.009 +type train | step 1930 | loss 1.6682 | lr 9.7e-04 | norm 1.2895 | dt 0.009 +type train | step 1940 | loss 1.7830 | lr 9.7e-04 | norm 1.7372 | dt 0.009 +type train | step 1950 | loss 1.6258 | lr 9.7e-04 | norm 2.0451 | dt 0.007 +type train | step 1960 | loss 1.6488 | lr 9.7e-04 | norm 1.4348 | dt 0.007 +type train | step 1970 | loss 1.5603 | lr 9.7e-04 | norm 1.9986 | dt 0.007 +type train | step 1980 | loss 1.7538 | lr 9.7e-04 | norm 1.7944 | dt 0.007 +type train | step 1990 | loss 1.6698 | lr 9.7e-04 | norm 1.7637 | dt 0.006 +type train | step 2000 | loss 1.7816 | lr 9.7e-04 | norm 1.5505 | dt 0.007 +type train | step 2010 | loss 1.6346 | lr 9.7e-04 | norm 1.5544 | dt 0.006 +type train | step 2020 | loss 1.6273 | lr 9.7e-04 | norm 1.4197 | dt 0.006 +type train | step 2030 | loss 1.6813 | lr 9.7e-04 | norm 1.5592 | dt 0.006 +type train | step 2040 | loss 1.6084 | lr 9.7e-04 | norm 1.1698 | dt 0.009 +type train | step 2050 | loss 1.6989 | lr 9.7e-04 | norm 1.3519 | dt 0.008 +type train | step 2060 | loss 1.7531 | lr 9.7e-04 | norm 1.3092 | dt 0.009 +type train | step 2070 | loss 1.6545 | lr 9.7e-04 | norm 1.5283 | dt 0.007 +type train | step 2080 | loss 1.6266 | lr 9.7e-04 | norm 1.3679 | dt 0.006 +type train | step 2090 | loss 1.6354 | lr 9.6e-04 | norm 2.1942 | dt 0.007 +type train | step 2100 | loss 1.5900 | lr 9.6e-04 | norm 1.4452 | dt 0.007 +type train | step 2110 | loss 1.5964 | lr 9.6e-04 | norm 2.1177 | dt 0.007 +type train | step 2120 | loss 1.6942 | lr 9.6e-04 | norm 1.9782 | dt 0.006 +type train | step 2130 | loss 1.6462 | lr 9.6e-04 | norm 2.0149 | dt 0.009 +type train | step 2140 | loss 1.6342 | lr 9.6e-04 | norm 1.7536 | dt 0.008 +type train | step 2150 | loss 1.6251 | lr 9.6e-04 | norm 1.8719 | dt 0.008 +type train | step 2160 | loss 1.6364 | lr 9.6e-04 | norm 1.2670 | dt 0.009 +type train | step 2170 | loss 1.6854 | lr 9.6e-04 | norm 1.3154 | dt 0.006 +type train | step 2180 | loss 1.7153 | lr 9.6e-04 | norm 1.2576 | dt 0.007 +type train | step 2190 | loss 1.6139 | lr 9.6e-04 | norm 1.5191 | dt 0.007 +type train | step 2200 | loss 1.6053 | lr 9.6e-04 | norm 1.1794 | dt 0.009 +type train | step 2210 | loss 1.5781 | lr 9.6e-04 | norm 1.6776 | dt 0.007 +type train | step 2220 | loss 1.6390 | lr 9.6e-04 | norm 1.8943 | dt 0.006 +type train | step 2230 | loss 1.6473 | lr 9.6e-04 | norm 1.4906 | dt 0.006 +type train | step 2240 | loss 1.6730 | lr 9.5e-04 | norm 1.3690 | dt 0.007 +type train | step 2250 | loss 1.6072 | lr 9.5e-04 | norm 1.7785 | dt 0.007 +type train | step 2260 | loss 1.6788 | lr 9.5e-04 | norm 1.6215 | dt 0.005 +type train | step 2270 | loss 1.5883 | lr 9.5e-04 | norm 1.4479 | dt 0.009 +type train | step 2280 | loss 1.5973 | lr 9.5e-04 | norm 1.4261 | dt 0.006 +type train | step 2290 | loss 1.5991 | lr 9.5e-04 | norm 1.5579 | dt 0.006 +type train | step 2300 | loss 1.5400 | lr 9.5e-04 | norm 1.5582 | dt 0.006 +type train | step 2310 | loss 1.6433 | lr 9.5e-04 | norm 1.4389 | dt 0.007 +type train | step 2320 | loss 1.6818 | lr 9.5e-04 | norm 1.8364 | dt 0.007 +type train | step 2330 | loss 1.6136 | lr 9.5e-04 | norm 1.7874 | dt 0.007 +type train | step 2340 | loss 1.6569 | lr 9.5e-04 | norm 1.9576 | dt 0.009 +type train | step 2350 | loss 1.6476 | lr 9.5e-04 | norm 1.9086 | dt 0.007 +type train | step 2360 | loss 1.5279 | lr 9.5e-04 | norm 1.3098 | dt 0.008 +type train | step 2370 | loss 1.5845 | lr 9.4e-04 | norm 1.6272 | dt 0.007 +type train | step 2380 | loss 1.6453 | lr 9.4e-04 | norm 1.6979 | dt 0.009 +type train | step 2390 | loss 1.5737 | lr 9.4e-04 | norm 1.6788 | dt 0.007 +type train | step 2400 | loss 1.6242 | lr 9.4e-04 | norm 1.3954 | dt 0.007 +type train | step 2410 | loss 1.6431 | lr 9.4e-04 | norm 1.3477 | dt 0.006 +type train | step 2420 | loss 1.4938 | lr 9.4e-04 | norm 1.4213 | dt 0.006 +type train | step 2430 | loss 1.5893 | lr 9.4e-04 | norm 1.4673 | dt 0.006 +type train | step 2440 | loss 1.7189 | lr 9.4e-04 | norm 2.6312 | dt 0.011 +type train | step 2450 | loss 1.5945 | lr 9.4e-04 | norm 1.4586 | dt 0.006 +type train | step 2460 | loss 1.6222 | lr 9.4e-04 | norm 1.4686 | dt 0.006 +type train | step 2470 | loss 1.6226 | lr 9.4e-04 | norm 1.8771 | dt 0.007 +type train | step 2480 | loss 1.4811 | lr 9.4e-04 | norm 1.4355 | dt 0.006 +type train | step 2490 | loss 1.5951 | lr 9.3e-04 | norm 1.4800 | dt 0.006 +type train | step 2500 | loss 1.7051 | lr 9.3e-04 | norm 1.5724 | dt 0.006 +type train | step 2510 | loss 1.5758 | lr 9.3e-04 | norm 1.3015 | dt 0.006 +type train | step 2520 | loss 1.6178 | lr 9.3e-04 | norm 1.6407 | dt 0.008 +type train | step 2530 | loss 1.6483 | lr 9.3e-04 | norm 1.1408 | dt 0.006 +type train | step 2540 | loss 1.5435 | lr 9.3e-04 | norm 1.3357 | dt 0.007 +type train | step 2550 | loss 1.6571 | lr 9.3e-04 | norm 1.2892 | dt 0.009 +type train | step 2560 | loss 1.5010 | lr 9.3e-04 | norm 1.5330 | dt 0.008 +type train | step 2570 | loss 1.5297 | lr 9.3e-04 | norm 1.3541 | dt 0.005 +type train | step 2580 | loss 1.4353 | lr 9.3e-04 | norm 1.3222 | dt 0.009 +type train | step 2590 | loss 1.6199 | lr 9.3e-04 | norm 1.4465 | dt 0.009 +type train | step 2600 | loss 1.5447 | lr 9.2e-04 | norm 1.2638 | dt 0.009 +type train | step 2610 | loss 1.6767 | lr 9.2e-04 | norm 1.4753 | dt 0.009 +type train | step 2620 | loss 1.5145 | lr 9.2e-04 | norm 1.5151 | dt 0.009 +type train | step 2630 | loss 1.5012 | lr 9.2e-04 | norm 1.5154 | dt 0.009 +type train | step 2640 | loss 1.5541 | lr 9.2e-04 | norm 1.6295 | dt 0.007 +type train | step 2650 | loss 1.5021 | lr 9.2e-04 | norm 1.2350 | dt 0.006 +type train | step 2660 | loss 1.5726 | lr 9.2e-04 | norm 1.1643 | dt 0.005 +type train | step 2670 | loss 1.6359 | lr 9.2e-04 | norm 1.5965 | dt 0.009 +type train | step 2680 | loss 1.5498 | lr 9.2e-04 | norm 1.4893 | dt 0.008 +type train | step 2690 | loss 1.5404 | lr 9.2e-04 | norm 1.8828 | dt 0.008 +type train | step 2700 | loss 1.5148 | lr 9.2e-04 | norm 1.6126 | dt 0.007 +type train | step 2710 | loss 1.4868 | lr 9.1e-04 | norm 1.2191 | dt 0.009 +type train | step 2720 | loss 1.4641 | lr 9.1e-04 | norm 1.5698 | dt 0.007 +type train | step 2730 | loss 1.5909 | lr 9.1e-04 | norm 1.3963 | dt 0.009 +type train | step 2740 | loss 1.5566 | lr 9.1e-04 | norm 1.8897 | dt 0.007 +type train | step 2750 | loss 1.5416 | lr 9.1e-04 | norm 1.8416 | dt 0.007 +type train | step 2760 | loss 1.4788 | lr 9.1e-04 | norm 1.6597 | dt 0.006 +type train | step 2770 | loss 1.5357 | lr 9.1e-04 | norm 1.6799 | dt 0.007 +type train | step 2780 | loss 1.5769 | lr 9.1e-04 | norm 1.5533 | dt 0.006 +type train | step 2790 | loss 1.6246 | lr 9.1e-04 | norm 1.5544 | dt 0.006 +type train | step 2800 | loss 1.5281 | lr 9.1e-04 | norm 1.4089 | dt 0.007 +type train | step 2810 | loss 1.5273 | lr 9.0e-04 | norm 1.3707 | dt 0.007 +type train | step 2820 | loss 1.4503 | lr 9.0e-04 | norm 1.3463 | dt 0.009 +type train | step 2830 | loss 1.5384 | lr 9.0e-04 | norm 2.1557 | dt 0.009 +type train | step 2840 | loss 1.5438 | lr 9.0e-04 | norm 1.1617 | dt 0.009 +type train | step 2850 | loss 1.5921 | lr 9.0e-04 | norm 1.3739 | dt 0.009 +type train | step 2860 | loss 1.5020 | lr 9.0e-04 | norm 1.3534 | dt 0.009 +type train | step 2870 | loss 1.5999 | lr 9.0e-04 | norm 1.4658 | dt 0.007 +type train | step 2880 | loss 1.4668 | lr 9.0e-04 | norm 1.1493 | dt 0.006 +type train | step 2890 | loss 1.4973 | lr 9.0e-04 | norm 1.2239 | dt 0.006 +type train | step 2900 | loss 1.5034 | lr 9.0e-04 | norm 1.2007 | dt 0.006 +type train | step 2910 | loss 1.4504 | lr 8.9e-04 | norm 1.3798 | dt 0.007 +type train | step 2920 | loss 1.5419 | lr 8.9e-04 | norm 1.5792 | dt 0.007 +type train | step 2930 | loss 1.5958 | lr 8.9e-04 | norm 1.6202 | dt 0.008 +type train | step 2940 | loss 1.5007 | lr 8.9e-04 | norm 1.5132 | dt 0.009 +type train | step 2950 | loss 1.5467 | lr 8.9e-04 | norm 1.4928 | dt 0.007 +type train | step 2960 | loss 1.5668 | lr 8.9e-04 | norm 1.5727 | dt 0.007 +type train | step 2970 | loss 1.4389 | lr 8.9e-04 | norm 1.5089 | dt 0.006 +type train | step 2980 | loss 1.4999 | lr 8.9e-04 | norm 1.2960 | dt 0.006 +type train | step 2990 | loss 1.5708 | lr 8.9e-04 | norm 1.5819 | dt 0.007 +type train | step 3000 | loss 1.4852 | lr 8.8e-04 | norm 1.8359 | dt 0.009 +type train | step 3010 | loss 1.5322 | lr 8.8e-04 | norm 1.8187 | dt 0.006 +type train | step 3020 | loss 1.5723 | lr 8.8e-04 | norm 1.5834 | dt 0.007 +type train | step 3030 | loss 1.3987 | lr 8.8e-04 | norm 1.3139 | dt 0.009 +type train | step 3040 | loss 1.5217 | lr 8.8e-04 | norm 1.6063 | dt 0.009 +type train | step 3050 | loss 1.6121 | lr 8.8e-04 | norm 1.7828 | dt 0.014 +type train | step 3060 | loss 1.5029 | lr 8.8e-04 | norm 1.2176 | dt 0.009 +type train | step 3070 | loss 1.5248 | lr 8.8e-04 | norm 1.3395 | dt 0.009 +type train | step 3080 | loss 1.5528 | lr 8.8e-04 | norm 1.5818 | dt 0.009 +type train | step 3090 | loss 1.4046 | lr 8.7e-04 | norm 1.3313 | dt 0.007 +type train | step 3100 | loss 1.5209 | lr 8.7e-04 | norm 1.5063 | dt 0.009 +type train | step 3110 | loss 1.6246 | lr 8.7e-04 | norm 1.4789 | dt 0.007 +type train | step 3120 | loss 1.5165 | lr 8.7e-04 | norm 1.4702 | dt 0.009 +type train | step 3130 | loss 1.5214 | lr 8.7e-04 | norm 2.3102 | dt 0.007 +type train | step 3140 | loss 1.5902 | lr 8.7e-04 | norm 1.3665 | dt 0.006 +type train | step 3150 | loss 1.4806 | lr 8.7e-04 | norm 1.5206 | dt 0.007 +type train | step 3160 | loss 1.6029 | lr 8.7e-04 | norm 1.4479 | dt 0.007 +type train | step 3170 | loss 1.4268 | lr 8.6e-04 | norm 1.4417 | dt 0.007 +type train | step 3180 | loss 1.4544 | lr 8.6e-04 | norm 1.2833 | dt 0.009 +type train | step 3190 | loss 1.3639 | lr 8.6e-04 | norm 1.2590 | dt 0.007 +type train | step 3200 | loss 1.5551 | lr 8.6e-04 | norm 1.3036 | dt 0.009 +type train | step 3210 | loss 1.4732 | lr 8.6e-04 | norm 1.4461 | dt 0.007 +type train | step 3220 | loss 1.6184 | lr 8.6e-04 | norm 1.3657 | dt 0.009 +type train | step 3230 | loss 1.4360 | lr 8.6e-04 | norm 1.2958 | dt 0.007 +type train | step 3240 | loss 1.4297 | lr 8.6e-04 | norm 1.2963 | dt 0.005 +type train | step 3250 | loss 1.4667 | lr 8.6e-04 | norm 1.1860 | dt 0.009 +type train | step 3260 | loss 1.4396 | lr 8.5e-04 | norm 1.2760 | dt 0.005 +type train | step 3270 | loss 1.5114 | lr 8.5e-04 | norm 1.3373 | dt 0.009 +type train | step 3280 | loss 1.5695 | lr 8.5e-04 | norm 1.4974 | dt 0.009 +type train | step 3290 | loss 1.4654 | lr 8.5e-04 | norm 1.4006 | dt 0.009 +type train | step 3300 | loss 1.4688 | lr 8.5e-04 | norm 1.2374 | dt 0.009 +type train | step 3310 | loss 1.4502 | lr 8.5e-04 | norm 2.0967 | dt 0.009 +type train | step 3320 | loss 1.4324 | lr 8.5e-04 | norm 1.6057 | dt 0.009 +type train | step 3330 | loss 1.3987 | lr 8.5e-04 | norm 1.2327 | dt 0.009 +type train | step 3340 | loss 1.5334 | lr 8.4e-04 | norm 1.7162 | dt 0.009 +type train | step 3350 | loss 1.4900 | lr 8.4e-04 | norm 1.3602 | dt 0.009 +type train | step 3360 | loss 1.4639 | lr 8.4e-04 | norm 1.5405 | dt 0.006 +type train | step 3370 | loss 1.3969 | lr 8.4e-04 | norm 1.2860 | dt 0.007 +type train | step 3380 | loss 1.4699 | lr 8.4e-04 | norm 1.3173 | dt 0.009 +type train | step 3390 | loss 1.5135 | lr 8.4e-04 | norm 1.5677 | dt 0.009 +type train | step 3400 | loss 1.5759 | lr 8.4e-04 | norm 1.6526 | dt 0.007 +type train | step 3410 | loss 1.4761 | lr 8.3e-04 | norm 1.5114 | dt 0.009 +type train | step 3420 | loss 1.4520 | lr 8.3e-04 | norm 1.4956 | dt 0.007 +type train | step 3430 | loss 1.3834 | lr 8.3e-04 | norm 1.4106 | dt 0.009 +type train | step 3440 | loss 1.4775 | lr 8.3e-04 | norm 1.3308 | dt 0.006 +type train | step 3450 | loss 1.4837 | lr 8.3e-04 | norm 1.3728 | dt 0.007 +type train | step 3460 | loss 1.5424 | lr 8.3e-04 | norm 1.6625 | dt 0.007 +type train | step 3470 | loss 1.4549 | lr 8.3e-04 | norm 1.6667 | dt 0.007 +type train | step 3480 | loss 1.5270 | lr 8.3e-04 | norm 1.4761 | dt 0.009 +type train | step 3490 | loss 1.4135 | lr 8.2e-04 | norm 1.3183 | dt 0.009 +type train | step 3500 | loss 1.4499 | lr 8.2e-04 | norm 1.4797 | dt 0.009 +type train | step 3510 | loss 1.4491 | lr 8.2e-04 | norm 1.0930 | dt 0.006 +type train | step 3520 | loss 1.4007 | lr 8.2e-04 | norm 1.2574 | dt 0.006 +type train | step 3530 | loss 1.4926 | lr 8.2e-04 | norm 1.5053 | dt 0.006 +type train | step 3540 | loss 1.5203 | lr 8.2e-04 | norm 1.1996 | dt 0.006 +type train | step 3550 | loss 1.4364 | lr 8.2e-04 | norm 1.0580 | dt 0.007 +type train | step 3560 | loss 1.4877 | lr 8.2e-04 | norm 1.2855 | dt 0.009 +type train | step 3570 | loss 1.5046 | lr 8.1e-04 | norm 1.5335 | dt 0.007 +type train | step 3580 | loss 1.3758 | lr 8.1e-04 | norm 1.2175 | dt 0.007 +type train | step 3590 | loss 1.4530 | lr 8.1e-04 | norm 1.2638 | dt 0.006 +type train | step 3600 | loss 1.5050 | lr 8.1e-04 | norm 1.4873 | dt 0.006 +type train | step 3610 | loss 1.4133 | lr 8.1e-04 | norm 1.1682 | dt 0.006 +type train | step 3620 | loss 1.4660 | lr 8.1e-04 | norm 1.2604 | dt 0.007 +type train | step 3630 | loss 1.5193 | lr 8.1e-04 | norm 1.2973 | dt 0.007 +type train | step 3640 | loss 1.3586 | lr 8.0e-04 | norm 1.4502 | dt 0.007 +type train | step 3650 | loss 1.4861 | lr 8.0e-04 | norm 1.6337 | dt 0.009 +type train | step 3660 | loss 1.5578 | lr 8.0e-04 | norm 1.5107 | dt 0.010 +type train | step 3670 | loss 1.4427 | lr 8.0e-04 | norm 1.2816 | dt 0.009 +type train | step 3680 | loss 1.4674 | lr 8.0e-04 | norm 1.2469 | dt 0.009 +type train | step 3690 | loss 1.5039 | lr 8.0e-04 | norm 1.3201 | dt 0.009 +type train | step 3700 | loss 1.3533 | lr 8.0e-04 | norm 1.2885 | dt 0.007 +type train | step 3710 | loss 1.4849 | lr 7.9e-04 | norm 1.2894 | dt 0.009 +type train | step 3720 | loss 1.5716 | lr 7.9e-04 | norm 1.2603 | dt 0.009 +type train | step 3730 | loss 1.4680 | lr 7.9e-04 | norm 1.2819 | dt 0.007 +type train | step 3740 | loss 1.4565 | lr 7.9e-04 | norm 1.2207 | dt 0.008 +type train | step 3750 | loss 1.5339 | lr 7.9e-04 | norm 1.1432 | dt 0.007 +type train | step 3760 | loss 1.4281 | lr 7.9e-04 | norm 1.1593 | dt 0.005 +type train | step 3770 | loss 1.5521 | lr 7.9e-04 | norm 1.2893 | dt 0.009 +type train | step 3780 | loss 1.3761 | lr 7.8e-04 | norm 1.0950 | dt 0.007 +type train | step 3790 | loss 1.4100 | lr 7.8e-04 | norm 1.2071 | dt 0.009 +type train | step 3800 | loss 1.3256 | lr 7.8e-04 | norm 1.3802 | dt 0.009 +type train | step 3810 | loss 1.5069 | lr 7.8e-04 | norm 1.2420 | dt 0.009 +type train | step 3820 | loss 1.4264 | lr 7.8e-04 | norm 1.1816 | dt 0.008 +type train | step 3830 | loss 1.5926 | lr 7.8e-04 | norm 1.6983 | dt 0.007 +type train | step 3840 | loss 1.3909 | lr 7.8e-04 | norm 1.3013 | dt 0.007 +type train | step 3850 | loss 1.3949 | lr 7.7e-04 | norm 1.5195 | dt 0.007 +type train | step 3860 | loss 1.4326 | lr 7.7e-04 | norm 1.4458 | dt 0.007 +type train | step 3870 | loss 1.3947 | lr 7.7e-04 | norm 1.2328 | dt 0.007 +type train | step 3880 | loss 1.4695 | lr 7.7e-04 | norm 1.2834 | dt 0.007 +type train | step 3890 | loss 1.5402 | lr 7.7e-04 | norm 1.7059 | dt 0.009 +type train | step 3900 | loss 1.4234 | lr 7.7e-04 | norm 1.6772 | dt 0.009 +type train | step 3910 | loss 1.4370 | lr 7.7e-04 | norm 1.8405 | dt 0.009 +type train | step 3920 | loss 1.4117 | lr 7.6e-04 | norm 1.2787 | dt 0.007 +type train | step 3930 | loss 1.3807 | lr 7.6e-04 | norm 1.0871 | dt 0.009 +type train | step 3940 | loss 1.3555 | lr 7.6e-04 | norm 1.4068 | dt 0.009 +type train | step 3950 | loss 1.4945 | lr 7.6e-04 | norm 1.7116 | dt 0.009 +type train | step 3960 | loss 1.4569 | lr 7.6e-04 | norm 1.9247 | dt 0.008 +type train | step 3970 | loss 1.4408 | lr 7.6e-04 | norm 1.7587 | dt 0.007 +type train | step 3980 | loss 1.3564 | lr 7.6e-04 | norm 1.2827 | dt 0.006 +type train | step 3990 | loss 1.4236 | lr 7.5e-04 | norm 1.2518 | dt 0.009 +type train | step 4000 | loss 1.4708 | lr 7.5e-04 | norm 1.2709 | dt 0.006 +type train | step 4010 | loss 1.5269 | lr 7.5e-04 | norm 1.3553 | dt 0.006 +type train | step 4020 | loss 1.4548 | lr 7.5e-04 | norm 1.5486 | dt 0.007 +type train | step 4030 | loss 1.4258 | lr 7.5e-04 | norm 1.6180 | dt 0.009 +type train | step 4040 | loss 1.3405 | lr 7.5e-04 | norm 1.1055 | dt 0.009 +type train | step 4050 | loss 1.4310 | lr 7.4e-04 | norm 1.3010 | dt 0.006 +type train | step 4060 | loss 1.4463 | lr 7.4e-04 | norm 1.0266 | dt 0.007 +type train | step 4070 | loss 1.4923 | lr 7.4e-04 | norm 1.0784 | dt 0.006 +type train | step 4080 | loss 1.4149 | lr 7.4e-04 | norm 1.4362 | dt 0.009 +type train | step 4090 | loss 1.5001 | lr 7.4e-04 | norm 1.2016 | dt 0.007 +type train | step 4100 | loss 1.3700 | lr 7.4e-04 | norm 1.1622 | dt 0.007 +type train | step 4110 | loss 1.4071 | lr 7.4e-04 | norm 1.3303 | dt 0.009 +type train | step 4120 | loss 1.4127 | lr 7.3e-04 | norm 1.0658 | dt 0.007 +type train | step 4130 | loss 1.3600 | lr 7.3e-04 | norm 1.0332 | dt 0.009 +type train | step 4140 | loss 1.4622 | lr 7.3e-04 | norm 1.7472 | dt 0.007 +type train | step 4150 | loss 1.4980 | lr 7.3e-04 | norm 1.9123 | dt 0.006 +type train | step 4160 | loss 1.3992 | lr 7.3e-04 | norm 1.4272 | dt 0.007 +type train | step 4170 | loss 1.4482 | lr 7.3e-04 | norm 1.3864 | dt 0.007 +type train | step 4180 | loss 1.4740 | lr 7.3e-04 | norm 1.3443 | dt 0.007 +type train | step 4190 | loss 1.3423 | lr 7.2e-04 | norm 1.3710 | dt 0.007 +type train | step 4200 | loss 1.4184 | lr 7.2e-04 | norm 1.2376 | dt 0.007 +type train | step 4210 | loss 1.4872 | lr 7.2e-04 | norm 1.7386 | dt 0.009 +type train | step 4220 | loss 1.3839 | lr 7.2e-04 | norm 1.1615 | dt 0.007 +type train | step 4230 | loss 1.4244 | lr 7.2e-04 | norm 1.3027 | dt 0.006 +type train | step 4240 | loss 1.4813 | lr 7.2e-04 | norm 1.0845 | dt 0.006 +type train | step 4250 | loss 1.3258 | lr 7.1e-04 | norm 1.0534 | dt 0.007 +type train | step 4260 | loss 1.4539 | lr 7.1e-04 | norm 1.3967 | dt 0.006 +type train | step 4270 | loss 1.5174 | lr 7.1e-04 | norm 1.1705 | dt 0.010 +type train | step 4280 | loss 1.4080 | lr 7.1e-04 | norm 1.3103 | dt 0.009 +type train | step 4290 | loss 1.4311 | lr 7.1e-04 | norm 1.3569 | dt 0.009 +type train | step 4300 | loss 1.4704 | lr 7.1e-04 | norm 1.2114 | dt 0.007 +type train | step 4310 | loss 1.3242 | lr 7.0e-04 | norm 1.4135 | dt 0.009 +type train | step 4320 | loss 1.4685 | lr 7.0e-04 | norm 1.4946 | dt 0.007 +type train | step 4330 | loss 1.5379 | lr 7.0e-04 | norm 1.4989 | dt 0.009 +type train | step 4340 | loss 1.4309 | lr 7.0e-04 | norm 1.1465 | dt 0.009 +type train | step 4350 | loss 1.4133 | lr 7.0e-04 | norm 1.3628 | dt 0.009 +type train | step 4360 | loss 1.5113 | lr 7.0e-04 | norm 1.2301 | dt 0.009 +type train | step 4370 | loss 1.4095 | lr 7.0e-04 | norm 1.3143 | dt 0.006 +type train | step 4380 | loss 1.5417 | lr 6.9e-04 | norm 1.6738 | dt 0.007 +type train | step 4390 | loss 1.3596 | lr 6.9e-04 | norm 1.4472 | dt 0.007 +type train | step 4400 | loss 1.3819 | lr 6.9e-04 | norm 1.3367 | dt 0.009 +type train | step 4410 | loss 1.2978 | lr 6.9e-04 | norm 1.2511 | dt 0.008 +type train | step 4420 | loss 1.4824 | lr 6.9e-04 | norm 1.4114 | dt 0.009 +type train | step 4430 | loss 1.3984 | lr 6.9e-04 | norm 1.2857 | dt 0.009 +type train | step 4440 | loss 1.5707 | lr 6.8e-04 | norm 1.6206 | dt 0.009 +type train | step 4450 | loss 1.3663 | lr 6.8e-04 | norm 1.5090 | dt 0.009 +type train | step 4460 | loss 1.3612 | lr 6.8e-04 | norm 1.2435 | dt 0.009 +type train | step 4470 | loss 1.3939 | lr 6.8e-04 | norm 1.1934 | dt 0.009 +type train | step 4480 | loss 1.3683 | lr 6.8e-04 | norm 1.2287 | dt 0.007 +type train | step 4490 | loss 1.4414 | lr 6.8e-04 | norm 1.2715 | dt 0.008 +type train | step 4500 | loss 1.5007 | lr 6.7e-04 | norm 1.3188 | dt 0.007 +type train | step 4510 | loss 1.3836 | lr 6.7e-04 | norm 1.2855 | dt 0.006 +type train | step 4520 | loss 1.3911 | lr 6.7e-04 | norm 1.0898 | dt 0.005 +type train | step 4530 | loss 1.3698 | lr 6.7e-04 | norm 1.1894 | dt 0.009 +type train | step 4540 | loss 1.3575 | lr 6.7e-04 | norm 1.5882 | dt 0.008 +type train | step 4550 | loss 1.3322 | lr 6.7e-04 | norm 1.2902 | dt 0.007 +type train | step 4560 | loss 1.4628 | lr 6.6e-04 | norm 1.3942 | dt 0.009 +type train | step 4570 | loss 1.4218 | lr 6.6e-04 | norm 1.7073 | dt 0.009 +type train | step 4580 | loss 1.4158 | lr 6.6e-04 | norm 1.6918 | dt 0.009 +type train | step 4590 | loss 1.3328 | lr 6.6e-04 | norm 1.6961 | dt 0.008 +type train | step 4600 | loss 1.3955 | lr 6.6e-04 | norm 1.3166 | dt 0.007 +type train | step 4610 | loss 1.4471 | lr 6.6e-04 | norm 1.3422 | dt 0.006 +type train | step 4620 | loss 1.5037 | lr 6.5e-04 | norm 1.9187 | dt 0.006 +type train | step 4630 | loss 1.4303 | lr 6.5e-04 | norm 1.8020 | dt 0.007 +type train | step 4640 | loss 1.4019 | lr 6.5e-04 | norm 1.5646 | dt 0.009 +type train | step 4650 | loss 1.3159 | lr 6.5e-04 | norm 1.2914 | dt 0.005 +type train | step 4660 | loss 1.4020 | lr 6.5e-04 | norm 1.9216 | dt 0.009 +type train | step 4670 | loss 1.4287 | lr 6.5e-04 | norm 1.1491 | dt 0.009 +type train | step 4680 | loss 1.4560 | lr 6.4e-04 | norm 1.0999 | dt 0.007 +type train | step 4690 | loss 1.3766 | lr 6.4e-04 | norm 1.3630 | dt 0.007 +type train | step 4700 | loss 1.4735 | lr 6.4e-04 | norm 1.5946 | dt 0.009 +type train | step 4710 | loss 1.3416 | lr 6.4e-04 | norm 1.3089 | dt 0.009 +type train | step 4720 | loss 1.3875 | lr 6.4e-04 | norm 1.5520 | dt 0.009 +type train | step 4730 | loss 1.3958 | lr 6.4e-04 | norm 1.5582 | dt 0.009 +type train | step 4740 | loss 1.3361 | lr 6.3e-04 | norm 1.3062 | dt 0.009 +type train | step 4750 | loss 1.4262 | lr 6.3e-04 | norm 1.9423 | dt 0.009 +type train | step 4760 | loss 1.4672 | lr 6.3e-04 | norm 1.5968 | dt 0.006 +type train | step 4770 | loss 1.3688 | lr 6.3e-04 | norm 1.1993 | dt 0.006 +type train | step 4780 | loss 1.4179 | lr 6.3e-04 | norm 1.3308 | dt 0.007 +type train | step 4790 | loss 1.4647 | lr 6.3e-04 | norm 1.6438 | dt 0.007 +type train | step 4800 | loss 1.3152 | lr 6.2e-04 | norm 1.1775 | dt 0.006 +type train | step 4810 | loss 1.3869 | lr 6.2e-04 | norm 1.2656 | dt 0.009 +type train | step 4820 | loss 1.4460 | lr 6.2e-04 | norm 1.2636 | dt 0.007 +type train | step 4830 | loss 1.3566 | lr 6.2e-04 | norm 1.1154 | dt 0.007 +type train | step 4840 | loss 1.4049 | lr 6.2e-04 | norm 1.5008 | dt 0.009 +type train | step 4850 | loss 1.4509 | lr 6.2e-04 | norm 1.0789 | dt 0.006 +type train | step 4860 | loss 1.3007 | lr 6.1e-04 | norm 1.2186 | dt 0.006 +type train | step 4870 | loss 1.4232 | lr 6.1e-04 | norm 1.3415 | dt 0.007 +type train | step 4880 | loss 1.4878 | lr 6.1e-04 | norm 1.3853 | dt 0.011 +type train | step 4890 | loss 1.3900 | lr 6.1e-04 | norm 1.3094 | dt 0.009 +type train | step 4900 | loss 1.4045 | lr 6.1e-04 | norm 1.3287 | dt 0.008 +type train | step 4910 | loss 1.4409 | lr 6.1e-04 | norm 1.0767 | dt 0.007 +type train | step 4920 | loss 1.2952 | lr 6.0e-04 | norm 1.2086 | dt 0.009 +type train | step 4930 | loss 1.4329 | lr 6.0e-04 | norm 1.2123 | dt 0.009 +type train | step 4940 | loss 1.5060 | lr 6.0e-04 | norm 1.3969 | dt 0.006 +type train | step 4950 | loss 1.4021 | lr 6.0e-04 | norm 1.0278 | dt 0.009 +type train | step 4960 | loss 1.3809 | lr 6.0e-04 | norm 1.3171 | dt 0.009 +type train | step 4970 | loss 1.4924 | lr 6.0e-04 | norm 1.6102 | dt 0.009 +type train | step 4980 | loss 1.3851 | lr 5.9e-04 | norm 1.3284 | dt 0.009 +type train | step 4990 | loss 1.5155 | lr 5.9e-04 | norm 1.6952 | dt 0.007 +type train | step 5000 | loss 1.3298 | lr 5.9e-04 | norm 1.2721 | dt 0.009 +type train | step 5010 | loss 1.3539 | lr 5.9e-04 | norm 1.2302 | dt 0.005 +type train | step 5020 | loss 1.2727 | lr 5.9e-04 | norm 1.4449 | dt 0.007 +type train | step 5030 | loss 1.4476 | lr 5.9e-04 | norm 1.4722 | dt 0.006 +type train | step 5040 | loss 1.3684 | lr 5.8e-04 | norm 1.3402 | dt 0.007 +type train | step 5050 | loss 1.5344 | lr 5.8e-04 | norm 1.3014 | dt 0.009 +type train | step 5060 | loss 1.3428 | lr 5.8e-04 | norm 1.1981 | dt 0.007 +type train | step 5070 | loss 1.3408 | lr 5.8e-04 | norm 1.1750 | dt 0.009 +type train | step 5080 | loss 1.3698 | lr 5.8e-04 | norm 1.1325 | dt 0.009 +type train | step 5090 | loss 1.3481 | lr 5.8e-04 | norm 1.3815 | dt 0.009 +type train | step 5100 | loss 1.4198 | lr 5.7e-04 | norm 1.3032 | dt 0.007 +type train | step 5110 | loss 1.4764 | lr 5.7e-04 | norm 1.2825 | dt 0.007 +type train | step 5120 | loss 1.3596 | lr 5.7e-04 | norm 1.1961 | dt 0.009 +type train | step 5130 | loss 1.3737 | lr 5.7e-04 | norm 1.1875 | dt 0.009 +type train | step 5140 | loss 1.3442 | lr 5.7e-04 | norm 1.0657 | dt 0.009 +type train | step 5150 | loss 1.3428 | lr 5.7e-04 | norm 1.4555 | dt 0.009 +type train | step 5160 | loss 1.3084 | lr 5.6e-04 | norm 1.3959 | dt 0.008 +type train | step 5170 | loss 1.4455 | lr 5.6e-04 | norm 1.4158 | dt 0.009 +type train | step 5180 | loss 1.3926 | lr 5.6e-04 | norm 1.3113 | dt 0.009 +type train | step 5190 | loss 1.3745 | lr 5.6e-04 | norm 1.1421 | dt 0.007 +type train | step 5200 | loss 1.3064 | lr 5.6e-04 | norm 1.3728 | dt 0.009 +type train | step 5210 | loss 1.3623 | lr 5.6e-04 | norm 1.2108 | dt 0.008 +type train | step 5220 | loss 1.4220 | lr 5.5e-04 | norm 1.4661 | dt 0.007 +type train | step 5230 | loss 1.4788 | lr 5.5e-04 | norm 1.8274 | dt 0.009 +type train | step 5240 | loss 1.4012 | lr 5.5e-04 | norm 1.6779 | dt 0.008 +type train | step 5250 | loss 1.3719 | lr 5.5e-04 | norm 1.3331 | dt 0.009 +type train | step 5260 | loss 1.2820 | lr 5.5e-04 | norm 1.2007 | dt 0.005 +type train | step 5270 | loss 1.3897 | lr 5.4e-04 | norm 1.4997 | dt 0.009 +type train | step 5280 | loss 1.3998 | lr 5.4e-04 | norm 1.4487 | dt 0.007 +type train | step 5290 | loss 1.4484 | lr 5.4e-04 | norm 1.6968 | dt 0.007 +type train | step 5300 | loss 1.3586 | lr 5.4e-04 | norm 2.2317 | dt 0.007 +type train | step 5310 | loss 1.4427 | lr 5.4e-04 | norm 1.2807 | dt 0.007 +type train | step 5320 | loss 1.3172 | lr 5.4e-04 | norm 1.3523 | dt 0.006 +type train | step 5330 | loss 1.3709 | lr 5.3e-04 | norm 1.9145 | dt 0.006 +type train | step 5340 | loss 1.3728 | lr 5.3e-04 | norm 1.5255 | dt 0.007 +type train | step 5350 | loss 1.3196 | lr 5.3e-04 | norm 1.3629 | dt 0.007 +type train | step 5360 | loss 1.4076 | lr 5.3e-04 | norm 2.0176 | dt 0.009 +type train | step 5370 | loss 1.4267 | lr 5.3e-04 | norm 1.1721 | dt 0.009 +type train | step 5380 | loss 1.3414 | lr 5.3e-04 | norm 1.1076 | dt 0.009 +type train | step 5390 | loss 1.4104 | lr 5.2e-04 | norm 1.8312 | dt 0.009 +type train | step 5400 | loss 1.4383 | lr 5.2e-04 | norm 1.3870 | dt 0.008 +type train | step 5410 | loss 1.2962 | lr 5.2e-04 | norm 1.1912 | dt 0.006 +type train | step 5420 | loss 1.3678 | lr 5.2e-04 | norm 1.1326 | dt 0.007 +type train | step 5430 | loss 1.4105 | lr 5.2e-04 | norm 1.2041 | dt 0.007 +type train | step 5440 | loss 1.3323 | lr 5.2e-04 | norm 1.2139 | dt 0.007 +type train | step 5450 | loss 1.3905 | lr 5.1e-04 | norm 1.4356 | dt 0.007 +type train | step 5460 | loss 1.4376 | lr 5.1e-04 | norm 1.5213 | dt 0.006 +type train | step 5470 | loss 1.2800 | lr 5.1e-04 | norm 1.2390 | dt 0.006 +type train | step 5480 | loss 1.4162 | lr 5.1e-04 | norm 1.6726 | dt 0.006 +type train | step 5490 | loss 1.4589 | lr 5.1e-04 | norm 1.4049 | dt 0.007 +type train | step 5500 | loss 1.3680 | lr 5.1e-04 | norm 1.2365 | dt 0.007 +type train | step 5510 | loss 1.3968 | lr 5.0e-04 | norm 1.7883 | dt 0.006 +type train | step 5520 | loss 1.4225 | lr 5.0e-04 | norm 0.9976 | dt 0.008 +type train | step 5530 | loss 1.2721 | lr 5.0e-04 | norm 1.3581 | dt 0.009 +type train | step 5540 | loss 1.4282 | lr 5.0e-04 | norm 1.9462 | dt 0.009 +type train | step 5550 | loss 1.4815 | lr 5.0e-04 | norm 1.4591 | dt 0.009 +type train | step 5560 | loss 1.3763 | lr 4.9e-04 | norm 1.1827 | dt 0.009 +type train | step 5570 | loss 1.3720 | lr 4.9e-04 | norm 1.5371 | dt 0.009 +type train | step 5580 | loss 1.4818 | lr 4.9e-04 | norm 1.8136 | dt 0.009 +type train | step 5590 | loss 1.3612 | lr 4.9e-04 | norm 1.2816 | dt 0.009 +type train | step 5600 | loss 1.4884 | lr 4.9e-04 | norm 1.2201 | dt 0.009 +type train | step 5610 | loss 1.3037 | lr 4.9e-04 | norm 1.0586 | dt 0.009 +type train | step 5620 | loss 1.3210 | lr 4.8e-04 | norm 1.1139 | dt 0.009 +type train | step 5630 | loss 1.2579 | lr 4.8e-04 | norm 1.5956 | dt 0.009 +type train | step 5640 | loss 1.4230 | lr 4.8e-04 | norm 1.4547 | dt 0.008 +type train | step 5650 | loss 1.3434 | lr 4.8e-04 | norm 1.3916 | dt 0.006 +type train | step 5660 | loss 1.5175 | lr 4.8e-04 | norm 1.5245 | dt 0.007 +type train | step 5670 | loss 1.3225 | lr 4.8e-04 | norm 1.3383 | dt 0.008 +type train | step 5680 | loss 1.3076 | lr 4.7e-04 | norm 1.1737 | dt 0.007 +type train | step 5690 | loss 1.3544 | lr 4.7e-04 | norm 1.4735 | dt 0.007 +type train | step 5700 | loss 1.3294 | lr 4.7e-04 | norm 1.2815 | dt 0.007 +type train | step 5710 | loss 1.3899 | lr 4.7e-04 | norm 1.1041 | dt 0.009 +type train | step 5720 | loss 1.4614 | lr 4.7e-04 | norm 1.5607 | dt 0.009 +type train | step 5730 | loss 1.3490 | lr 4.7e-04 | norm 1.6427 | dt 0.009 +type train | step 5740 | loss 1.3433 | lr 4.6e-04 | norm 1.2433 | dt 0.007 +type train | step 5750 | loss 1.3351 | lr 4.6e-04 | norm 1.4031 | dt 0.007 +type train | step 5760 | loss 1.3281 | lr 4.6e-04 | norm 1.3239 | dt 0.006 +type train | step 5770 | loss 1.2790 | lr 4.6e-04 | norm 1.3599 | dt 0.007 +type train | step 5780 | loss 1.4115 | lr 4.6e-04 | norm 1.2362 | dt 0.009 +type train | step 5790 | loss 1.3741 | lr 4.5e-04 | norm 1.2050 | dt 0.006 +type train | step 5800 | loss 1.3459 | lr 4.5e-04 | norm 1.4843 | dt 0.007 +type train | step 5810 | loss 1.2851 | lr 4.5e-04 | norm 1.3909 | dt 0.007 +type train | step 5820 | loss 1.3423 | lr 4.5e-04 | norm 1.2226 | dt 0.009 +type train | step 5830 | loss 1.3968 | lr 4.5e-04 | norm 1.1099 | dt 0.007 +type train | step 5840 | loss 1.4405 | lr 4.5e-04 | norm 1.1957 | dt 0.007 +type train | step 5850 | loss 1.3740 | lr 4.4e-04 | norm 1.2694 | dt 0.009 +type train | step 5860 | loss 1.3458 | lr 4.4e-04 | norm 1.2390 | dt 0.008 +type train | step 5870 | loss 1.2724 | lr 4.4e-04 | norm 1.4564 | dt 0.009 +type train | step 5880 | loss 1.3601 | lr 4.4e-04 | norm 1.5714 | dt 0.009 +type train | step 5890 | loss 1.3851 | lr 4.4e-04 | norm 1.5864 | dt 0.007 +type train | step 5900 | loss 1.4164 | lr 4.4e-04 | norm 1.3365 | dt 0.007 +type train | step 5910 | loss 1.3434 | lr 4.3e-04 | norm 1.5792 | dt 0.009 +type train | step 5920 | loss 1.4172 | lr 4.3e-04 | norm 1.1998 | dt 0.008 +type train | step 5930 | loss 1.3057 | lr 4.3e-04 | norm 1.2951 | dt 0.009 +type train | step 5940 | loss 1.3395 | lr 4.3e-04 | norm 1.3504 | dt 0.009 +type train | step 5950 | loss 1.3534 | lr 4.3e-04 | norm 1.2348 | dt 0.009 +type train | step 5960 | loss 1.2920 | lr 4.3e-04 | norm 1.0989 | dt 0.009 +type train | step 5970 | loss 1.3865 | lr 4.2e-04 | norm 1.4001 | dt 0.009 +type train | step 5980 | loss 1.4125 | lr 4.2e-04 | norm 1.2885 | dt 0.007 +type train | step 5990 | loss 1.3278 | lr 4.2e-04 | norm 1.2375 | dt 0.009 +type train | step 6000 | loss 1.3758 | lr 4.2e-04 | norm 1.2406 | dt 0.009 +type train | step 6010 | loss 1.4125 | lr 4.2e-04 | norm 1.2128 | dt 0.006 +type train | step 6020 | loss 1.2646 | lr 4.2e-04 | norm 0.9941 | dt 0.005 +type train | step 6030 | loss 1.3489 | lr 4.1e-04 | norm 1.1777 | dt 0.006 +type train | step 6040 | loss 1.4021 | lr 4.1e-04 | norm 1.5769 | dt 0.007 +type train | step 6050 | loss 1.3170 | lr 4.1e-04 | norm 1.4319 | dt 0.007 +type train | step 6060 | loss 1.3646 | lr 4.1e-04 | norm 1.3799 | dt 0.008 +type train | step 6070 | loss 1.4262 | lr 4.1e-04 | norm 1.3730 | dt 0.009 +type train | step 6080 | loss 1.2590 | lr 4.1e-04 | norm 1.2154 | dt 0.008 +type train | step 6090 | loss 1.3946 | lr 4.0e-04 | norm 1.4843 | dt 0.007 +type train | step 6100 | loss 1.4433 | lr 4.0e-04 | norm 1.4720 | dt 0.007 +type train | step 6110 | loss 1.3663 | lr 4.0e-04 | norm 1.7474 | dt 0.007 +type train | step 6120 | loss 1.3805 | lr 4.0e-04 | norm 1.8019 | dt 0.009 +type train | step 6130 | loss 1.4145 | lr 4.0e-04 | norm 1.3368 | dt 0.009 +type train | step 6140 | loss 1.2495 | lr 4.0e-04 | norm 1.3182 | dt 0.007 +type train | step 6150 | loss 1.4001 | lr 3.9e-04 | norm 1.3472 | dt 0.008 +type train | step 6160 | loss 1.4760 | lr 3.9e-04 | norm 2.9066 | dt 0.007 +type train | step 6170 | loss 1.3752 | lr 3.9e-04 | norm 1.6533 | dt 0.009 +type train | step 6180 | loss 1.3505 | lr 3.9e-04 | norm 1.6603 | dt 0.008 +type train | step 6190 | loss 1.4470 | lr 3.9e-04 | norm 1.2724 | dt 0.009 +type train | step 6200 | loss 1.3379 | lr 3.9e-04 | norm 1.0081 | dt 0.009 +type train | step 6210 | loss 1.4620 | lr 3.8e-04 | norm 1.3447 | dt 0.009 +type train | step 6220 | loss 1.2994 | lr 3.8e-04 | norm 1.5945 | dt 0.009 +type train | step 6230 | loss 1.3265 | lr 3.8e-04 | norm 1.7722 | dt 0.007 +type train | step 6240 | loss 1.2362 | lr 3.8e-04 | norm 1.2841 | dt 0.009 +type train | step 6250 | loss 1.3966 | lr 3.8e-04 | norm 1.2097 | dt 0.009 +type train | step 6260 | loss 1.3213 | lr 3.8e-04 | norm 1.1235 | dt 0.005 +type train | step 6270 | loss 1.5063 | lr 3.7e-04 | norm 1.6379 | dt 0.007 +type train | step 6280 | loss 1.3071 | lr 3.7e-04 | norm 1.4326 | dt 0.007 +type train | step 6290 | loss 1.3057 | lr 3.7e-04 | norm 1.4641 | dt 0.007 +type train | step 6300 | loss 1.3266 | lr 3.7e-04 | norm 1.2410 | dt 0.007 +type train | step 6310 | loss 1.3088 | lr 3.7e-04 | norm 1.2297 | dt 0.008 +type train | step 6320 | loss 1.3714 | lr 3.7e-04 | norm 1.1899 | dt 0.009 +type train | step 6330 | loss 1.4418 | lr 3.6e-04 | norm 1.6642 | dt 0.009 +type train | step 6340 | loss 1.3324 | lr 3.6e-04 | norm 1.5265 | dt 0.007 +type train | step 6350 | loss 1.3338 | lr 3.6e-04 | norm 1.3466 | dt 0.008 +type train | step 6360 | loss 1.3132 | lr 3.6e-04 | norm 1.2299 | dt 0.007 +type train | step 6370 | loss 1.3018 | lr 3.6e-04 | norm 1.3505 | dt 0.009 +type train | step 6380 | loss 1.2606 | lr 3.6e-04 | norm 1.5714 | dt 0.007 +type train | step 6390 | loss 1.3906 | lr 3.5e-04 | norm 1.3282 | dt 0.009 +type train | step 6400 | loss 1.3794 | lr 3.5e-04 | norm 1.9356 | dt 0.009 +type train | step 6410 | loss 1.3497 | lr 3.5e-04 | norm 1.9822 | dt 0.009 +type train | step 6420 | loss 1.2617 | lr 3.5e-04 | norm 1.2248 | dt 0.007 +type train | step 6430 | loss 1.3200 | lr 3.5e-04 | norm 1.1704 | dt 0.009 +type train | step 6440 | loss 1.3786 | lr 3.5e-04 | norm 1.2321 | dt 0.006 +type train | step 6450 | loss 1.4302 | lr 3.4e-04 | norm 1.5539 | dt 0.006 +type train | step 6460 | loss 1.3658 | lr 3.4e-04 | norm 1.5207 | dt 0.007 +type train | step 6470 | loss 1.3422 | lr 3.4e-04 | norm 1.7583 | dt 0.007 +type train | step 6480 | loss 1.2501 | lr 3.4e-04 | norm 1.1992 | dt 0.009 +type train | step 6490 | loss 1.3305 | lr 3.4e-04 | norm 1.0418 | dt 0.009 +type train | step 6500 | loss 1.3617 | lr 3.4e-04 | norm 1.0802 | dt 0.009 +type train | step 6510 | loss 1.4097 | lr 3.3e-04 | norm 1.7221 | dt 0.006 +type train | step 6520 | loss 1.3245 | lr 3.3e-04 | norm 1.2573 | dt 0.008 +type train | step 6530 | loss 1.4125 | lr 3.3e-04 | norm 1.5647 | dt 0.007 +type train | step 6540 | loss 1.2871 | lr 3.3e-04 | norm 1.3935 | dt 0.009 +type train | step 6550 | loss 1.3143 | lr 3.3e-04 | norm 1.3256 | dt 0.009 +type train | step 6560 | loss 1.3303 | lr 3.3e-04 | norm 1.1856 | dt 0.009 +type train | step 6570 | loss 1.2783 | lr 3.2e-04 | norm 1.2801 | dt 0.009 +type train | step 6580 | loss 1.3708 | lr 3.2e-04 | norm 1.6292 | dt 0.008 +type train | step 6590 | loss 1.3894 | lr 3.2e-04 | norm 1.1613 | dt 0.009 +type train | step 6600 | loss 1.3139 | lr 3.2e-04 | norm 1.4290 | dt 0.009 +type train | step 6610 | loss 1.3534 | lr 3.2e-04 | norm 1.2714 | dt 0.008 +type train | step 6620 | loss 1.3965 | lr 3.2e-04 | norm 1.3226 | dt 0.008 +type train | step 6630 | loss 1.2509 | lr 3.1e-04 | norm 1.0965 | dt 0.008 +type train | step 6640 | loss 1.3543 | lr 3.1e-04 | norm 1.7217 | dt 0.009 +type train | step 6650 | loss 1.3950 | lr 3.1e-04 | norm 1.7888 | dt 0.006 +type train | step 6660 | loss 1.2908 | lr 3.1e-04 | norm 1.3160 | dt 0.009 +type train | step 6670 | loss 1.3357 | lr 3.1e-04 | norm 1.2311 | dt 0.006 +type train | step 6680 | loss 1.4020 | lr 3.1e-04 | norm 1.2040 | dt 0.006 +type train | step 6690 | loss 1.2399 | lr 3.1e-04 | norm 1.3638 | dt 0.006 +type train | step 6700 | loss 1.3912 | lr 3.0e-04 | norm 1.4146 | dt 0.008 +type train | step 6710 | loss 1.4362 | lr 3.0e-04 | norm 1.8570 | dt 0.008 +type train | step 6720 | loss 1.3329 | lr 3.0e-04 | norm 1.3225 | dt 0.007 +type train | step 6730 | loss 1.3414 | lr 3.0e-04 | norm 1.2309 | dt 0.008 +type train | step 6740 | loss 1.3907 | lr 3.0e-04 | norm 1.1690 | dt 0.006 +type train | step 6750 | loss 1.2331 | lr 3.0e-04 | norm 1.2582 | dt 0.006 +type train | step 6760 | loss 1.3873 | lr 2.9e-04 | norm 1.6121 | dt 0.006 +type train | step 6770 | loss 1.4467 | lr 2.9e-04 | norm 1.4187 | dt 0.007 +type train | step 6780 | loss 1.3595 | lr 2.9e-04 | norm 1.3555 | dt 0.006 +type train | step 6790 | loss 1.3147 | lr 2.9e-04 | norm 1.2133 | dt 0.007 +type train | step 6800 | loss 1.4227 | lr 2.9e-04 | norm 1.1244 | dt 0.008 +type train | step 6810 | loss 1.3248 | lr 2.9e-04 | norm 1.1208 | dt 0.006 +type train | step 6820 | loss 1.4632 | lr 2.8e-04 | norm 1.7442 | dt 0.007 +type train | step 6830 | loss 1.2744 | lr 2.8e-04 | norm 1.1468 | dt 0.006 +type train | step 6840 | loss 1.3092 | lr 2.8e-04 | norm 1.2215 | dt 0.006 +type train | step 6850 | loss 1.2220 | lr 2.8e-04 | norm 1.2686 | dt 0.006 +type train | step 6860 | loss 1.3827 | lr 2.8e-04 | norm 1.1650 | dt 0.007 +type train | step 6870 | loss 1.3067 | lr 2.8e-04 | norm 1.2528 | dt 0.007 +type train | step 6880 | loss 1.4990 | lr 2.8e-04 | norm 1.6203 | dt 0.007 +type train | step 6890 | loss 1.2929 | lr 2.7e-04 | norm 1.4371 | dt 0.006 +type train | step 6900 | loss 1.2871 | lr 2.7e-04 | norm 1.2886 | dt 0.007 +type train | step 6910 | loss 1.3042 | lr 2.7e-04 | norm 1.1414 | dt 0.006 +type train | step 6920 | loss 1.2889 | lr 2.7e-04 | norm 1.0187 | dt 0.006 +type train | step 6930 | loss 1.3571 | lr 2.7e-04 | norm 1.2819 | dt 0.006 +type train | step 6940 | loss 1.4270 | lr 2.7e-04 | norm 1.3795 | dt 0.006 +type train | step 6950 | loss 1.3265 | lr 2.7e-04 | norm 1.4283 | dt 0.006 +type train | step 6960 | loss 1.3211 | lr 2.6e-04 | norm 1.3576 | dt 0.006 +type train | step 6970 | loss 1.2901 | lr 2.6e-04 | norm 1.1161 | dt 0.006 +type train | step 6980 | loss 1.2832 | lr 2.6e-04 | norm 1.0327 | dt 0.006 +type train | step 6990 | loss 1.2402 | lr 2.6e-04 | norm 1.2760 | dt 0.007 +type train | step 7000 | loss 1.3785 | lr 2.6e-04 | norm 1.7238 | dt 0.006 +type train | step 7010 | loss 1.3563 | lr 2.6e-04 | norm 1.3338 | dt 0.006 +type train | step 7020 | loss 1.3289 | lr 2.5e-04 | norm 1.3594 | dt 0.006 +type train | step 7030 | loss 1.2436 | lr 2.5e-04 | norm 1.0844 | dt 0.006 +type train | step 7040 | loss 1.3001 | lr 2.5e-04 | norm 1.1162 | dt 0.006 +type train | step 7050 | loss 1.3644 | lr 2.5e-04 | norm 1.1889 | dt 0.006 +type train | step 7060 | loss 1.4221 | lr 2.5e-04 | norm 1.6579 | dt 0.006 +type train | step 7070 | loss 1.3415 | lr 2.5e-04 | norm 1.2855 | dt 0.006 +type train | step 7080 | loss 1.3207 | lr 2.5e-04 | norm 1.2128 | dt 0.006 +type train | step 7090 | loss 1.2251 | lr 2.4e-04 | norm 1.0448 | dt 0.006 +type train | step 7100 | loss 1.3161 | lr 2.4e-04 | norm 1.1409 | dt 0.007 +type train | step 7110 | loss 1.3469 | lr 2.4e-04 | norm 1.1190 | dt 0.006 +type train | step 7120 | loss 1.3921 | lr 2.4e-04 | norm 1.2727 | dt 0.006 +type train | step 7130 | loss 1.3155 | lr 2.4e-04 | norm 1.3350 | dt 0.006 +type train | step 7140 | loss 1.3885 | lr 2.4e-04 | norm 1.3760 | dt 0.006 +type train | step 7150 | loss 1.2662 | lr 2.4e-04 | norm 1.1751 | dt 0.006 +type train | step 7160 | loss 1.2975 | lr 2.3e-04 | norm 1.2636 | dt 0.006 +type train | step 7170 | loss 1.3168 | lr 2.3e-04 | norm 1.0667 | dt 0.006 +type train | step 7180 | loss 1.2633 | lr 2.3e-04 | norm 1.0613 | dt 0.006 +type train | step 7190 | loss 1.3525 | lr 2.3e-04 | norm 1.3775 | dt 0.006 +type train | step 7200 | loss 1.3750 | lr 2.3e-04 | norm 1.3312 | dt 0.006 +type train | step 7210 | loss 1.2974 | lr 2.3e-04 | norm 1.2482 | dt 0.006 +type train | step 7220 | loss 1.3355 | lr 2.3e-04 | norm 1.2384 | dt 0.006 +type train | step 7230 | loss 1.3785 | lr 2.2e-04 | norm 1.1953 | dt 0.006 +type train | step 7240 | loss 1.2478 | lr 2.2e-04 | norm 1.3449 | dt 0.006 +type train | step 7250 | loss 1.3267 | lr 2.2e-04 | norm 1.2796 | dt 0.006 +type train | step 7260 | loss 1.3771 | lr 2.2e-04 | norm 1.5369 | dt 0.006 +type train | step 7270 | loss 1.2725 | lr 2.2e-04 | norm 1.1489 | dt 0.007 +type train | step 7280 | loss 1.3167 | lr 2.2e-04 | norm 1.1288 | dt 0.006 +type train | step 7290 | loss 1.3909 | lr 2.2e-04 | norm 1.3421 | dt 0.009 +type train | step 7300 | loss 1.2339 | lr 2.1e-04 | norm 1.2427 | dt 0.006 +type train | step 7310 | loss 1.3699 | lr 2.1e-04 | norm 1.2979 | dt 0.006 +type train | step 7320 | loss 1.4106 | lr 2.1e-04 | norm 1.3952 | dt 0.007 +type train | step 7330 | loss 1.3151 | lr 2.1e-04 | norm 1.1391 | dt 0.006 +type train | step 7340 | loss 1.3224 | lr 2.1e-04 | norm 1.1535 | dt 0.006 +type train | step 7350 | loss 1.3786 | lr 2.1e-04 | norm 1.1139 | dt 0.006 +type train | step 7360 | loss 1.2198 | lr 2.1e-04 | norm 1.1728 | dt 0.006 +type train | step 7370 | loss 1.3673 | lr 2.0e-04 | norm 1.2367 | dt 0.006 +type train | step 7380 | loss 1.4247 | lr 2.0e-04 | norm 1.3159 | dt 0.009 +type train | step 7390 | loss 1.3447 | lr 2.0e-04 | norm 1.1646 | dt 0.005 +type train | step 7400 | loss 1.3047 | lr 2.0e-04 | norm 1.3211 | dt 0.005 +type train | step 7410 | loss 1.4093 | lr 2.0e-04 | norm 1.1424 | dt 0.007 +type train | step 7420 | loss 1.3202 | lr 2.0e-04 | norm 1.2061 | dt 0.008 +type train | step 7430 | loss 1.4417 | lr 2.0e-04 | norm 1.3318 | dt 0.005 +type train | step 7440 | loss 1.2595 | lr 1.9e-04 | norm 1.1275 | dt 0.009 +type train | step 7450 | loss 1.2917 | lr 1.9e-04 | norm 1.0882 | dt 0.005 +type train | step 7460 | loss 1.2062 | lr 1.9e-04 | norm 1.1957 | dt 0.007 +type train | step 7470 | loss 1.3695 | lr 1.9e-04 | norm 1.1827 | dt 0.006 +type train | step 7480 | loss 1.2939 | lr 1.9e-04 | norm 1.1542 | dt 0.006 +type train | step 7490 | loss 1.4709 | lr 1.9e-04 | norm 1.3873 | dt 0.006 +type train | step 7500 | loss 1.2789 | lr 1.9e-04 | norm 1.2733 | dt 0.006 +type train | step 7510 | loss 1.2802 | lr 1.9e-04 | norm 1.2888 | dt 0.006 +type train | step 7520 | loss 1.2894 | lr 1.8e-04 | norm 1.1564 | dt 0.006 +type train | step 7530 | loss 1.2761 | lr 1.8e-04 | norm 1.0392 | dt 0.008 +type train | step 7540 | loss 1.3401 | lr 1.8e-04 | norm 1.1504 | dt 0.008 +type train | step 7550 | loss 1.4050 | lr 1.8e-04 | norm 1.2674 | dt 0.009 +type train | step 7560 | loss 1.3106 | lr 1.8e-04 | norm 1.2644 | dt 0.008 +type train | step 7570 | loss 1.3056 | lr 1.8e-04 | norm 1.1720 | dt 0.009 +type train | step 7580 | loss 1.2788 | lr 1.8e-04 | norm 1.1144 | dt 0.008 +type train | step 7590 | loss 1.2727 | lr 1.8e-04 | norm 1.0213 | dt 0.008 +type train | step 7600 | loss 1.2170 | lr 1.7e-04 | norm 1.0528 | dt 0.007 +type train | step 7610 | loss 1.3560 | lr 1.7e-04 | norm 1.3644 | dt 0.009 +type train | step 7620 | loss 1.3453 | lr 1.7e-04 | norm 1.3360 | dt 0.007 +type train | step 7630 | loss 1.3117 | lr 1.7e-04 | norm 1.2647 | dt 0.009 +type train | step 7640 | loss 1.2319 | lr 1.7e-04 | norm 1.1118 | dt 0.009 +type train | step 7650 | loss 1.2872 | lr 1.7e-04 | norm 1.1322 | dt 0.009 +type train | step 7660 | loss 1.3483 | lr 1.7e-04 | norm 1.1782 | dt 0.008 +type train | step 7670 | loss 1.4012 | lr 1.6e-04 | norm 1.3407 | dt 0.008 +type train | step 7680 | loss 1.3259 | lr 1.6e-04 | norm 1.1862 | dt 0.009 +type train | step 7690 | loss 1.3111 | lr 1.6e-04 | norm 1.2953 | dt 0.007 +type train | step 7700 | loss 1.2173 | lr 1.6e-04 | norm 1.1846 | dt 0.009 +type train | step 7710 | loss 1.3051 | lr 1.6e-04 | norm 1.1638 | dt 0.009 +type train | step 7720 | loss 1.3338 | lr 1.6e-04 | norm 1.0907 | dt 0.009 +type train | step 7730 | loss 1.3709 | lr 1.6e-04 | norm 1.1136 | dt 0.009 +type train | step 7740 | loss 1.3005 | lr 1.6e-04 | norm 1.2544 | dt 0.009 +type train | step 7750 | loss 1.3767 | lr 1.5e-04 | norm 1.3141 | dt 0.009 +type train | step 7760 | loss 1.2530 | lr 1.5e-04 | norm 1.0767 | dt 0.005 +type train | step 7770 | loss 1.2827 | lr 1.5e-04 | norm 1.1936 | dt 0.007 +type train | step 7780 | loss 1.3099 | lr 1.5e-04 | norm 1.1499 | dt 0.007 +type train | step 7790 | loss 1.2526 | lr 1.5e-04 | norm 1.0495 | dt 0.009 +type train | step 7800 | loss 1.3324 | lr 1.5e-04 | norm 1.1906 | dt 0.008 +type train | step 7810 | loss 1.3606 | lr 1.5e-04 | norm 1.2461 | dt 0.005 +type train | step 7820 | loss 1.2797 | lr 1.5e-04 | norm 1.1346 | dt 0.006 +type train | step 7830 | loss 1.3217 | lr 1.5e-04 | norm 1.2361 | dt 0.006 +type train | step 7840 | loss 1.3643 | lr 1.4e-04 | norm 1.2110 | dt 0.007 +type train | step 7850 | loss 1.2362 | lr 1.4e-04 | norm 1.2812 | dt 0.007 +type train | step 7860 | loss 1.3086 | lr 1.4e-04 | norm 1.0632 | dt 0.009 +type train | step 7870 | loss 1.3590 | lr 1.4e-04 | norm 1.4054 | dt 0.009 +type train | step 7880 | loss 1.2589 | lr 1.4e-04 | norm 1.1888 | dt 0.009 +type train | step 7890 | loss 1.3057 | lr 1.4e-04 | norm 1.1843 | dt 0.007 +type train | step 7900 | loss 1.3808 | lr 1.4e-04 | norm 1.2813 | dt 0.009 +type train | step 7910 | loss 1.2277 | lr 1.4e-04 | norm 1.3713 | dt 0.009 +type train | step 7920 | loss 1.3495 | lr 1.3e-04 | norm 1.0865 | dt 0.009 +type train | step 7930 | loss 1.3934 | lr 1.3e-04 | norm 1.3497 | dt 0.011 +type train | step 7940 | loss 1.3016 | lr 1.3e-04 | norm 1.1912 | dt 0.009 +type train | step 7950 | loss 1.3063 | lr 1.3e-04 | norm 1.1044 | dt 0.009 +type train | step 7960 | loss 1.3692 | lr 1.3e-04 | norm 1.1847 | dt 0.009 +type train | step 7970 | loss 1.2122 | lr 1.3e-04 | norm 1.2312 | dt 0.009 +type train | step 7980 | loss 1.3477 | lr 1.3e-04 | norm 1.1581 | dt 0.009 +type train | step 7990 | loss 1.4098 | lr 1.3e-04 | norm 1.3373 | dt 0.009 +type train | step 8000 | loss 1.3280 | lr 1.3e-04 | norm 1.0425 | dt 0.009 +type train | step 8010 | loss 1.2960 | lr 1.2e-04 | norm 1.2859 | dt 0.006 +type train | step 8020 | loss 1.4045 | lr 1.2e-04 | norm 1.2968 | dt 0.007 +type train | step 8030 | loss 1.3110 | lr 1.2e-04 | norm 1.1964 | dt 0.009 +type train | step 8040 | loss 1.4281 | lr 1.2e-04 | norm 1.3715 | dt 0.007 +type train | step 8050 | loss 1.2466 | lr 1.2e-04 | norm 1.0559 | dt 0.009 +type train | step 8060 | loss 1.2766 | lr 1.2e-04 | norm 1.0416 | dt 0.009 +type train | step 8070 | loss 1.1994 | lr 1.2e-04 | norm 1.2245 | dt 0.006 +type train | step 8080 | loss 1.3559 | lr 1.2e-04 | norm 1.1595 | dt 0.006 +type train | step 8090 | loss 1.2817 | lr 1.2e-04 | norm 1.3586 | dt 0.007 +type train | step 8100 | loss 1.4507 | lr 1.1e-04 | norm 1.3752 | dt 0.007 +type train | step 8110 | loss 1.2645 | lr 1.1e-04 | norm 1.1256 | dt 0.006 +type train | step 8120 | loss 1.2635 | lr 1.1e-04 | norm 1.1406 | dt 0.006 +type train | step 8130 | loss 1.2869 | lr 1.1e-04 | norm 1.3011 | dt 0.006 +type train | step 8140 | loss 1.2678 | lr 1.1e-04 | norm 1.2502 | dt 0.006 +type train | step 8150 | loss 1.3358 | lr 1.1e-04 | norm 1.3502 | dt 0.007 +type train | step 8160 | loss 1.3899 | lr 1.1e-04 | norm 1.2304 | dt 0.009 +type train | step 8170 | loss 1.2901 | lr 1.1e-04 | norm 1.1543 | dt 0.009 +type train | step 8180 | loss 1.2910 | lr 1.1e-04 | norm 1.2403 | dt 0.007 +type train | step 8190 | loss 1.2741 | lr 1.1e-04 | norm 1.2040 | dt 0.007 +type train | step 8200 | loss 1.2667 | lr 1.0e-04 | norm 1.2475 | dt 0.007 +type train | step 8210 | loss 1.2139 | lr 1.0e-04 | norm 1.3220 | dt 0.007 +type train | step 8220 | loss 1.3372 | lr 1.0e-04 | norm 1.1689 | dt 0.007 +type train | step 8230 | loss 1.3301 | lr 1.0e-04 | norm 1.4154 | dt 0.007 +type train | step 8240 | loss 1.2991 | lr 1.0e-04 | norm 1.3250 | dt 0.008 +type train | step 8250 | loss 1.2248 | lr 1.0e-04 | norm 1.2082 | dt 0.009 +type train | step 8260 | loss 1.2792 | lr 9.9e-05 | norm 1.2130 | dt 0.006 +type train | step 8270 | loss 1.3469 | lr 9.8e-05 | norm 1.4114 | dt 0.009 +type train | step 8280 | loss 1.3813 | lr 9.7e-05 | norm 1.2062 | dt 0.008 +type train | step 8290 | loss 1.3166 | lr 9.6e-05 | norm 1.2386 | dt 0.007 +type train | step 8300 | loss 1.3011 | lr 9.5e-05 | norm 1.3176 | dt 0.009 +type train | step 8310 | loss 1.2061 | lr 9.4e-05 | norm 1.1291 | dt 0.007 +type train | step 8320 | loss 1.2963 | lr 9.3e-05 | norm 1.1924 | dt 0.009 +type train | step 8330 | loss 1.3318 | lr 9.2e-05 | norm 1.4016 | dt 0.007 +type train | step 8340 | loss 1.3551 | lr 9.1e-05 | norm 1.1163 | dt 0.007 +type train | step 8350 | loss 1.2836 | lr 9.0e-05 | norm 1.2300 | dt 0.006 +type train | step 8360 | loss 1.3626 | lr 8.9e-05 | norm 1.2494 | dt 0.009 +type train | step 8370 | loss 1.2433 | lr 8.8e-05 | norm 1.2233 | dt 0.007 +type train | step 8380 | loss 1.2677 | lr 8.7e-05 | norm 1.1095 | dt 0.009 +type train | step 8390 | loss 1.3052 | lr 8.6e-05 | norm 1.2689 | dt 0.008 +type train | step 8400 | loss 1.2476 | lr 8.5e-05 | norm 1.1128 | dt 0.009 +type train | step 8410 | loss 1.3168 | lr 8.4e-05 | norm 1.1868 | dt 0.008 +type train | step 8420 | loss 1.3453 | lr 8.3e-05 | norm 1.2174 | dt 0.006 +type train | step 8430 | loss 1.2691 | lr 8.2e-05 | norm 1.3041 | dt 0.009 +type train | step 8440 | loss 1.3130 | lr 8.2e-05 | norm 1.3591 | dt 0.009 +type train | step 8450 | loss 1.3577 | lr 8.1e-05 | norm 1.3746 | dt 0.007 +type train | step 8460 | loss 1.2286 | lr 8.0e-05 | norm 1.2788 | dt 0.007 +type train | step 8470 | loss 1.2938 | lr 7.9e-05 | norm 1.0826 | dt 0.007 +type train | step 8480 | loss 1.3420 | lr 7.8e-05 | norm 1.2588 | dt 0.009 +type train | step 8490 | loss 1.2438 | lr 7.7e-05 | norm 1.2802 | dt 0.010 +type train | step 8500 | loss 1.2998 | lr 7.6e-05 | norm 1.3036 | dt 0.007 +type train | step 8510 | loss 1.3724 | lr 7.5e-05 | norm 1.4261 | dt 0.006 +type train | step 8520 | loss 1.2166 | lr 7.5e-05 | norm 1.2470 | dt 0.009 +type train | step 8530 | loss 1.3328 | lr 7.4e-05 | norm 1.0975 | dt 0.007 +type train | step 8540 | loss 1.3770 | lr 7.3e-05 | norm 1.2896 | dt 0.013 +type train | step 8550 | loss 1.2904 | lr 7.2e-05 | norm 1.2215 | dt 0.009 +type train | step 8560 | loss 1.3030 | lr 7.1e-05 | norm 1.3233 | dt 0.009 +type train | step 8570 | loss 1.3640 | lr 7.0e-05 | norm 1.4374 | dt 0.009 +type train | step 8580 | loss 1.2033 | lr 7.0e-05 | norm 1.2696 | dt 0.006 +type train | step 8590 | loss 1.3358 | lr 6.9e-05 | norm 1.3492 | dt 0.007 +type train | step 8600 | loss 1.3938 | lr 6.8e-05 | norm 1.3089 | dt 0.007 +type train | step 8610 | loss 1.3176 | lr 6.7e-05 | norm 1.0562 | dt 0.007 +type train | step 8620 | loss 1.2846 | lr 6.6e-05 | norm 1.2696 | dt 0.009 +type train | step 8630 | loss 1.3943 | lr 6.6e-05 | norm 1.2780 | dt 0.009 +type train | step 8640 | loss 1.3066 | lr 6.5e-05 | norm 1.2889 | dt 0.009 +type train | step 8650 | loss 1.4141 | lr 6.4e-05 | norm 1.4513 | dt 0.008 +type train | step 8660 | loss 1.2388 | lr 6.3e-05 | norm 1.1943 | dt 0.009 +type train | step 8670 | loss 1.2691 | lr 6.2e-05 | norm 1.1272 | dt 0.009 +type train | step 8680 | loss 1.1901 | lr 6.2e-05 | norm 1.2173 | dt 0.009 +type train | step 8690 | loss 1.3467 | lr 6.1e-05 | norm 1.2201 | dt 0.009 +type train | step 8700 | loss 1.2727 | lr 6.0e-05 | norm 1.1417 | dt 0.007 +type train | step 8710 | loss 1.4377 | lr 5.9e-05 | norm 1.4441 | dt 0.006 +type train | step 8720 | loss 1.2552 | lr 5.9e-05 | norm 1.1727 | dt 0.006 +type train | step 8730 | loss 1.2567 | lr 5.8e-05 | norm 1.2599 | dt 0.007 +type train | step 8740 | loss 1.2769 | lr 5.7e-05 | norm 1.3707 | dt 0.007 +type train | step 8750 | loss 1.2578 | lr 5.6e-05 | norm 1.0844 | dt 0.007 +type train | step 8760 | loss 1.3245 | lr 5.6e-05 | norm 1.2257 | dt 0.006 +type train | step 8770 | loss 1.3734 | lr 5.5e-05 | norm 1.1030 | dt 0.007 +type train | step 8780 | loss 1.2775 | lr 5.4e-05 | norm 1.0986 | dt 0.009 +type train | step 8790 | loss 1.2839 | lr 5.4e-05 | norm 1.2027 | dt 0.007 +type train | step 8800 | loss 1.2648 | lr 5.3e-05 | norm 1.2322 | dt 0.009 +type train | step 8810 | loss 1.2559 | lr 5.2e-05 | norm 1.1084 | dt 0.010 +type train | step 8820 | loss 1.2030 | lr 5.1e-05 | norm 1.1642 | dt 0.009 +type train | step 8830 | loss 1.3227 | lr 5.1e-05 | norm 1.1764 | dt 0.009 +type train | step 8840 | loss 1.3216 | lr 5.0e-05 | norm 1.3855 | dt 0.009 +type train | step 8850 | loss 1.2899 | lr 4.9e-05 | norm 1.2672 | dt 0.009 +type train | step 8860 | loss 1.2180 | lr 4.9e-05 | norm 1.2170 | dt 0.007 +type train | step 8870 | loss 1.2711 | lr 4.8e-05 | norm 1.2299 | dt 0.009 +type train | step 8880 | loss 1.3342 | lr 4.7e-05 | norm 1.1457 | dt 0.009 +type train | step 8890 | loss 1.3698 | lr 4.7e-05 | norm 1.1919 | dt 0.009 +type train | step 8900 | loss 1.3044 | lr 4.6e-05 | norm 1.1180 | dt 0.010 +type train | step 8910 | loss 1.2931 | lr 4.5e-05 | norm 1.2369 | dt 0.008 +type train | step 8920 | loss 1.1968 | lr 4.5e-05 | norm 1.1031 | dt 0.007 +type train | step 8930 | loss 1.2888 | lr 4.4e-05 | norm 1.1325 | dt 0.009 +type train | step 8940 | loss 1.3229 | lr 4.3e-05 | norm 1.2216 | dt 0.009 +type train | step 8950 | loss 1.3450 | lr 4.3e-05 | norm 1.0893 | dt 0.009 +type train | step 8960 | loss 1.2723 | lr 4.2e-05 | norm 1.1285 | dt 0.009 +type train | step 8970 | loss 1.3524 | lr 4.2e-05 | norm 1.2387 | dt 0.009 +type train | step 8980 | loss 1.2334 | lr 4.1e-05 | norm 1.3311 | dt 0.009 +type train | step 8990 | loss 1.2621 | lr 4.0e-05 | norm 1.2116 | dt 0.009 +type train | step 9000 | loss 1.2941 | lr 4.0e-05 | norm 1.1704 | dt 0.008 +type train | step 9010 | loss 1.2419 | lr 3.9e-05 | norm 1.1160 | dt 0.006 +type train | step 9020 | loss 1.3063 | lr 3.9e-05 | norm 1.1805 | dt 0.006 +type train | step 9030 | loss 1.3376 | lr 3.8e-05 | norm 1.3294 | dt 0.006 +type train | step 9040 | loss 1.2593 | lr 3.8e-05 | norm 1.4295 | dt 0.006 +type train | step 9050 | loss 1.3028 | lr 3.7e-05 | norm 1.1217 | dt 0.006 +type train | step 9060 | loss 1.3474 | lr 3.6e-05 | norm 1.3707 | dt 0.006 +type train | step 9070 | loss 1.2189 | lr 3.6e-05 | norm 1.1525 | dt 0.006 +type train | step 9080 | loss 1.2871 | lr 3.5e-05 | norm 1.2198 | dt 0.006 +type train | step 9090 | loss 1.3311 | lr 3.5e-05 | norm 1.2443 | dt 0.007 +type train | step 9100 | loss 1.2316 | lr 3.4e-05 | norm 1.2545 | dt 0.008 +type train | step 9110 | loss 1.2895 | lr 3.4e-05 | norm 1.1664 | dt 0.007 +type train | step 9120 | loss 1.3591 | lr 3.3e-05 | norm 1.2759 | dt 0.007 +type train | step 9130 | loss 1.2048 | lr 3.3e-05 | norm 1.0904 | dt 0.007 +type train | step 9140 | loss 1.3268 | lr 3.2e-05 | norm 1.1463 | dt 0.009 +type train | step 9150 | loss 1.3708 | lr 3.2e-05 | norm 1.3527 | dt 0.011 +type train | step 9160 | loss 1.2800 | lr 3.1e-05 | norm 1.1387 | dt 0.009 +type train | step 9170 | loss 1.2938 | lr 3.1e-05 | norm 1.1404 | dt 0.007 +type train | step 9180 | loss 1.3517 | lr 3.0e-05 | norm 1.1660 | dt 0.007 +type train | step 9190 | loss 1.1921 | lr 3.0e-05 | norm 1.0209 | dt 0.009 +type train | step 9200 | loss 1.3268 | lr 2.9e-05 | norm 1.1266 | dt 0.009 +type train | step 9210 | loss 1.3824 | lr 2.9e-05 | norm 1.2304 | dt 0.009 +type train | step 9220 | loss 1.3120 | lr 2.8e-05 | norm 1.0425 | dt 0.007 +type train | step 9230 | loss 1.2737 | lr 2.8e-05 | norm 1.1937 | dt 0.009 +type train | step 9240 | loss 1.3835 | lr 2.7e-05 | norm 1.1477 | dt 0.009 +type train | step 9250 | loss 1.2960 | lr 2.7e-05 | norm 0.9885 | dt 0.007 +type train | step 9260 | loss 1.4003 | lr 2.6e-05 | norm 1.1579 | dt 0.005 +type train | step 9270 | loss 1.2346 | lr 2.6e-05 | norm 1.0963 | dt 0.008 +type train | step 9280 | loss 1.2669 | lr 2.6e-05 | norm 1.1637 | dt 0.007 +type train | step 9290 | loss 1.1796 | lr 2.5e-05 | norm 1.1779 | dt 0.009 +type train | step 9300 | loss 1.3345 | lr 2.5e-05 | norm 1.1295 | dt 0.009 +type train | step 9310 | loss 1.2637 | lr 2.4e-05 | norm 1.0798 | dt 0.009 +type train | step 9320 | loss 1.4261 | lr 2.4e-05 | norm 1.2373 | dt 0.009 +type train | step 9330 | loss 1.2494 | lr 2.3e-05 | norm 1.1596 | dt 0.006 +type train | step 9340 | loss 1.2489 | lr 2.3e-05 | norm 1.1853 | dt 0.006 +type train | step 9350 | loss 1.2656 | lr 2.3e-05 | norm 1.1305 | dt 0.007 +type train | step 9360 | loss 1.2532 | lr 2.2e-05 | norm 1.0664 | dt 0.006 +type train | step 9370 | loss 1.3149 | lr 2.2e-05 | norm 1.1648 | dt 0.006 +type train | step 9380 | loss 1.3647 | lr 2.2e-05 | norm 1.2265 | dt 0.006 +type train | step 9390 | loss 1.2767 | lr 2.1e-05 | norm 1.3069 | dt 0.006 +type train | step 9400 | loss 1.2786 | lr 2.1e-05 | norm 1.1477 | dt 0.007 +type train | step 9410 | loss 1.2547 | lr 2.0e-05 | norm 1.0917 | dt 0.007 +type train | step 9420 | loss 1.2512 | lr 2.0e-05 | norm 1.0535 | dt 0.009 +type train | step 9430 | loss 1.1960 | lr 2.0e-05 | norm 1.1210 | dt 0.009 +type train | step 9440 | loss 1.3151 | lr 1.9e-05 | norm 1.2759 | dt 0.009 +type train | step 9450 | loss 1.3187 | lr 1.9e-05 | norm 1.5077 | dt 0.009 +type train | step 9460 | loss 1.2829 | lr 1.9e-05 | norm 1.2229 | dt 0.007 +type train | step 9470 | loss 1.2075 | lr 1.8e-05 | norm 1.1242 | dt 0.009 +type train | step 9480 | loss 1.2648 | lr 1.8e-05 | norm 1.0996 | dt 0.007 +type train | step 9490 | loss 1.3269 | lr 1.8e-05 | norm 1.1428 | dt 0.009 +type train | step 9500 | loss 1.3615 | lr 1.8e-05 | norm 1.2111 | dt 0.007 +type train | step 9510 | loss 1.2982 | lr 1.7e-05 | norm 1.1172 | dt 0.006 +type train | step 9520 | loss 1.2888 | lr 1.7e-05 | norm 1.1522 | dt 0.006 +type train | step 9530 | loss 1.1881 | lr 1.7e-05 | norm 1.0152 | dt 0.006 +type train | step 9540 | loss 1.2815 | lr 1.6e-05 | norm 1.0554 | dt 0.007 +type train | step 9550 | loss 1.3179 | lr 1.6e-05 | norm 1.2381 | dt 0.007 +type train | step 9560 | loss 1.3381 | lr 1.6e-05 | norm 1.0996 | dt 0.009 +type train | step 9570 | loss 1.2667 | lr 1.6e-05 | norm 1.1137 | dt 0.008 +type train | step 9580 | loss 1.3476 | lr 1.5e-05 | norm 1.0914 | dt 0.009 +type train | step 9590 | loss 1.2238 | lr 1.5e-05 | norm 1.0621 | dt 0.009 +type train | step 9600 | loss 1.2550 | lr 1.5e-05 | norm 1.0239 | dt 0.009 +type train | step 9610 | loss 1.2890 | lr 1.5e-05 | norm 1.1275 | dt 0.009 +type train | step 9620 | loss 1.2353 | lr 1.4e-05 | norm 0.9529 | dt 0.009 +type train | step 9630 | loss 1.3009 | lr 1.4e-05 | norm 1.0726 | dt 0.007 +type train | step 9640 | loss 1.3318 | lr 1.4e-05 | norm 1.0875 | dt 0.009 +type train | step 9650 | loss 1.2477 | lr 1.4e-05 | norm 1.0208 | dt 0.009 +type train | step 9660 | loss 1.2943 | lr 1.3e-05 | norm 1.0611 | dt 0.006 +type train | step 9670 | loss 1.3431 | lr 1.3e-05 | norm 1.2012 | dt 0.007 +type train | step 9680 | loss 1.2106 | lr 1.3e-05 | norm 0.9603 | dt 0.007 +type train | step 9690 | loss 1.2808 | lr 1.3e-05 | norm 1.0294 | dt 0.006 +type train | step 9700 | loss 1.3268 | lr 1.3e-05 | norm 1.1410 | dt 0.009 +type train | step 9710 | loss 1.2230 | lr 1.3e-05 | norm 1.0806 | dt 0.009 +type train | step 9720 | loss 1.2824 | lr 1.2e-05 | norm 1.1260 | dt 0.009 +type train | step 9730 | loss 1.3528 | lr 1.2e-05 | norm 1.2720 | dt 0.009 +type train | step 9740 | loss 1.1979 | lr 1.2e-05 | norm 0.9574 | dt 0.008 +type train | step 9750 | loss 1.3209 | lr 1.2e-05 | norm 1.0954 | dt 0.009 +type train | step 9760 | loss 1.3673 | lr 1.2e-05 | norm 1.2734 | dt 0.008 +type train | step 9770 | loss 1.2729 | lr 1.2e-05 | norm 1.0713 | dt 0.007 +type train | step 9780 | loss 1.2862 | lr 1.1e-05 | norm 1.0781 | dt 0.009 +type train | step 9790 | loss 1.3471 | lr 1.1e-05 | norm 1.1209 | dt 0.009 +type train | step 9800 | loss 1.1861 | lr 1.1e-05 | norm 0.9516 | dt 0.010 +type train | step 9810 | loss 1.3212 | lr 1.1e-05 | norm 1.1412 | dt 0.009 +type train | step 9820 | loss 1.3772 | lr 1.1e-05 | norm 1.1956 | dt 0.009 +type train | step 9830 | loss 1.3090 | lr 1.1e-05 | norm 1.0335 | dt 0.009 +type train | step 9840 | loss 1.2679 | lr 1.1e-05 | norm 1.1892 | dt 0.008 +type train | step 9850 | loss 1.3780 | lr 1.1e-05 | norm 1.0630 | dt 0.009 +type train | step 9860 | loss 1.2925 | lr 1.1e-05 | norm 0.9650 | dt 0.009 +type train | step 9870 | loss 1.3948 | lr 1.1e-05 | norm 1.1321 | dt 0.009 +type train | step 9880 | loss 1.2325 | lr 1.0e-05 | norm 1.0993 | dt 0.009 +type train | step 9890 | loss 1.2656 | lr 1.0e-05 | norm 1.1388 | dt 0.009 +type train | step 9900 | loss 1.1731 | lr 1.0e-05 | norm 1.1295 | dt 0.008 +type train | step 9910 | loss 1.3313 | lr 1.0e-05 | norm 1.1038 | dt 0.009 +type train | step 9920 | loss 1.2599 | lr 1.0e-05 | norm 1.0267 | dt 0.009 +type train | step 9930 | loss 1.4215 | lr 1.0e-05 | norm 1.2152 | dt 0.009 +type train | step 9940 | loss 1.2464 | lr 1.0e-05 | norm 1.1242 | dt 0.009 +type train | step 9950 | loss 1.2469 | lr 1.0e-05 | norm 1.0894 | dt 0.006 +type train | step 9960 | loss 1.2604 | lr 1.0e-05 | norm 1.1202 | dt 0.007 +type train | step 9970 | loss 1.2496 | lr 1.0e-05 | norm 1.0175 | dt 0.009 +type train | step 9980 | loss 1.3115 | lr 1.0e-05 | norm 1.1173 | dt 0.007 +type train | step 9990 | loss 1.3611 | lr 1.0e-05 | norm 1.1596 | dt 0.006 +type train | step 10000 | loss 1.2758 | lr 1.0e-05 | norm 1.2685 | dt 0.006