davidquarel's picture
Upload folder using huggingface_hub
3bd6249 verified
type train | step 10 | loss 1320.2253 1502.9182 1771.0946 3083.1682 | lr 1.3e-05 | norm 840.4684 | dt 0.132
type train | step 20 | loss 1316.6228 1496.8206 1767.1798 3094.8828 | lr 2.7e-05 | norm 867.1016 | dt 0.133
type train | step 30 | loss 1309.4080 1482.4259 1765.0361 3055.5767 | lr 4.0e-05 | norm 856.4797 | dt 0.133
type train | step 40 | loss 1297.1993 1480.5312 1761.2125 3030.2134 | lr 5.3e-05 | norm 792.5460 | dt 0.133
type train | step 50 | loss 1283.3635 1469.2888 1740.7783 2997.4531 | lr 6.7e-05 | norm 820.9495 | dt 0.133
type train | step 60 | loss 1267.4468 1451.0823 1721.3135 2943.7986 | lr 8.0e-05 | norm 802.2338 | dt 0.134
type train | step 70 | loss 1246.2009 1434.2174 1706.3185 2885.8159 | lr 9.3e-05 | norm 779.2521 | dt 0.135
type train | step 80 | loss 1225.7589 1413.9950 1684.5330 2836.9019 | lr 1.1e-04 | norm 722.7059 | dt 0.135
type train | step 90 | loss 1202.2896 1386.0873 1663.0564 2771.9868 | lr 1.2e-04 | norm 728.9855 | dt 0.137
type train | step 100 | loss 1178.0363 1358.7917 1629.8915 2696.4233 | lr 1.3e-04 | norm 726.1010 | dt 0.137
type train | step 110 | loss 1144.7585 1335.9614 1601.9222 2614.4392 | lr 1.5e-04 | norm 658.9268 | dt 0.137
type train | step 120 | loss 1114.3221 1307.7828 1575.3301 2541.5227 | lr 1.6e-04 | norm 603.2202 | dt 0.143
type train | step 130 | loss 1080.6958 1275.8517 1543.7444 2449.9573 | lr 1.7e-04 | norm 579.1041 | dt 0.147
type train | step 140 | loss 1048.5308 1248.1492 1509.7574 2387.0164 | lr 1.9e-04 | norm 545.3434 | dt 0.140
type train | step 150 | loss 1010.9284 1202.0996 1469.0042 2304.4629 | lr 2.0e-04 | norm 524.5732 | dt 0.141
type train | step 160 | loss 975.2828 1164.6349 1428.4120 2212.7756 | lr 2.1e-04 | norm 505.2141 | dt 0.144
type train | step 170 | loss 934.2282 1126.6594 1382.5396 2118.8074 | lr 2.3e-04 | norm 473.6605 | dt 0.150
type train | step 180 | loss 894.9310 1092.2067 1349.3146 2043.7264 | lr 2.4e-04 | norm 411.6969 | dt 0.157
type train | step 190 | loss 854.3752 1052.1769 1310.4492 1955.6780 | lr 2.5e-04 | norm 384.6513 | dt 0.139
type train | step 200 | loss 816.5170 1011.7614 1265.9604 1884.4451 | lr 2.7e-04 | norm 357.3604 | dt 0.140
type train | step 210 | loss 776.3078 966.5737 1219.1714 1800.9819 | lr 2.8e-04 | norm 333.0495 | dt 0.160
type train | step 220 | loss 732.9691 924.1536 1169.9020 1711.7355 | lr 2.9e-04 | norm 310.3963 | dt 0.192
type train | step 230 | loss 690.9241 883.7311 1122.0420 1628.8202 | lr 3.1e-04 | norm 284.5296 | dt 0.172
type train | step 240 | loss 656.6611 847.6417 1080.3557 1573.5109 | lr 3.2e-04 | norm 258.8046 | dt 0.159
type train | step 250 | loss 619.9534 804.8060 1034.2931 1488.8634 | lr 3.3e-04 | norm 242.8527 | dt 0.153
type train | step 260 | loss 584.0156 763.9349 984.8782 1421.2786 | lr 3.5e-04 | norm 228.6184 | dt 0.143
type train | step 270 | loss 552.5906 722.9574 943.6636 1350.3412 | lr 3.6e-04 | norm 215.7748 | dt 0.148
type train | step 280 | loss 521.1824 683.7253 901.7806 1289.6484 | lr 3.7e-04 | norm 204.4176 | dt 0.185
type train | step 290 | loss 486.3296 645.4580 851.0154 1213.7518 | lr 3.9e-04 | norm 195.5881 | dt 0.180
type train | step 300 | loss 457.3102 608.7300 808.8862 1156.9468 | lr 4.0e-04 | norm 187.3985 | dt 0.176
type train | step 310 | loss 430.0278 570.8319 763.5021 1088.7448 | lr 4.1e-04 | norm 179.4177 | dt 0.151
type train | step 320 | loss 397.3384 528.4166 718.6334 1024.9802 | lr 4.3e-04 | norm 171.0130 | dt 0.146
type train | step 330 | loss 364.5565 491.0250 676.6274 962.5702 | lr 4.4e-04 | norm 162.5324 | dt 0.140
type train | step 340 | loss 333.7234 453.1100 628.7938 894.1548 | lr 4.5e-04 | norm 155.5582 | dt 0.148
type train | step 350 | loss 307.8714 421.1644 587.6727 835.3716 | lr 4.7e-04 | norm 149.0188 | dt 0.180
type train | step 360 | loss 282.8508 384.7386 540.7574 774.8018 | lr 4.8e-04 | norm 143.2141 | dt 0.171
type train | step 370 | loss 258.4402 355.0417 506.4164 720.1702 | lr 4.9e-04 | norm 136.9152 | dt 0.152
type train | step 380 | loss 242.2844 327.0884 469.1102 674.2409 | lr 5.1e-04 | norm 130.5077 | dt 0.179
type train | step 390 | loss 218.6507 294.9779 423.5523 614.8553 | lr 5.2e-04 | norm 126.0921 | dt 0.157
type train | step 400 | loss 198.8243 268.8935 387.6918 559.6448 | lr 5.3e-04 | norm 120.3461 | dt 0.150
type train | step 410 | loss 182.5685 245.7043 354.3278 515.0153 | lr 5.5e-04 | norm 115.1511 | dt 0.157
type train | step 420 | loss 165.2550 223.6905 321.9675 471.0817 | lr 5.6e-04 | norm 111.3739 | dt 0.150
type train | step 430 | loss 149.2910 202.8458 291.2012 426.1655 | lr 5.7e-04 | norm 105.9708 | dt 0.151
type train | step 440 | loss 138.3564 179.8171 260.6377 384.8161 | lr 5.9e-04 | norm 101.1733 | dt 0.157
type train | step 450 | loss 127.5833 162.7137 234.5510 346.9204 | lr 6.0e-04 | norm 96.8927 | dt 0.191
type train | step 460 | loss 115.0511 142.6408 207.1729 305.2677 | lr 6.1e-04 | norm 93.6141 | dt 0.196
type train | step 470 | loss 108.3356 129.2363 185.3311 277.5768 | lr 6.3e-04 | norm 89.4883 | dt 0.153
type train | step 480 | loss 97.6433 115.1685 163.4894 245.4520 | lr 6.4e-04 | norm 85.0024 | dt 0.163
type train | step 490 | loss 89.5339 101.2975 144.1732 214.8685 | lr 6.5e-04 | norm 81.3401 | dt 0.194
type train | step 500 | loss 83.5054 88.8200 125.8653 189.2982 | lr 6.7e-04 | norm 77.8917 | dt 0.150
type train | step 510 | loss 76.9048 77.8901 109.4813 165.1478 | lr 6.8e-04 | norm 75.0698 | dt 0.196
type train | step 520 | loss 71.2141 67.8847 94.9093 142.6285 | lr 6.9e-04 | norm 72.5409 | dt 0.157
type train | step 530 | loss 68.5977 59.6386 81.9658 125.4353 | lr 7.1e-04 | norm 69.9112 | dt 0.161
type train | step 540 | loss 61.9092 51.2871 68.9178 105.6938 | lr 7.2e-04 | norm 67.6620 | dt 0.196
type train | step 550 | loss 58.7523 44.5095 58.7857 90.7479 | lr 7.3e-04 | norm 66.5654 | dt 0.155
type train | step 560 | loss 59.0204 38.5475 49.4686 78.0673 | lr 7.5e-04 | norm 63.5503 | dt 0.197
type train | step 570 | loss 55.6687 32.5859 41.3201 66.0216 | lr 7.6e-04 | norm 62.2245 | dt 0.191
type train | step 580 | loss 52.6691 27.4599 33.7413 54.8109 | lr 7.7e-04 | norm 60.3430 | dt 0.149
type train | step 590 | loss 50.2671 23.4337 28.1635 46.5649 | lr 7.9e-04 | norm 58.5975 | dt 0.186
type train | step 600 | loss 46.5133 19.4949 22.5985 38.5175 | lr 8.0e-04 | norm 58.7003 | dt 0.166
type train | step 610 | loss 45.3092 16.5107 18.6303 32.5469 | lr 8.1e-04 | norm 56.2705 | dt 0.184
type train | step 620 | loss 43.9679 13.8751 15.1348 27.4898 | lr 8.3e-04 | norm 55.3913 | dt 0.198
type train | step 630 | loss 40.6836 11.6813 12.3691 23.4757 | lr 8.4e-04 | norm 54.2653 | dt 0.187
type train | step 640 | loss 37.5272 9.7539 10.0211 19.5500 | lr 8.5e-04 | norm 53.5076 | dt 0.166
type train | step 650 | loss 37.3680 8.5497 8.6085 17.5810 | lr 8.7e-04 | norm 53.3418 | dt 0.147
type train | step 660 | loss 33.8876 7.2439 7.0223 14.6128 | lr 8.8e-04 | norm 53.7624 | dt 0.157
type train | step 670 | loss 31.2657 6.2562 6.1425 12.9678 | lr 8.9e-04 | norm 57.0374 | dt 0.153
type train | step 680 | loss 30.1845 5.6714 5.5937 11.8980 | lr 9.1e-04 | norm 62.0665 | dt 0.192
type train | step 690 | loss 29.7580 5.2926 5.3957 11.4722 | lr 9.2e-04 | norm 60.7099 | dt 0.167
type train | step 700 | loss 28.9511 4.7657 4.9968 10.3492 | lr 9.3e-04 | norm 69.3181 | dt 0.169
type train | step 710 | loss 28.1053 4.3500 4.7448 9.7171 | lr 9.5e-04 | norm 75.9529 | dt 0.176
type train | step 720 | loss 28.6441 4.1618 4.6086 9.3183 | lr 9.6e-04 | norm 77.2227 | dt 0.176
type train | step 730 | loss 27.5737 4.0064 4.6014 9.1501 | lr 9.7e-04 | norm 71.0415 | dt 0.181
type train | step 740 | loss 25.1598 3.7427 4.5218 8.7850 | lr 9.9e-04 | norm 82.6971 | dt 0.149
type train | step 750 | loss 23.7024 3.7569 4.6004 8.7689 | lr 1.0e-03 | norm 88.1695 | dt 0.152
type train | step 760 | loss 21.2056 3.4846 4.4564 8.2779 | lr 1.0e-03 | norm 95.8795 | dt 0.157
type train | step 770 | loss 19.7097 3.3445 4.4588 8.1136 | lr 1.0e-03 | norm 94.2516 | dt 0.156
type train | step 780 | loss 18.3559 3.2146 4.4472 7.7447 | lr 1.0e-03 | norm 83.8448 | dt 0.201
type train | step 790 | loss 17.9053 3.2193 4.5961 7.7128 | lr 1.0e-03 | norm 84.2247 | dt 0.166
type train | step 800 | loss 16.3454 3.0836 4.6026 7.6407 | lr 1.0e-03 | norm 93.0282 | dt 0.155
type train | step 810 | loss 16.5411 3.0767 4.6898 7.5493 | lr 1.0e-03 | norm 86.9664 | dt 0.152
type train | step 820 | loss 15.6692 2.9641 4.6683 7.3185 | lr 1.0e-03 | norm 94.1708 | dt 0.170
type train | step 830 | loss 14.8360 2.8809 4.6812 7.0809 | lr 1.0e-03 | norm 92.6620 | dt 0.158
type train | step 840 | loss 14.2661 2.7860 4.6028 6.8376 | lr 1.0e-03 | norm 86.2398 | dt 0.171
type train | step 850 | loss 14.6407 2.8337 4.6508 6.9023 | lr 1.0e-03 | norm 87.2597 | dt 0.180
type train | step 860 | loss 13.7568 2.7215 4.4755 6.6842 | lr 1.0e-03 | norm 78.9567 | dt 0.179
type train | step 870 | loss 13.2577 2.7102 4.4753 6.6327 | lr 1.0e-03 | norm 82.7921 | dt 0.164
type train | step 880 | loss 12.8180 2.6551 4.4588 6.5403 | lr 1.0e-03 | norm 88.9640 | dt 0.160
type train | step 890 | loss 12.4348 2.6274 4.4375 6.5651 | lr 1.0e-03 | norm 84.2991 | dt 0.203
type train | step 900 | loss 11.9811 2.5507 4.3204 6.2886 | lr 1.0e-03 | norm 85.7138 | dt 0.188
type train | step 910 | loss 11.4917 2.5821 4.3271 6.2931 | lr 1.0e-03 | norm 82.4116 | dt 0.186
type train | step 920 | loss 11.0457 2.5053 4.4223 6.2918 | lr 1.0e-03 | norm 77.8140 | dt 0.171
type train | step 930 | loss 10.7232 2.4786 4.3179 6.2458 | lr 1.0e-03 | norm 83.9257 | dt 0.152
type train | step 940 | loss 10.3272 2.4585 4.3315 6.1666 | lr 1.0e-03 | norm 78.2649 | dt 0.188
type train | step 950 | loss 9.9275 2.3899 4.2554 6.0579 | lr 1.0e-03 | norm 75.4176 | dt 0.196
type train | step 960 | loss 10.2118 2.3704 4.2016 6.0965 | lr 1.0e-03 | norm 76.8385 | dt 0.184
type train | step 970 | loss 10.0047 2.3217 4.2605 6.0103 | lr 1.0e-03 | norm 72.8160 | dt 0.178
type train | step 980 | loss 9.6193 2.3200 4.2391 6.2468 | lr 1.0e-03 | norm 74.5429 | dt 0.176
type train | step 990 | loss 9.8447 2.3482 4.3270 6.4315 | lr 1.0e-03 | norm 74.4055 | dt 0.169
type train | step 1000 | loss 9.3056 2.2764 4.0805 6.4099 | lr 1.0e-03 | norm 80.3796 | dt 0.204
type train | step 1010 | loss 9.2649 2.2324 4.1537 6.4757 | lr 1.0e-03 | norm 73.9830 | dt 0.183
type train | step 1020 | loss 8.9467 2.2430 4.1023 6.8121 | lr 1.0e-03 | norm 75.6499 | dt 0.158
type train | step 1030 | loss 8.9818 2.2240 4.1666 6.8917 | lr 1.0e-03 | norm 71.5356 | dt 0.162
type train | step 1040 | loss 8.6274 2.2106 4.1520 7.0638 | lr 1.0e-03 | norm 70.0460 | dt 0.179
type train | step 1050 | loss 8.3245 2.1664 4.1775 7.2128 | lr 1.0e-03 | norm 73.4728 | dt 0.163
type train | step 1060 | loss 8.4780 2.1746 4.1118 7.4762 | lr 1.0e-03 | norm 67.4786 | dt 0.160
type train | step 1070 | loss 7.9264 2.0892 4.0493 7.4245 | lr 1.0e-03 | norm 67.4219 | dt 0.217
type train | step 1080 | loss 7.7876 2.1385 4.1468 7.6914 | lr 1.0e-03 | norm 71.9008 | dt 0.196
type train | step 1090 | loss 7.3967 2.1139 4.1209 7.8819 | lr 1.0e-03 | norm 75.5280 | dt 0.195
type train | step 1100 | loss 7.2906 2.0851 4.0826 7.8904 | lr 1.0e-03 | norm 75.5712 | dt 0.173
type train | step 1110 | loss 7.0802 2.0853 4.1291 7.9610 | lr 1.0e-03 | norm 74.7643 | dt 0.172
type train | step 1120 | loss 6.7757 2.0735 4.0197 8.0101 | lr 1.0e-03 | norm 76.9493 | dt 0.155
type train | step 1130 | loss 6.6179 2.0445 4.0774 7.9676 | lr 1.0e-03 | norm 76.5672 | dt 0.188
type train | step 1140 | loss 6.7103 2.0803 4.0978 8.1409 | lr 1.0e-03 | norm 76.5205 | dt 0.183
type train | step 1150 | loss 6.3539 2.0167 4.0399 7.9274 | lr 1.0e-03 | norm 74.4758 | dt 0.171
type train | step 1160 | loss 6.3398 2.0446 4.1171 8.0588 | lr 1.0e-03 | norm 71.8223 | dt 0.181
type train | step 1170 | loss 6.6644 2.0363 4.0800 8.0883 | lr 1.0e-03 | norm 71.5508 | dt 0.167
type train | step 1180 | loss 6.9388 2.0076 4.0265 8.0006 | lr 1.0e-03 | norm 73.9989 | dt 0.177
type train | step 1190 | loss 7.0882 1.9765 4.0299 7.9142 | lr 1.0e-03 | norm 71.5210 | dt 0.230
type train | step 1200 | loss 6.6548 1.9848 4.0114 7.9808 | lr 1.0e-03 | norm 76.2073 | dt 0.195
type train | step 1210 | loss 6.1830 1.9146 3.9535 7.8954 | lr 1.0e-03 | norm 73.8941 | dt 0.185
type train | step 1220 | loss 5.9574 1.9504 4.0284 7.9777 | lr 1.0e-03 | norm 73.7701 | dt 0.204
type train | step 1230 | loss 5.8244 1.9258 3.9826 7.9947 | lr 1.0e-03 | norm 77.3456 | dt 0.165
type train | step 1240 | loss 5.6771 1.9279 3.9964 7.9938 | lr 1.0e-03 | norm 80.0859 | dt 0.164
type train | step 1250 | loss 5.6063 1.8676 4.0286 7.9143 | lr 1.0e-03 | norm 76.8570 | dt 0.205
type train | step 1260 | loss 5.6992 1.9472 4.0897 8.1078 | lr 1.0e-03 | norm 76.3541 | dt 0.178
type train | step 1270 | loss 5.4798 1.8582 3.9678 7.8580 | lr 1.0e-03 | norm 71.4789 | dt 0.199
type train | step 1280 | loss 5.3986 1.8186 3.9701 7.8318 | lr 1.0e-03 | norm 78.3634 | dt 0.184
type train | step 1290 | loss 5.3516 1.8324 3.9700 8.0051 | lr 1.0e-03 | norm 74.3397 | dt 0.179
type train | step 1300 | loss 5.3582 1.8950 4.0055 8.0550 | lr 1.0e-03 | norm 72.0760 | dt 0.161
type train | step 1310 | loss 5.1901 1.8248 3.9816 7.8916 | lr 1.0e-03 | norm 77.2070 | dt 0.173
type train | step 1320 | loss 5.0725 1.7994 3.9177 7.8158 | lr 1.0e-03 | norm 73.3615 | dt 0.167
type train | step 1330 | loss 5.0532 1.8300 3.9557 7.9146 | lr 1.0e-03 | norm 70.4003 | dt 0.197
type train | step 1340 | loss 5.0584 1.8818 4.0105 7.8929 | lr 1.0e-03 | norm 76.5596 | dt 0.194
type train | step 1350 | loss 5.0549 1.8213 4.0169 7.8435 | lr 1.0e-03 | norm 75.8972 | dt 0.164
type train | step 1360 | loss 5.3018 1.9013 4.1122 8.0537 | lr 1.0e-03 | norm 66.9861 | dt 0.174
type train | step 1370 | loss 5.1180 1.8222 3.9579 7.8822 | lr 1.0e-03 | norm 75.4231 | dt 0.164
type train | step 1380 | loss 5.0551 1.8070 3.9908 7.6327 | lr 1.0e-03 | norm 80.6676 | dt 0.193
type train | step 1390 | loss 5.0083 1.7891 3.9478 7.4948 | lr 1.0e-03 | norm 74.9253 | dt 0.194
type train | step 1400 | loss 5.3643 1.8241 4.0334 7.6177 | lr 1.0e-03 | norm 76.7291 | dt 0.156
type train | step 1410 | loss 5.5118 1.7893 3.9959 7.7962 | lr 1.0e-03 | norm 76.4338 | dt 0.180
type train | step 1420 | loss 5.4057 1.8341 3.9971 7.8583 | lr 1.0e-03 | norm 73.8784 | dt 0.165
type train | step 1430 | loss 5.4951 1.8014 3.9885 7.5928 | lr 1.0e-03 | norm 72.8829 | dt 0.201
type train | step 1440 | loss 5.9165 1.7825 3.9723 7.6810 | lr 1.0e-03 | norm 78.5968 | dt 0.169
type train | step 1450 | loss 6.2880 1.7681 3.9230 7.4555 | lr 1.0e-03 | norm 74.9337 | dt 0.196
type train | step 1460 | loss 7.3407 1.8303 4.0014 7.6382 | lr 1.0e-03 | norm 78.7248 | dt 0.163
type train | step 1470 | loss 8.4665 1.7635 3.9250 7.4357 | lr 1.0e-03 | norm 81.0944 | dt 0.164
type train | step 1480 | loss 9.4273 1.7815 3.9343 7.3527 | lr 1.0e-03 | norm 83.5767 | dt 0.163
type train | step 1490 | loss 11.0276 1.7654 3.9586 7.4441 | lr 1.0e-03 | norm 88.9874 | dt 0.203
type train | step 1500 | loss 12.0830 1.7929 4.0074 7.6842 | lr 1.0e-03 | norm 86.0503 | dt 0.218
type train | step 1510 | loss 12.6026 1.7530 3.9437 7.3725 | lr 1.0e-03 | norm 92.9277 | dt 0.190
type train | step 1520 | loss 12.9514 1.7992 3.9553 7.4208 | lr 1.0e-03 | norm 93.7651 | dt 0.186
type train | step 1530 | loss 12.9382 1.7721 3.9506 7.5145 | lr 1.0e-03 | norm 94.6094 | dt 0.183
type train | step 1540 | loss 12.8349 1.7573 3.9522 7.4268 | lr 1.0e-03 | norm 96.6174 | dt 0.188
type train | step 1550 | loss 12.7506 1.7686 3.9511 7.3505 | lr 1.0e-03 | norm 100.1117 | dt 0.158
type train | step 1560 | loss 13.0773 1.7407 3.8984 7.1241 | lr 1.0e-03 | norm 95.1722 | dt 0.166
type train | step 1570 | loss 13.2565 1.7629 3.9151 7.2759 | lr 1.0e-03 | norm 88.7713 | dt 0.167
type train | step 1580 | loss 12.9432 1.7292 3.9150 7.2170 | lr 1.0e-03 | norm 85.1903 | dt 0.192
type train | step 1590 | loss 12.0975 1.7671 3.9923 7.3159 | lr 1.0e-03 | norm 84.7412 | dt 0.187
type train | step 1600 | loss 11.7498 1.8091 3.9828 7.4920 | lr 1.0e-03 | norm 78.7969 | dt 0.197
type train | step 1610 | loss 10.8926 1.7572 3.9135 7.0185 | lr 1.0e-03 | norm 78.7942 | dt 0.192
type train | step 1620 | loss 10.2812 1.7311 3.9142 7.0827 | lr 1.0e-03 | norm 83.4230 | dt 0.178
type train | step 1630 | loss 10.3517 1.7798 3.9461 7.2021 | lr 1.0e-03 | norm 70.3606 | dt 0.190
type train | step 1640 | loss 9.8163 1.7639 3.9797 7.1733 | lr 1.0e-03 | norm 70.8294 | dt 0.167
type train | step 1650 | loss 9.5033 1.7654 3.9991 7.1761 | lr 1.0e-03 | norm 75.7549 | dt 0.189
type train | step 1660 | loss 9.3983 1.7328 3.9354 7.1561 | lr 1.0e-03 | norm 72.1245 | dt 0.161
type train | step 1670 | loss 9.1144 1.7759 3.9559 7.1087 | lr 9.9e-04 | norm 77.8445 | dt 0.176
type train | step 1680 | loss 8.8263 1.6961 3.9236 7.0421 | lr 9.9e-04 | norm 71.6992 | dt 0.182
type train | step 1690 | loss 8.7954 1.7666 3.9620 7.1124 | lr 9.9e-04 | norm 67.3908 | dt 0.206
type train | step 1700 | loss 8.8282 1.7656 3.9497 6.9469 | lr 9.9e-04 | norm 72.9541 | dt 0.173
type train | step 1710 | loss 8.7708 1.7347 3.9292 7.0490 | lr 9.9e-04 | norm 72.2253 | dt 0.174
type train | step 1720 | loss 8.6782 1.7436 3.9877 6.9369 | lr 9.9e-04 | norm 67.8810 | dt 0.196
type train | step 1730 | loss 8.1882 1.7437 3.9228 6.8573 | lr 9.9e-04 | norm 72.1098 | dt 0.183
type train | step 1740 | loss 8.1440 1.7119 3.9485 6.8623 | lr 9.9e-04 | norm 71.2142 | dt 0.158
type train | step 1750 | loss 7.7552 1.7787 3.9448 6.9427 | lr 9.9e-04 | norm 72.3419 | dt 0.155
type train | step 1760 | loss 7.3530 1.7234 3.8773 6.8856 | lr 9.9e-04 | norm 68.3598 | dt 0.191
type train | step 1770 | loss 7.2347 1.7520 3.9535 6.9189 | lr 9.9e-04 | norm 73.8294 | dt 0.162
type train | step 1780 | loss 7.1107 1.7570 3.9680 7.0882 | lr 9.9e-04 | norm 73.6365 | dt 0.194
type train | step 1790 | loss 7.4138 1.7373 3.9072 6.9340 | lr 9.9e-04 | norm 73.9288 | dt 0.189
type train | step 1800 | loss 7.6029 1.6845 3.9521 6.9506 | lr 9.9e-04 | norm 67.0713 | dt 0.204
type train | step 1810 | loss 7.1719 1.7141 3.9381 6.9522 | lr 9.9e-04 | norm 71.7130 | dt 0.203
type train | step 1820 | loss 7.0373 1.6942 3.8942 6.8466 | lr 9.9e-04 | norm 74.2865 | dt 0.160
type train | step 1830 | loss 6.8353 1.7092 3.9421 6.8794 | lr 9.9e-04 | norm 69.6604 | dt 0.158
type train | step 1840 | loss 6.6993 1.7182 3.8908 6.9347 | lr 9.9e-04 | norm 65.6588 | dt 0.162
type train | step 1850 | loss 6.5710 1.7347 3.8985 6.8503 | lr 9.9e-04 | norm 67.0954 | dt 0.202
type train | step 1860 | loss 6.5464 1.6840 3.9182 6.6887 | lr 9.9e-04 | norm 69.3347 | dt 0.165
type train | step 1870 | loss 6.3942 1.7691 3.9901 7.0690 | lr 9.9e-04 | norm 68.9871 | dt 0.175
type train | step 1880 | loss 6.1949 1.7216 3.8716 6.7607 | lr 9.9e-04 | norm 68.1721 | dt 0.165
type train | step 1890 | loss 6.1077 1.7007 3.8903 6.7980 | lr 9.9e-04 | norm 63.3493 | dt 0.209
type train | step 1900 | loss 6.0549 1.6975 3.9129 6.8290 | lr 9.9e-04 | norm 67.6420 | dt 0.198
type train | step 1910 | loss 6.1362 1.7509 3.9094 6.8690 | lr 9.9e-04 | norm 67.4937 | dt 0.176
type train | step 1920 | loss 6.0399 1.7008 3.9075 6.8029 | lr 9.9e-04 | norm 64.8538 | dt 0.164
type train | step 1930 | loss 5.7709 1.6874 3.8957 6.7452 | lr 9.9e-04 | norm 64.5098 | dt 0.171
type train | step 1940 | loss 5.7748 1.7039 3.9025 6.7338 | lr 9.9e-04 | norm 65.2856 | dt 0.199
type train | step 1950 | loss 5.7496 1.7183 3.9419 6.8178 | lr 9.9e-04 | norm 62.9174 | dt 0.163
type train | step 1960 | loss 5.6787 1.6897 3.9397 6.8739 | lr 9.9e-04 | norm 67.7576 | dt 0.162
type train | step 1970 | loss 5.9114 1.7578 3.9930 6.8944 | lr 9.9e-04 | norm 61.9076 | dt 0.204
type train | step 1980 | loss 5.5830 1.7378 3.9412 6.8442 | lr 9.9e-04 | norm 67.4183 | dt 0.196
type train | step 1990 | loss 5.4819 1.7166 3.8875 6.8666 | lr 9.9e-04 | norm 65.5253 | dt 0.171
type train | step 2000 | loss 5.2911 1.6907 3.8433 6.7477 | lr 9.9e-04 | norm 67.7559 | dt 0.182
type train | step 2010 | loss 5.3089 1.7390 3.9026 6.9214 | lr 9.9e-04 | norm 67.5911 | dt 0.189
type train | step 2020 | loss 5.1900 1.7128 3.9026 6.8425 | lr 9.9e-04 | norm 65.6750 | dt 0.195
type train | step 2030 | loss 5.1666 1.7437 3.9264 6.8470 | lr 9.9e-04 | norm 71.0682 | dt 0.162
type train | step 2040 | loss 5.2880 1.7281 3.8769 6.8681 | lr 9.9e-04 | norm 68.6873 | dt 0.192
type train | step 2050 | loss 5.2966 1.7213 3.9424 6.8420 | lr 9.9e-04 | norm 66.2180 | dt 0.193
type train | step 2060 | loss 5.0721 1.6914 3.8662 6.7075 | lr 9.9e-04 | norm 63.0852 | dt 0.191
type train | step 2070 | loss 4.9610 1.7622 3.9379 6.9481 | lr 9.9e-04 | norm 67.3108 | dt 0.168
type train | step 2080 | loss 4.8205 1.7104 3.9183 6.8261 | lr 9.9e-04 | norm 61.2563 | dt 0.159
type train | step 2090 | loss 4.7070 1.7085 3.8805 6.9313 | lr 9.9e-04 | norm 71.3104 | dt 0.181
type train | step 2100 | loss 4.6808 1.7142 3.8758 6.8280 | lr 9.9e-04 | norm 61.2834 | dt 0.166
type train | step 2110 | loss 4.6770 1.7291 3.9299 6.9415 | lr 9.9e-04 | norm 66.7870 | dt 0.199
type train | step 2120 | loss 4.6265 1.6981 3.8523 6.7986 | lr 9.9e-04 | norm 63.4636 | dt 0.168
type train | step 2130 | loss 4.6748 1.7673 3.9037 6.8504 | lr 9.9e-04 | norm 65.6828 | dt 0.194
type train | step 2140 | loss 4.6761 1.7410 3.9460 6.9466 | lr 9.9e-04 | norm 64.7897 | dt 0.181
type train | step 2150 | loss 4.7862 1.7194 3.8896 6.9181 | lr 9.9e-04 | norm 66.5666 | dt 0.163
type train | step 2160 | loss 4.7068 1.7455 3.8562 6.8578 | lr 9.9e-04 | norm 70.3492 | dt 0.160
type train | step 2170 | loss 4.5013 1.7238 3.8934 6.8367 | lr 9.9e-04 | norm 69.1926 | dt 0.166
type train | step 2180 | loss 4.7533 1.7045 3.8597 6.8537 | lr 9.9e-04 | norm 69.0381 | dt 0.196
type train | step 2190 | loss 4.5818 1.6929 3.8463 6.9080 | lr 9.9e-04 | norm 70.6229 | dt 0.205
type train | step 2200 | loss 4.5609 1.7211 3.9128 6.9921 | lr 9.9e-04 | norm 66.0424 | dt 0.199
type train | step 2210 | loss 4.5902 1.7591 3.9970 7.1493 | lr 9.9e-04 | norm 68.2975 | dt 0.205
type train | step 2220 | loss 4.4477 1.7297 3.8445 6.8635 | lr 9.9e-04 | norm 68.0518 | dt 0.184
type train | step 2230 | loss 4.5251 1.7045 3.8967 6.7774 | lr 9.9e-04 | norm 68.6853 | dt 0.189
type train | step 2240 | loss 4.4732 1.7195 3.9118 7.0856 | lr 9.9e-04 | norm 66.9725 | dt 0.205
type train | step 2250 | loss 4.4833 1.7051 3.9622 6.9289 | lr 9.9e-04 | norm 69.7550 | dt 0.172
type train | step 2260 | loss 4.4970 1.7101 3.9224 6.8961 | lr 9.9e-04 | norm 66.3252 | dt 0.174
type train | step 2270 | loss 4.4353 1.6986 3.9165 7.0220 | lr 9.9e-04 | norm 64.4829 | dt 0.200
type train | step 2280 | loss 4.4692 1.7120 3.8860 6.9144 | lr 9.9e-04 | norm 67.4327 | dt 0.178
type train | step 2290 | loss 4.3630 1.6542 3.8328 6.7013 | lr 9.9e-04 | norm 69.9455 | dt 0.204
type train | step 2300 | loss 4.4204 1.7110 3.9130 7.0319 | lr 9.9e-04 | norm 67.4515 | dt 0.177
type train | step 2310 | loss 4.3857 1.7249 3.8913 6.8266 | lr 9.9e-04 | norm 71.6241 | dt 0.197
type train | step 2320 | loss 4.4430 1.6986 3.8789 6.8915 | lr 9.9e-04 | norm 67.4626 | dt 0.161
type train | step 2330 | loss 4.4479 1.7150 3.9063 6.8882 | lr 9.9e-04 | norm 67.7565 | dt 0.171
type train | step 2340 | loss 4.5337 1.7113 3.8305 6.8334 | lr 9.8e-04 | norm 70.6558 | dt 0.193
type train | step 2350 | loss 4.5173 1.6921 3.8638 6.8554 | lr 9.8e-04 | norm 65.7785 | dt 0.194
type train | step 2360 | loss 4.6534 1.7133 3.8915 6.9185 | lr 9.8e-04 | norm 66.5698 | dt 0.164
type train | step 2370 | loss 4.5486 1.6877 3.8473 6.8372 | lr 9.8e-04 | norm 68.1337 | dt 0.169
type train | step 2380 | loss 4.4895 1.7214 3.9290 6.8774 | lr 9.8e-04 | norm 69.4748 | dt 0.192
type train | step 2390 | loss 4.6171 1.7099 3.8940 6.9440 | lr 9.8e-04 | norm 69.7083 | dt 0.184
type train | step 2400 | loss 4.4825 1.6950 3.8598 7.0236 | lr 9.8e-04 | norm 67.4639 | dt 0.208
type train | step 2410 | loss 4.6265 1.6640 3.8633 6.8285 | lr 9.8e-04 | norm 69.6526 | dt 0.194
type train | step 2420 | loss 4.4844 1.6998 3.8871 6.9273 | lr 9.8e-04 | norm 66.3181 | dt 0.199
type train | step 2430 | loss 4.4796 1.6393 3.8488 6.8942 | lr 9.8e-04 | norm 67.3808 | dt 0.216
type train | step 2440 | loss 4.4545 1.6592 3.8914 6.8697 | lr 9.8e-04 | norm 64.8528 | dt 0.166
type train | step 2450 | loss 4.3859 1.6858 3.8266 6.9121 | lr 9.8e-04 | norm 69.5471 | dt 0.201
type train | step 2460 | loss 4.3845 1.6926 3.8321 6.9169 | lr 9.8e-04 | norm 70.1097 | dt 0.196
type train | step 2470 | loss 4.5417 1.6313 3.8179 6.7539 | lr 9.8e-04 | norm 73.3570 | dt 0.202
type train | step 2480 | loss 4.5344 1.7233 3.9030 7.1347 | lr 9.8e-04 | norm 67.5539 | dt 0.164
type train | step 2490 | loss 4.4315 1.6442 3.8358 6.8201 | lr 9.8e-04 | norm 67.7906 | dt 0.195
type train | step 2500 | loss 4.4264 1.6432 3.8101 6.7975 | lr 9.8e-04 | norm 67.7708 | dt 0.180
type train | step 2510 | loss 4.3723 1.6370 3.8357 6.8611 | lr 9.8e-04 | norm 65.8641 | dt 0.183
type train | step 2520 | loss 4.3712 1.7005 3.8714 6.9650 | lr 9.8e-04 | norm 65.4086 | dt 0.175
type train | step 2530 | loss 4.4839 1.6626 3.8427 6.7534 | lr 9.8e-04 | norm 68.2938 | dt 0.170
type train | step 2540 | loss 4.4478 1.6540 3.7795 6.6821 | lr 9.8e-04 | norm 68.0640 | dt 0.204
type train | step 2550 | loss 4.4261 1.6748 3.7891 6.8905 | lr 9.8e-04 | norm 67.7792 | dt 0.179
type train | step 2560 | loss 4.4688 1.6995 3.8440 6.9226 | lr 9.8e-04 | norm 61.5837 | dt 0.191
type train | step 2570 | loss 4.3019 1.6671 3.8421 6.8847 | lr 9.8e-04 | norm 68.2065 | dt 0.168
type train | step 2580 | loss 4.4733 1.7530 3.9301 7.0318 | lr 9.8e-04 | norm 66.3485 | dt 0.202
type train | step 2590 | loss 4.2289 1.6876 3.8469 6.9309 | lr 9.8e-04 | norm 69.4893 | dt 0.166
type train | step 2600 | loss 4.1706 1.6712 3.8715 6.8000 | lr 9.8e-04 | norm 64.5252 | dt 0.196
type train | step 2610 | loss 3.9955 1.6550 3.7853 6.6887 | lr 9.8e-04 | norm 65.7627 | dt 0.181
type train | step 2620 | loss 4.0843 1.7122 3.8765 6.8564 | lr 9.8e-04 | norm 66.8475 | dt 0.207
type train | step 2630 | loss 3.9631 1.6741 3.8635 6.9218 | lr 9.8e-04 | norm 67.4455 | dt 0.198
type train | step 2640 | loss 4.0434 1.7136 3.8621 6.7927 | lr 9.8e-04 | norm 73.5539 | dt 0.197
type train | step 2650 | loss 4.0076 1.6688 3.8465 6.8284 | lr 9.8e-04 | norm 69.6054 | dt 0.177
type train | step 2660 | loss 3.8952 1.6766 3.8055 6.8438 | lr 9.8e-04 | norm 64.6796 | dt 0.169
type train | step 2670 | loss 3.8471 1.6599 3.7985 6.8026 | lr 9.8e-04 | norm 71.5943 | dt 0.193
type train | step 2680 | loss 3.9097 1.7225 3.8755 7.0682 | lr 9.8e-04 | norm 65.0603 | dt 0.194
type train | step 2690 | loss 3.9142 1.6731 3.8349 6.8110 | lr 9.8e-04 | norm 71.8258 | dt 0.199
type train | step 2700 | loss 3.8949 1.6913 3.8796 6.8552 | lr 9.8e-04 | norm 66.6286 | dt 0.210
type train | step 2710 | loss 3.9910 1.6795 3.8199 6.7904 | lr 9.8e-04 | norm 69.4211 | dt 0.202
type train | step 2720 | loss 4.0254 1.6974 3.9246 6.8755 | lr 9.8e-04 | norm 66.5604 | dt 0.164
type train | step 2730 | loss 3.9776 1.6693 3.8147 6.7840 | lr 9.8e-04 | norm 65.6310 | dt 0.208
type train | step 2740 | loss 3.9716 1.7211 3.8067 6.8246 | lr 9.8e-04 | norm 66.8188 | dt 0.205
type train | step 2750 | loss 3.9293 1.7057 3.8555 6.8990 | lr 9.8e-04 | norm 66.5880 | dt 0.205
type train | step 2760 | loss 3.9071 1.6878 3.8309 6.8380 | lr 9.8e-04 | norm 66.2748 | dt 0.209
type train | step 2770 | loss 3.8506 1.7080 3.8223 6.8384 | lr 9.8e-04 | norm 65.4940 | dt 0.204
type train | step 2780 | loss 3.7655 1.6725 3.7978 6.7417 | lr 9.8e-04 | norm 68.5509 | dt 0.187
type train | step 2790 | loss 3.7344 1.7029 3.7590 6.7986 | lr 9.8e-04 | norm 66.0277 | dt 0.230
type train | step 2800 | loss 3.7743 1.6778 3.8001 6.7251 | lr 9.8e-04 | norm 67.0552 | dt 0.200
type train | step 2810 | loss 3.7460 1.6834 3.8865 6.8178 | lr 9.7e-04 | norm 67.6472 | dt 0.194
type train | step 2820 | loss 3.8864 1.7225 3.9341 7.1129 | lr 9.7e-04 | norm 72.8954 | dt 0.159
type train | step 2830 | loss 3.7375 1.6817 3.8341 6.8466 | lr 9.7e-04 | norm 68.0904 | dt 0.158
type train | step 2840 | loss 3.7789 1.6604 3.8118 6.7503 | lr 9.7e-04 | norm 68.4258 | dt 0.170
type train | step 2850 | loss 3.7618 1.6848 3.8539 6.8049 | lr 9.7e-04 | norm 66.3249 | dt 0.168
type train | step 2860 | loss 3.6996 1.6587 3.8516 6.7781 | lr 9.7e-04 | norm 65.1169 | dt 0.203
type train | step 2870 | loss 3.7429 1.6817 3.9279 6.8098 | lr 9.7e-04 | norm 70.3172 | dt 0.166
type train | step 2880 | loss 3.7883 1.6488 3.8560 6.7301 | lr 9.7e-04 | norm 68.7322 | dt 0.169
type train | step 2890 | loss 3.7775 1.6814 3.8418 6.8380 | lr 9.7e-04 | norm 69.0732 | dt 0.199
type train | step 2900 | loss 3.7013 1.6174 3.8218 6.6627 | lr 9.7e-04 | norm 66.3815 | dt 0.180
type train | step 2910 | loss 3.7374 1.6764 3.8707 6.8055 | lr 9.7e-04 | norm 67.6475 | dt 0.162
type train | step 2920 | loss 3.7221 1.6516 3.8499 6.8100 | lr 9.7e-04 | norm 71.2677 | dt 0.191
type train | step 2930 | loss 3.6888 1.6556 3.8285 6.6598 | lr 9.7e-04 | norm 68.2858 | dt 0.176
type train | step 2940 | loss 3.6877 1.6699 3.8702 6.8893 | lr 9.7e-04 | norm 69.1809 | dt 0.179
type train | step 2950 | loss 3.7028 1.6628 3.7786 6.6536 | lr 9.7e-04 | norm 65.0268 | dt 0.205
type train | step 2960 | loss 3.7344 1.6297 3.8168 6.6702 | lr 9.7e-04 | norm 66.4320 | dt 0.185
type train | step 2970 | loss 3.6953 1.6750 3.8307 6.7651 | lr 9.7e-04 | norm 67.2883 | dt 0.194
type train | step 2980 | loss 3.6635 1.6275 3.8230 6.7290 | lr 9.7e-04 | norm 66.3863 | dt 0.162
type train | step 2990 | loss 3.6924 1.6617 3.8515 6.7115 | lr 9.7e-04 | norm 65.9679 | dt 0.175
type train | step 3000 | loss 3.7666 1.6645 3.8368 6.8684 | lr 9.7e-04 | norm 67.5296 | dt 0.176
type train | step 3010 | loss 3.7117 1.6608 3.8175 6.7488 | lr 9.7e-04 | norm 70.5519 | dt 0.183
type train | step 3020 | loss 3.7320 1.6184 3.8461 6.5760 | lr 9.7e-04 | norm 69.0074 | dt 0.186
type train | step 3030 | loss 3.7078 1.6806 3.8476 6.7573 | lr 9.7e-04 | norm 73.5651 | dt 0.174
type train | step 3040 | loss 3.6150 1.6171 3.7970 6.5856 | lr 9.7e-04 | norm 67.4846 | dt 0.202
type train | step 3050 | loss 3.6756 1.6314 3.8086 6.7850 | lr 9.7e-04 | norm 70.4764 | dt 0.202
type train | step 3060 | loss 3.6260 1.6208 3.7970 6.6318 | lr 9.7e-04 | norm 66.1526 | dt 0.170
type train | step 3070 | loss 3.6253 1.6309 3.7649 6.6157 | lr 9.7e-04 | norm 67.5803 | dt 0.201
type train | step 3080 | loss 3.5540 1.5850 3.8042 6.6058 | lr 9.7e-04 | norm 69.7650 | dt 0.191
type train | step 3090 | loss 3.7368 1.6636 3.8755 6.7330 | lr 9.7e-04 | norm 77.0232 | dt 0.170
type train | step 3100 | loss 3.6269 1.5786 3.7909 6.6228 | lr 9.7e-04 | norm 70.4780 | dt 0.173
type train | step 3110 | loss 3.6117 1.5789 3.7887 6.5014 | lr 9.7e-04 | norm 69.6044 | dt 0.208
type train | step 3120 | loss 3.5575 1.5653 3.7848 6.6126 | lr 9.7e-04 | norm 69.7855 | dt 0.186
type train | step 3130 | loss 3.5741 1.6151 3.8182 6.6564 | lr 9.7e-04 | norm 69.8761 | dt 0.165
type train | step 3140 | loss 3.5588 1.5630 3.8714 6.5956 | lr 9.7e-04 | norm 71.1584 | dt 0.195
type train | step 3150 | loss 3.5040 1.5333 3.7930 6.4759 | lr 9.7e-04 | norm 68.8177 | dt 0.203
type train | step 3160 | loss 3.5851 1.5511 3.7714 6.5845 | lr 9.7e-04 | norm 69.3777 | dt 0.196
type train | step 3170 | loss 3.5937 1.5744 3.8336 6.7008 | lr 9.7e-04 | norm 68.5398 | dt 0.180
type train | step 3180 | loss 3.5109 1.5518 3.8313 6.7111 | lr 9.7e-04 | norm 69.0607 | dt 0.175
type train | step 3190 | loss 3.6445 1.6503 3.9036 6.7201 | lr 9.6e-04 | norm 66.8229 | dt 0.198
type train | step 3200 | loss 3.5646 1.5684 3.8397 6.8268 | lr 9.6e-04 | norm 69.2810 | dt 0.196
type train | step 3210 | loss 3.6031 1.5802 3.8158 6.5488 | lr 9.6e-04 | norm 72.7154 | dt 0.183
type train | step 3220 | loss 3.5201 1.5658 3.7840 6.4619 | lr 9.6e-04 | norm 66.9889 | dt 0.181
type train | step 3230 | loss 3.6179 1.6137 3.8348 6.7075 | lr 9.6e-04 | norm 73.3077 | dt 0.183
type train | step 3240 | loss 3.5612 1.5848 3.8700 6.6956 | lr 9.6e-04 | norm 68.8887 | dt 0.169
type train | step 3250 | loss 3.6565 1.6337 3.8352 6.6492 | lr 9.6e-04 | norm 69.9531 | dt 0.163
type train | step 3260 | loss 3.6497 1.5821 3.7923 6.5814 | lr 9.6e-04 | norm 72.7269 | dt 0.164
type train | step 3270 | loss 3.5340 1.5911 3.8329 6.5396 | lr 9.6e-04 | norm 73.5034 | dt 0.219
type train | step 3280 | loss 3.5492 1.5829 3.7407 6.5141 | lr 9.6e-04 | norm 75.1688 | dt 0.173
type train | step 3290 | loss 3.5687 1.6217 3.8492 6.6323 | lr 9.6e-04 | norm 63.8830 | dt 0.168
type train | step 3300 | loss 3.6699 1.5576 3.8234 6.7218 | lr 9.6e-04 | norm 76.9428 | dt 0.206
type train | step 3310 | loss 3.5535 1.5893 3.8406 6.5676 | lr 9.6e-04 | norm 72.6260 | dt 0.170
type train | step 3320 | loss 3.6769 1.5711 3.8264 6.6345 | lr 9.6e-04 | norm 75.7654 | dt 0.165
type train | step 3330 | loss 3.6294 1.5806 3.8787 6.6188 | lr 9.6e-04 | norm 67.1476 | dt 0.196
type train | step 3340 | loss 3.5672 1.5400 3.7712 6.7055 | lr 9.6e-04 | norm 72.4123 | dt 0.172
type train | step 3350 | loss 3.5769 1.5972 3.8349 6.6133 | lr 9.6e-04 | norm 71.9551 | dt 0.164
type train | step 3360 | loss 3.5371 1.5711 3.8426 6.6570 | lr 9.6e-04 | norm 70.8694 | dt 0.214
type train | step 3370 | loss 3.5495 1.5700 3.8292 6.6478 | lr 9.6e-04 | norm 71.8621 | dt 0.198
type train | step 3380 | loss 3.4790 1.5697 3.7797 6.6147 | lr 9.6e-04 | norm 74.5158 | dt 0.208
type train | step 3390 | loss 3.4387 1.5344 3.7688 6.4304 | lr 9.6e-04 | norm 71.5700 | dt 0.170
type train | step 3400 | loss 3.5144 1.5428 3.7758 6.5803 | lr 9.6e-04 | norm 70.9491 | dt 0.186
type train | step 3410 | loss 3.5005 1.5315 3.7906 6.4689 | lr 9.6e-04 | norm 76.5849 | dt 0.164
type train | step 3420 | loss 3.4734 1.5529 3.8701 6.6387 | lr 9.6e-04 | norm 72.0484 | dt 0.167
type train | step 3430 | loss 3.5341 1.5967 3.8793 6.7625 | lr 9.6e-04 | norm 73.8520 | dt 0.208
type train | step 3440 | loss 3.4276 1.5465 3.7554 6.5631 | lr 9.6e-04 | norm 75.8133 | dt 0.175
type train | step 3450 | loss 3.4580 1.5241 3.7900 6.5839 | lr 9.6e-04 | norm 71.7550 | dt 0.162
type train | step 3460 | loss 3.4835 1.5650 3.7905 6.5692 | lr 9.6e-04 | norm 76.6777 | dt 0.217
type train | step 3470 | loss 3.5203 1.5443 3.8618 6.5795 | lr 9.6e-04 | norm 77.4712 | dt 0.166
type train | step 3480 | loss 3.5587 1.5578 3.8285 6.5962 | lr 9.6e-04 | norm 73.2790 | dt 0.183
type train | step 3490 | loss 3.5691 1.5347 3.8079 6.5447 | lr 9.6e-04 | norm 73.7028 | dt 0.167
type train | step 3500 | loss 3.5807 1.5411 3.8267 6.6363 | lr 9.6e-04 | norm 72.4797 | dt 0.184
type train | step 3510 | loss 3.5142 1.5025 3.7855 6.4098 | lr 9.6e-04 | norm 79.7810 | dt 0.185
type train | step 3520 | loss 3.5471 1.5452 3.8293 6.5051 | lr 9.5e-04 | norm 76.0374 | dt 0.199
type train | step 3530 | loss 3.5682 1.5554 3.8265 6.4938 | lr 9.5e-04 | norm 76.2505 | dt 0.162
type train | step 3540 | loss 3.5526 1.5276 3.8146 6.4394 | lr 9.5e-04 | norm 79.1978 | dt 0.177
type train | step 3550 | loss 3.5473 1.5562 3.8345 6.4883 | lr 9.5e-04 | norm 76.0472 | dt 0.212
type train | step 3560 | loss 3.5877 1.5572 3.7578 6.5302 | lr 9.5e-04 | norm 74.7481 | dt 0.178
type train | step 3570 | loss 3.5592 1.5366 3.8000 6.5009 | lr 9.5e-04 | norm 76.9013 | dt 0.194
type train | step 3580 | loss 3.6268 1.5646 3.8378 6.6044 | lr 9.5e-04 | norm 75.2447 | dt 0.194
type train | step 3590 | loss 3.5443 1.5222 3.7926 6.4776 | lr 9.5e-04 | norm 77.2020 | dt 0.200
type train | step 3600 | loss 3.5521 1.5330 3.8007 6.6075 | lr 9.5e-04 | norm 74.9426 | dt 0.177
type train | step 3610 | loss 3.6148 1.5452 3.8325 6.5665 | lr 9.5e-04 | norm 70.9518 | dt 0.166
type train | step 3620 | loss 3.6587 1.5399 3.7850 6.5395 | lr 9.5e-04 | norm 80.2512 | dt 0.201
type train | step 3630 | loss 3.6120 1.4980 3.7758 6.5202 | lr 9.5e-04 | norm 78.4906 | dt 0.174
type train | step 3640 | loss 3.7040 1.5450 3.7889 6.5527 | lr 9.5e-04 | norm 83.0396 | dt 0.197
type train | step 3650 | loss 3.6053 1.5309 3.7674 6.4669 | lr 9.5e-04 | norm 81.8305 | dt 0.196
type train | step 3660 | loss 3.6492 1.5188 3.7865 6.4796 | lr 9.5e-04 | norm 77.1751 | dt 0.198
type train | step 3670 | loss 3.5987 1.5296 3.7607 6.5501 | lr 9.5e-04 | norm 71.5285 | dt 0.192
type train | step 3680 | loss 3.6449 1.5352 3.7720 6.5261 | lr 9.5e-04 | norm 80.3251 | dt 0.181
type train | step 3690 | loss 3.6656 1.4848 3.7684 6.4253 | lr 9.5e-04 | norm 75.0304 | dt 0.177
type train | step 3700 | loss 3.7971 1.5592 3.8523 6.6645 | lr 9.5e-04 | norm 78.8557 | dt 0.175
type train | step 3710 | loss 3.6327 1.5256 3.7490 6.3997 | lr 9.5e-04 | norm 80.2379 | dt 0.197
type train | step 3720 | loss 3.6210 1.5099 3.7744 6.4041 | lr 9.5e-04 | norm 74.8388 | dt 0.205
type train | step 3730 | loss 3.6329 1.4855 3.7530 6.4938 | lr 9.5e-04 | norm 73.6328 | dt 0.182
type train | step 3740 | loss 3.7167 1.5538 3.8027 6.6164 | lr 9.5e-04 | norm 84.1459 | dt 0.196
type train | step 3750 | loss 3.6249 1.4942 3.7664 6.4745 | lr 9.5e-04 | norm 77.7335 | dt 0.196
type train | step 3760 | loss 3.6307 1.4986 3.7600 6.3369 | lr 9.5e-04 | norm 79.5828 | dt 0.187
type train | step 3770 | loss 3.6282 1.4991 3.7776 6.4327 | lr 9.5e-04 | norm 74.4679 | dt 0.182
type train | step 3780 | loss 3.6181 1.5356 3.8351 6.5988 | lr 9.5e-04 | norm 74.1452 | dt 0.180
type train | step 3790 | loss 3.5667 1.4957 3.7925 6.5933 | lr 9.5e-04 | norm 78.9688 | dt 0.208
type train | step 3800 | loss 3.6307 1.5957 3.8963 6.6605 | lr 9.5e-04 | norm 80.6149 | dt 0.179
type train | step 3810 | loss 3.5116 1.5148 3.8451 6.4963 | lr 9.5e-04 | norm 78.2037 | dt 0.164
type train | step 3820 | loss 3.4877 1.5197 3.8264 6.5194 | lr 9.4e-04 | norm 83.3814 | dt 0.198
type train | step 3830 | loss 3.4875 1.5063 3.7716 6.3248 | lr 9.4e-04 | norm 80.0779 | dt 0.198
type train | step 3840 | loss 3.5586 1.5517 3.7760 6.5115 | lr 9.4e-04 | norm 74.5238 | dt 0.196
type train | step 3850 | loss 3.4796 1.5092 3.8083 6.6333 | lr 9.4e-04 | norm 78.9500 | dt 0.183
type train | step 3860 | loss 3.5779 1.5621 3.8043 6.5832 | lr 9.4e-04 | norm 81.5098 | dt 0.167
type train | step 3870 | loss 3.4865 1.5136 3.7905 6.5115 | lr 9.4e-04 | norm 82.7861 | dt 0.167
type train | step 3880 | loss 3.4640 1.5046 3.8128 6.4084 | lr 9.4e-04 | norm 77.3012 | dt 0.185
type train | step 3890 | loss 3.4519 1.5082 3.7489 6.3751 | lr 9.4e-04 | norm 86.4431 | dt 0.194
type train | step 3900 | loss 3.4025 1.5396 3.7937 6.6127 | lr 9.4e-04 | norm 76.5541 | dt 0.197
type train | step 3910 | loss 3.4980 1.4968 3.7605 6.5486 | lr 9.4e-04 | norm 76.8202 | dt 0.200
type train | step 3920 | loss 3.5981 1.5303 3.7928 6.5106 | lr 9.4e-04 | norm 87.5567 | dt 0.174
type train | step 3930 | loss 3.8272 1.5227 3.7748 6.5150 | lr 9.4e-04 | norm 76.7065 | dt 0.171
type train | step 3940 | loss 4.1716 1.5510 3.8202 6.5751 | lr 9.4e-04 | norm 85.3943 | dt 0.167
type train | step 3950 | loss 4.4204 1.5143 3.7761 6.4357 | lr 9.4e-04 | norm 77.4225 | dt 0.202
type train | step 3960 | loss 4.7631 1.5815 3.8349 6.5305 | lr 9.4e-04 | norm 76.7358 | dt 0.212
type train | step 3970 | loss 4.7913 1.5297 3.7886 6.5447 | lr 9.4e-04 | norm 71.7729 | dt 0.193
type train | step 3980 | loss 4.8054 1.5317 3.7786 6.5709 | lr 9.4e-04 | norm 76.9624 | dt 0.200
type train | step 3990 | loss 4.8500 1.5559 3.7701 6.4857 | lr 9.4e-04 | norm 79.1359 | dt 0.207
type train | step 4000 | loss 5.0015 1.5047 3.7135 6.3902 | lr 9.4e-04 | norm 73.6474 | dt 0.200
type train | step 4010 | loss 5.2193 1.5335 3.7632 6.4186 | lr 9.4e-04 | norm 79.2246 | dt 0.176
type train | step 4020 | loss 5.2940 1.5085 3.7397 6.3932 | lr 9.4e-04 | norm 81.3264 | dt 0.175
type train | step 4030 | loss 5.4800 1.5223 3.8488 6.5433 | lr 9.4e-04 | norm 79.0464 | dt 0.220
type train | step 4040 | loss 5.9322 1.5514 3.8430 6.7096 | lr 9.4e-04 | norm 78.6799 | dt 0.179
type train | step 4050 | loss 6.4251 1.5038 3.7954 6.4613 | lr 9.4e-04 | norm 85.7861 | dt 0.219
type train | step 4060 | loss 7.1818 1.4946 3.7834 6.3772 | lr 9.4e-04 | norm 85.4590 | dt 0.191
type train | step 4070 | loss 8.0847 1.5423 3.8193 6.5365 | lr 9.4e-04 | norm 87.1254 | dt 0.184
type train | step 4080 | loss 9.1222 1.5288 3.8348 6.5057 | lr 9.4e-04 | norm 88.1899 | dt 0.167
type train | step 4090 | loss 10.3131 1.5271 3.8251 6.5285 | lr 9.3e-04 | norm 85.8050 | dt 0.204
type train | step 4100 | loss 12.0747 1.5089 3.8127 6.5056 | lr 9.3e-04 | norm 93.2978 | dt 0.200
type train | step 4110 | loss 13.6110 1.5133 3.7937 6.5173 | lr 9.3e-04 | norm 96.9328 | dt 0.196
type train | step 4120 | loss 15.3912 1.4743 3.7467 6.3761 | lr 9.3e-04 | norm 103.1557 | dt 0.201
type train | step 4130 | loss 17.0557 1.5114 3.8182 6.5396 | lr 9.3e-04 | norm 101.4205 | dt 0.166
type train | step 4140 | loss 18.6227 1.4989 3.7992 6.4669 | lr 9.3e-04 | norm 99.5121 | dt 0.209
type train | step 4150 | loss 19.8721 1.4742 3.8080 6.4740 | lr 9.3e-04 | norm 103.5686 | dt 0.179
type train | step 4160 | loss 21.1153 1.5070 3.8423 6.5228 | lr 9.3e-04 | norm 105.5204 | dt 0.209
type train | step 4170 | loss 21.3208 1.4877 3.7210 6.4720 | lr 9.3e-04 | norm 114.1292 | dt 0.193
type train | step 4180 | loss 21.3262 1.4633 3.7643 6.4279 | lr 9.3e-04 | norm 115.3390 | dt 0.206
type train | step 4190 | loss 20.3030 1.5146 3.7861 6.5569 | lr 9.3e-04 | norm 112.6292 | dt 0.197
type train | step 4200 | loss 20.0413 1.4811 3.7528 6.4054 | lr 9.3e-04 | norm 119.8227 | dt 0.211
type train | step 4210 | loss 19.7199 1.5021 3.8276 6.5142 | lr 9.3e-04 | norm 121.0295 | dt 0.184
type train | step 4220 | loss 19.8261 1.5474 3.7888 6.6168 | lr 9.3e-04 | norm 119.3480 | dt 0.184
type train | step 4230 | loss 18.9391 1.5342 3.7724 6.5113 | lr 9.3e-04 | norm 112.9029 | dt 0.195
type train | step 4240 | loss 17.9844 1.4817 3.7896 6.4451 | lr 9.3e-04 | norm 106.6585 | dt 0.193
type train | step 4250 | loss 16.6660 1.5082 3.7640 6.5028 | lr 9.3e-04 | norm 103.3429 | dt 0.197
type train | step 4260 | loss 16.0490 1.4893 3.7488 6.3699 | lr 9.3e-04 | norm 98.9717 | dt 0.196
type train | step 4270 | loss 14.9642 1.4951 3.7916 6.4656 | lr 9.3e-04 | norm 96.3505 | dt 0.199
type train | step 4280 | loss 14.1701 1.4999 3.8015 6.4281 | lr 9.3e-04 | norm 89.8649 | dt 0.168
type train | step 4290 | loss 13.2093 1.4812 3.7384 6.4608 | lr 9.3e-04 | norm 99.3270 | dt 0.178
type train | step 4300 | loss 12.9440 1.4467 3.7101 6.3347 | lr 9.3e-04 | norm 98.8138 | dt 0.206
type train | step 4310 | loss 12.7503 1.5132 3.8072 6.6132 | lr 9.3e-04 | norm 85.6599 | dt 0.184
type train | step 4320 | loss 11.8883 1.4822 3.7389 6.3951 | lr 9.3e-04 | norm 103.7849 | dt 0.201
type train | step 4330 | loss 11.1306 1.4560 3.7622 6.3629 | lr 9.3e-04 | norm 93.1631 | dt 0.164
type train | step 4340 | loss 10.7691 1.4775 3.7323 6.3892 | lr 9.2e-04 | norm 96.9004 | dt 0.175
type train | step 4350 | loss 10.7778 1.5197 3.8136 6.5255 | lr 9.2e-04 | norm 97.7553 | dt 0.172
type train | step 4360 | loss 10.0687 1.4702 3.7906 6.3506 | lr 9.2e-04 | norm 81.1269 | dt 0.171
type train | step 4370 | loss 9.4062 1.4572 3.7261 6.2772 | lr 9.2e-04 | norm 80.8369 | dt 0.171
type train | step 4380 | loss 9.2654 1.4703 3.7191 6.3220 | lr 9.2e-04 | norm 83.6556 | dt 0.180
type train | step 4390 | loss 9.1812 1.5116 3.7947 6.4461 | lr 9.2e-04 | norm 77.5952 | dt 0.208
type train | step 4400 | loss 8.4720 1.4741 3.8130 6.4717 | lr 9.2e-04 | norm 80.9529 | dt 0.159
type train | step 4410 | loss 8.7262 1.5758 3.8468 6.5762 | lr 9.2e-04 | norm 86.0096 | dt 0.184
type train | step 4420 | loss 8.1176 1.4764 3.8303 6.4046 | lr 9.2e-04 | norm 91.0475 | dt 0.175
type train | step 4430 | loss 7.6415 1.4671 3.7858 6.4344 | lr 9.2e-04 | norm 80.6703 | dt 0.180
type train | step 4440 | loss 7.5177 1.4649 3.6956 6.2761 | lr 9.2e-04 | norm 83.2076 | dt 0.217
type train | step 4450 | loss 7.5285 1.5222 3.7814 6.4322 | lr 9.2e-04 | norm 83.3044 | dt 0.165
type train | step 4460 | loss 7.3758 1.4859 3.8184 6.5062 | lr 9.2e-04 | norm 79.7379 | dt 0.174
type train | step 4470 | loss 7.2535 1.5277 3.8390 6.5168 | lr 9.2e-04 | norm 98.8666 | dt 0.161
type train | step 4480 | loss 6.6749 1.4875 3.7877 6.4427 | lr 9.2e-04 | norm 88.8524 | dt 0.241
type train | step 4490 | loss 6.2343 1.4730 3.7929 6.4079 | lr 9.2e-04 | norm 100.5070 | dt 0.203
type train | step 4500 | loss 5.9673 1.4869 3.6936 6.2829 | lr 9.2e-04 | norm 88.0326 | dt 0.167
type train | step 4510 | loss 5.8583 1.5203 3.7844 6.5210 | lr 9.2e-04 | norm 77.9701 | dt 0.231
type train | step 4520 | loss 5.3884 1.4578 3.7772 6.4869 | lr 9.2e-04 | norm 90.7456 | dt 0.207
type train | step 4530 | loss 5.2032 1.4859 3.7283 6.4766 | lr 9.2e-04 | norm 79.5652 | dt 0.182
type train | step 4540 | loss 5.1055 1.4848 3.7210 6.4657 | lr 9.2e-04 | norm 87.8656 | dt 0.207
type train | step 4550 | loss 5.0860 1.5207 3.8194 6.5589 | lr 9.2e-04 | norm 94.6394 | dt 0.195
type train | step 4560 | loss 4.8508 1.4856 3.7529 6.4473 | lr 9.2e-04 | norm 84.6510 | dt 0.196
type train | step 4570 | loss 4.8286 1.5312 3.8068 6.5426 | lr 9.2e-04 | norm 87.3778 | dt 0.191
type train | step 4580 | loss 4.8762 1.5085 3.7784 6.5196 | lr 9.1e-04 | norm 90.4004 | dt 0.199
type train | step 4590 | loss 4.6070 1.5109 3.7795 6.5447 | lr 9.1e-04 | norm 96.9091 | dt 0.193
type train | step 4600 | loss 4.5384 1.4988 3.7721 6.4945 | lr 9.1e-04 | norm 91.8481 | dt 0.192
type train | step 4610 | loss 4.3980 1.4688 3.7754 6.3581 | lr 9.1e-04 | norm 82.9509 | dt 0.199
type train | step 4620 | loss 4.4790 1.4739 3.7408 6.4355 | lr 9.1e-04 | norm 95.9228 | dt 0.168
type train | step 4630 | loss 4.3912 1.4710 3.7605 6.3512 | lr 9.1e-04 | norm 91.0054 | dt 0.169
type train | step 4640 | loss 4.3400 1.4865 3.7689 6.5149 | lr 9.1e-04 | norm 83.1447 | dt 0.171
type train | step 4650 | loss 4.4144 1.5466 3.8232 6.6092 | lr 9.1e-04 | norm 85.8467 | dt 0.164
type train | step 4660 | loss 4.3027 1.5210 3.7079 6.4227 | lr 9.1e-04 | norm 100.5166 | dt 0.172
type train | step 4670 | loss 4.1675 1.4836 3.7538 6.3373 | lr 9.1e-04 | norm 92.8205 | dt 0.168
type train | step 4680 | loss 4.1684 1.5131 3.7504 6.4225 | lr 9.1e-04 | norm 91.1630 | dt 0.211
type train | step 4690 | loss 4.1044 1.4913 3.8133 6.4428 | lr 9.1e-04 | norm 107.7302 | dt 0.179
type train | step 4700 | loss 4.0939 1.5039 3.8092 6.4337 | lr 9.1e-04 | norm 96.0890 | dt 0.164
type train | step 4710 | loss 3.9522 1.4777 3.7493 6.4088 | lr 9.1e-04 | norm 89.5237 | dt 0.204
type train | step 4720 | loss 3.9766 1.5123 3.7635 6.3939 | lr 9.1e-04 | norm 88.5579 | dt 0.199
type train | step 4730 | loss 3.9160 1.4704 3.7728 6.2723 | lr 9.1e-04 | norm 113.2974 | dt 0.161
type train | step 4740 | loss 3.8260 1.4942 3.7978 6.3660 | lr 9.1e-04 | norm 107.1282 | dt 0.163
type train | step 4750 | loss 3.7842 1.5079 3.8069 6.3473 | lr 9.1e-04 | norm 105.0827 | dt 0.183
type train | step 4760 | loss 3.7226 1.5088 3.7711 6.3048 | lr 9.1e-04 | norm 101.0310 | dt 0.191
type train | step 4770 | loss 3.7306 1.5257 3.8167 6.3382 | lr 9.1e-04 | norm 91.5175 | dt 0.173
type train | step 4780 | loss 3.6753 1.5052 3.7592 6.2267 | lr 9.1e-04 | norm 89.6637 | dt 0.162
type train | step 4790 | loss 3.5173 1.4972 3.7400 6.2307 | lr 9.1e-04 | norm 110.2122 | dt 0.208
type train | step 4800 | loss 3.6673 1.5121 3.7818 6.3349 | lr 9.1e-04 | norm 90.0817 | dt 0.196
type train | step 4810 | loss 3.5458 1.5021 3.7673 6.2391 | lr 9.0e-04 | norm 91.0433 | dt 0.202
type train | step 4820 | loss 3.5414 1.5226 3.7890 6.2899 | lr 9.0e-04 | norm 84.6864 | dt 0.177
type train | step 4830 | loss 3.5374 1.5416 3.7946 6.3456 | lr 9.0e-04 | norm 101.0768 | dt 0.194
type train | step 4840 | loss 3.5088 1.5152 3.7787 6.2683 | lr 9.0e-04 | norm 104.1975 | dt 0.166
type train | step 4850 | loss 3.5003 1.4696 3.7562 6.1955 | lr 9.0e-04 | norm 108.2347 | dt 0.202
type train | step 4860 | loss 3.5072 1.5105 3.7422 6.2603 | lr 9.0e-04 | norm 97.8982 | dt 0.178
type train | step 4870 | loss 3.4413 1.4923 3.7148 6.1640 | lr 9.0e-04 | norm 89.1541 | dt 0.177
type train | step 4880 | loss 3.3824 1.4852 3.7609 6.2204 | lr 9.0e-04 | norm 103.6257 | dt 0.175
type train | step 4890 | loss 3.3381 1.4936 3.7310 6.2271 | lr 9.0e-04 | norm 105.0458 | dt 0.205
type train | step 4900 | loss 3.4160 1.4929 3.7119 6.2108 | lr 9.0e-04 | norm 108.8189 | dt 0.193
type train | step 4910 | loss 3.3838 1.4454 3.7258 6.0682 | lr 9.0e-04 | norm 97.1011 | dt 0.172
type train | step 4920 | loss 3.4968 1.5501 3.7872 6.3285 | lr 9.0e-04 | norm 90.1918 | dt 0.193
type train | step 4930 | loss 3.3369 1.5037 3.7283 6.1037 | lr 9.0e-04 | norm 104.3222 | dt 0.203
type train | step 4940 | loss 3.3406 1.4821 3.7370 6.1341 | lr 9.0e-04 | norm 102.8837 | dt 0.173
type train | step 4950 | loss 3.3366 1.4854 3.7394 6.1474 | lr 9.0e-04 | norm 104.6456 | dt 0.170
type train | step 4960 | loss 3.4172 1.5236 3.7565 6.2521 | lr 9.0e-04 | norm 98.3035 | dt 0.214
type train | step 4970 | loss 3.4077 1.4714 3.7446 6.0495 | lr 9.0e-04 | norm 97.7832 | dt 0.188
type train | step 4980 | loss 3.3657 1.4685 3.6913 6.0118 | lr 9.0e-04 | norm 110.4059 | dt 0.196
type train | step 4990 | loss 3.3712 1.4920 3.7349 5.9746 | lr 9.0e-04 | norm 91.3792 | dt 0.171
type train | step 5000 | loss 3.4224 1.5213 3.7861 6.0786 | lr 9.0e-04 | norm 109.5408 | dt 0.188
type train | step 5010 | loss 3.3494 1.4779 3.7722 6.0845 | lr 9.0e-04 | norm 95.7278 | dt 0.208
type train | step 5020 | loss 3.5534 1.5791 3.8088 6.1406 | lr 9.0e-04 | norm 104.2866 | dt 0.194
type train | step 5030 | loss 3.4199 1.4984 3.8020 6.0019 | lr 8.9e-04 | norm 90.9643 | dt 0.177
type train | step 5040 | loss 3.3781 1.5168 3.7683 6.0105 | lr 8.9e-04 | norm 102.3887 | dt 0.181
type train | step 5050 | loss 3.3096 1.5062 3.6901 5.8969 | lr 8.9e-04 | norm 101.8069 | dt 0.178
type train | step 5060 | loss 3.4149 1.5393 3.7950 6.0248 | lr 8.9e-04 | norm 89.5521 | dt 0.203
type train | step 5070 | loss 3.2854 1.4929 3.7933 6.1063 | lr 8.9e-04 | norm 113.4240 | dt 0.192
type train | step 5080 | loss 3.4007 1.5530 3.7941 6.0707 | lr 8.9e-04 | norm 105.4785 | dt 0.200
type train | step 5090 | loss 3.3515 1.5183 3.7617 6.0372 | lr 8.9e-04 | norm 128.5507 | dt 0.205
type train | step 5100 | loss 3.2834 1.4915 3.7742 5.9912 | lr 8.9e-04 | norm 113.3929 | dt 0.167
type train | step 5110 | loss 3.2494 1.4833 3.7137 5.9078 | lr 8.9e-04 | norm 102.0543 | dt 0.209
type train | step 5120 | loss 3.3242 1.5404 3.8076 6.1169 | lr 8.9e-04 | norm 112.3624 | dt 0.192
type train | step 5130 | loss 3.2454 1.5002 3.8017 6.0195 | lr 8.9e-04 | norm 116.9180 | dt 0.173
type train | step 5140 | loss 3.2207 1.5093 3.7321 5.9856 | lr 8.9e-04 | norm 103.0231 | dt 0.168
type train | step 5150 | loss 3.2704 1.4952 3.7558 5.9696 | lr 8.9e-04 | norm 112.1702 | dt 0.173
type train | step 5160 | loss 3.2900 1.5157 3.8101 6.0900 | lr 8.9e-04 | norm 100.8308 | dt 0.185
type train | step 5170 | loss 3.2130 1.4699 3.7106 5.9338 | lr 8.9e-04 | norm 95.1773 | dt 0.205
type train | step 5180 | loss 3.1491 1.5334 3.7439 6.0118 | lr 8.9e-04 | norm 101.8201 | dt 0.167
type train | step 5190 | loss 3.2214 1.5122 3.7538 6.0162 | lr 8.9e-04 | norm 117.6755 | dt 0.164
type train | step 5200 | loss 3.1520 1.4904 3.7568 6.0221 | lr 8.9e-04 | norm 107.3050 | dt 0.167
type train | step 5210 | loss 3.2152 1.5230 3.7388 5.9796 | lr 8.9e-04 | norm 103.7160 | dt 0.176
type train | step 5220 | loss 3.1222 1.4987 3.7275 5.9030 | lr 8.9e-04 | norm 117.9471 | dt 0.206
type train | step 5230 | loss 3.1714 1.5257 3.7437 5.9555 | lr 8.8e-04 | norm 131.3515 | dt 0.169
type train | step 5240 | loss 3.1227 1.5220 3.7230 5.8574 | lr 8.8e-04 | norm 114.3630 | dt 0.172
type train | step 5250 | loss 3.0504 1.5341 3.7853 5.9950 | lr 8.8e-04 | norm 110.4602 | dt 0.171
type train | step 5260 | loss 3.1548 1.5985 3.8384 6.1620 | lr 8.8e-04 | norm 126.4431 | dt 0.186
type train | step 5270 | loss 3.1338 1.5281 3.7124 5.9662 | lr 8.8e-04 | norm 131.9523 | dt 0.170
type train | step 5280 | loss 3.0227 1.4918 3.7435 5.8682 | lr 8.8e-04 | norm 121.0524 | dt 0.175
type train | step 5290 | loss 3.1040 1.5668 3.7822 5.9314 | lr 8.8e-04 | norm 104.1752 | dt 0.191
type train | step 5300 | loss 3.0522 1.5235 3.8101 5.8879 | lr 8.8e-04 | norm 102.7974 | dt 0.218
type train | step 5310 | loss 3.0328 1.5249 3.8133 5.9965 | lr 8.8e-04 | norm 136.0027 | dt 0.177
type train | step 5320 | loss 3.0578 1.5297 3.7630 5.9124 | lr 8.8e-04 | norm 110.1478 | dt 0.210
type train | step 5330 | loss 3.0694 1.5371 3.7650 5.9246 | lr 8.8e-04 | norm 114.9710 | dt 0.201
type train | step 5340 | loss 3.0665 1.4998 3.7456 5.7564 | lr 8.8e-04 | norm 107.8638 | dt 0.167
type train | step 5350 | loss 3.0826 1.5507 3.7852 5.9221 | lr 8.8e-04 | norm 112.7303 | dt 0.202
type train | step 5360 | loss 3.0205 1.5468 3.7668 5.8423 | lr 8.8e-04 | norm 105.3093 | dt 0.196
type train | step 5370 | loss 3.0470 1.5317 3.7730 5.8261 | lr 8.8e-04 | norm 106.5247 | dt 0.164
type train | step 5380 | loss 3.0054 1.5441 3.7962 5.8727 | lr 8.8e-04 | norm 118.2820 | dt 0.165
type train | step 5390 | loss 2.9981 1.5369 3.7387 5.7936 | lr 8.8e-04 | norm 107.9898 | dt 0.172
type train | step 5400 | loss 2.9841 1.5250 3.7614 5.7980 | lr 8.8e-04 | norm 121.5289 | dt 0.208
type train | step 5410 | loss 3.0452 1.5420 3.8140 5.9062 | lr 8.8e-04 | norm 111.7725 | dt 0.198
type train | step 5420 | loss 2.9792 1.5257 3.7718 5.8336 | lr 8.8e-04 | norm 132.0532 | dt 0.168
type train | step 5430 | loss 2.9068 1.5552 3.8129 5.9048 | lr 8.8e-04 | norm 118.1981 | dt 0.198
type train | step 5440 | loss 3.0571 1.5697 3.7705 5.9076 | lr 8.7e-04 | norm 115.5136 | dt 0.166
type train | step 5450 | loss 3.0457 1.5556 3.7119 5.8175 | lr 8.7e-04 | norm 110.1059 | dt 0.181
type train | step 5460 | loss 3.0716 1.5400 3.7456 5.7596 | lr 8.7e-04 | norm 110.1156 | dt 0.169
type train | step 5470 | loss 3.0385 1.5646 3.7382 5.8596 | lr 8.7e-04 | norm 131.6118 | dt 0.212
type train | step 5480 | loss 3.0080 1.5198 3.7096 5.7355 | lr 8.7e-04 | norm 106.5400 | dt 0.195
type train | step 5490 | loss 3.0274 1.5373 3.7194 5.8112 | lr 8.7e-04 | norm 114.2716 | dt 0.163
type train | step 5500 | loss 2.9679 1.5440 3.7784 5.8133 | lr 8.7e-04 | norm 134.5997 | dt 0.211
type train | step 5510 | loss 2.9868 1.5452 3.7273 5.7767 | lr 8.7e-04 | norm 110.1424 | dt 0.205
type train | step 5520 | loss 2.9730 1.5022 3.7250 5.6794 | lr 8.7e-04 | norm 96.4505 | dt 0.172
type train | step 5530 | loss 3.1591 1.5685 3.7740 5.9749 | lr 8.7e-04 | norm 98.9536 | dt 0.205
type train | step 5540 | loss 2.9583 1.5034 3.7202 5.6768 | lr 8.7e-04 | norm 109.5945 | dt 0.205
type train | step 5550 | loss 2.9945 1.5021 3.6976 5.7112 | lr 8.7e-04 | norm 108.2066 | dt 0.164
type train | step 5560 | loss 3.0133 1.5022 3.7264 5.7001 | lr 8.7e-04 | norm 106.7317 | dt 0.183
type train | step 5570 | loss 3.0364 1.5474 3.7583 5.9286 | lr 8.7e-04 | norm 146.5632 | dt 0.174
type train | step 5580 | loss 2.9703 1.5086 3.7435 5.7344 | lr 8.7e-04 | norm 129.9640 | dt 0.198
type train | step 5590 | loss 2.8989 1.4841 3.7039 5.6586 | lr 8.7e-04 | norm 106.7885 | dt 0.178
type train | step 5600 | loss 3.0150 1.4995 3.7298 5.6523 | lr 8.7e-04 | norm 96.3039 | dt 0.176
type train | step 5610 | loss 3.0214 1.5453 3.7631 5.7950 | lr 8.7e-04 | norm 103.7719 | dt 0.180
type train | step 5620 | loss 2.9859 1.5389 3.7625 5.8186 | lr 8.7e-04 | norm 117.0217 | dt 0.193
type train | step 5630 | loss 3.1435 1.6053 3.7581 5.8705 | lr 8.6e-04 | norm 111.9259 | dt 0.213
type train | step 5640 | loss 3.0139 1.5168 3.7417 5.7112 | lr 8.6e-04 | norm 115.5812 | dt 0.176
type train | step 5650 | loss 2.9839 1.5079 3.7068 5.8037 | lr 8.6e-04 | norm 107.0813 | dt 0.170
type train | step 5660 | loss 2.9677 1.4756 3.6640 5.6490 | lr 8.6e-04 | norm 121.0132 | dt 0.188
type train | step 5670 | loss 3.1109 1.5276 3.7585 5.7984 | lr 8.6e-04 | norm 111.3289 | dt 0.198
type train | step 5680 | loss 3.0427 1.4728 3.7642 5.8063 | lr 8.6e-04 | norm 127.3431 | dt 0.199
type train | step 5690 | loss 3.0861 1.5176 3.7737 5.8444 | lr 8.6e-04 | norm 127.0745 | dt 0.161
type train | step 5700 | loss 3.0244 1.4813 3.7638 5.7826 | lr 8.6e-04 | norm 114.3643 | dt 0.195
type train | step 5710 | loss 2.9688 1.4799 3.7649 5.7011 | lr 8.6e-04 | norm 113.7574 | dt 0.162
type train | step 5720 | loss 2.9914 1.4633 3.6986 5.6219 | lr 8.6e-04 | norm 109.6067 | dt 0.172
type train | step 5730 | loss 3.0904 1.5106 3.8061 5.8004 | lr 8.6e-04 | norm 125.1715 | dt 0.203
type train | step 5740 | loss 2.9713 1.4735 3.7849 5.7204 | lr 8.6e-04 | norm 116.4809 | dt 0.212
type train | step 5750 | loss 2.9827 1.5089 3.7610 5.7905 | lr 8.6e-04 | norm 131.6812 | dt 0.165
type train | step 5760 | loss 3.0502 1.5034 3.7435 5.6922 | lr 8.6e-04 | norm 105.1594 | dt 0.223
type train | step 5770 | loss 3.1818 1.5051 3.8091 5.8522 | lr 8.6e-04 | norm 124.1414 | dt 0.204
type train | step 5780 | loss 3.1667 1.4860 3.7500 5.6493 | lr 8.6e-04 | norm 98.2404 | dt 0.208
type train | step 5790 | loss 3.0794 1.5462 3.7654 5.7690 | lr 8.6e-04 | norm 109.4306 | dt 0.189
type train | step 5800 | loss 3.0718 1.5222 3.7883 5.7497 | lr 8.6e-04 | norm 119.0610 | dt 0.196
type train | step 5810 | loss 3.0055 1.5077 3.7782 5.8068 | lr 8.6e-04 | norm 107.8477 | dt 0.177
type train | step 5820 | loss 3.0728 1.4981 3.7498 5.7118 | lr 8.5e-04 | norm 109.1145 | dt 0.179
type train | step 5830 | loss 2.9873 1.4772 3.7266 5.6705 | lr 8.5e-04 | norm 112.6951 | dt 0.196
type train | step 5840 | loss 2.9776 1.4945 3.7309 5.7524 | lr 8.5e-04 | norm 116.9946 | dt 0.194
type train | step 5850 | loss 3.0622 1.4790 3.7147 5.6620 | lr 8.5e-04 | norm 123.1230 | dt 0.181
type train | step 5860 | loss 3.0720 1.5018 3.7951 5.7411 | lr 8.5e-04 | norm 96.8403 | dt 0.167
type train | step 5870 | loss 3.1480 1.5441 3.8461 5.9124 | lr 8.5e-04 | norm 125.5679 | dt 0.207
type train | step 5880 | loss 3.0375 1.4961 3.7070 5.7186 | lr 8.5e-04 | norm 116.5262 | dt 0.171
type train | step 5890 | loss 3.1023 1.4748 3.7646 5.6592 | lr 8.5e-04 | norm 106.0738 | dt 0.199
type train | step 5900 | loss 3.0433 1.5354 3.7621 5.7596 | lr 8.5e-04 | norm 125.2398 | dt 0.185
type train | step 5910 | loss 3.0318 1.5070 3.7738 5.7571 | lr 8.5e-04 | norm 114.4018 | dt 0.169
type train | step 5920 | loss 3.0248 1.5146 3.7839 5.7628 | lr 8.5e-04 | norm 101.7029 | dt 0.207
type train | step 5930 | loss 3.0284 1.5117 3.7465 5.7144 | lr 8.5e-04 | norm 104.9073 | dt 0.206
type train | step 5940 | loss 3.0330 1.5341 3.7536 5.7471 | lr 8.5e-04 | norm 111.3137 | dt 0.178
type train | step 5950 | loss 3.0144 1.4685 3.7236 5.6698 | lr 8.5e-04 | norm 122.4064 | dt 0.176
type train | step 5960 | loss 3.0756 1.5224 3.7672 5.8186 | lr 8.5e-04 | norm 140.4497 | dt 0.189
type train | step 5970 | loss 3.0265 1.5099 3.7379 5.7330 | lr 8.5e-04 | norm 132.3104 | dt 0.195
type train | step 5980 | loss 3.0245 1.4971 3.7656 5.7141 | lr 8.5e-04 | norm 112.4743 | dt 0.179
type train | step 5990 | loss 3.0883 1.5163 3.8106 5.8079 | lr 8.5e-04 | norm 146.1306 | dt 0.196
type train | step 6000 | loss 3.0084 1.5106 3.7173 5.7154 | lr 8.4e-04 | norm 121.3341 | dt 0.202
type train | step 6010 | loss 2.9953 1.5080 3.7211 5.7283 | lr 8.4e-04 | norm 141.7843 | dt 0.195
type train | step 6020 | loss 3.1372 1.5354 3.7367 5.8515 | lr 8.4e-04 | norm 122.8899 | dt 0.188
type train | step 6030 | loss 3.0786 1.4960 3.7070 5.6967 | lr 8.4e-04 | norm 108.0102 | dt 0.198
type train | step 6040 | loss 3.0488 1.5092 3.7806 5.7852 | lr 8.4e-04 | norm 132.0718 | dt 0.196
type train | step 6050 | loss 3.1749 1.5165 3.7535 5.7934 | lr 8.4e-04 | norm 100.6401 | dt 0.198
type train | step 6060 | loss 3.1436 1.4973 3.7649 5.7602 | lr 8.4e-04 | norm 120.3919 | dt 0.208
type train | step 6070 | loss 3.1457 1.4741 3.7329 5.7044 | lr 8.4e-04 | norm 126.7979 | dt 0.169
type train | step 6080 | loss 3.1376 1.5221 3.7224 5.7740 | lr 8.4e-04 | norm 107.5381 | dt 0.199
type train | step 6090 | loss 3.1106 1.4683 3.6946 5.6265 | lr 8.4e-04 | norm 96.9182 | dt 0.203
type train | step 6100 | loss 3.0511 1.4919 3.7251 5.7236 | lr 8.4e-04 | norm 128.4131 | dt 0.167
type train | step 6110 | loss 3.0742 1.4737 3.7249 5.7479 | lr 8.4e-04 | norm 118.9576 | dt 0.170
type train | step 6120 | loss 3.0391 1.4756 3.6908 5.7505 | lr 8.4e-04 | norm 134.0656 | dt 0.203
type train | step 6130 | loss 3.1079 1.4363 3.6692 5.6150 | lr 8.4e-04 | norm 107.5626 | dt 0.198
type train | step 6140 | loss 3.1698 1.5104 3.7872 5.8865 | lr 8.4e-04 | norm 127.5046 | dt 0.179
type train | step 6150 | loss 3.0281 1.4437 3.7082 5.6308 | lr 8.4e-04 | norm 101.8928 | dt 0.199
type train | step 6160 | loss 2.9642 1.4380 3.6749 5.7605 | lr 8.4e-04 | norm 153.7363 | dt 0.195
type train | step 6170 | loss 2.9335 1.4475 3.7104 5.7042 | lr 8.4e-04 | norm 130.8869 | dt 0.196
type train | step 6180 | loss 3.0085 1.4841 3.7244 5.8314 | lr 8.3e-04 | norm 111.6057 | dt 0.168
type train | step 6190 | loss 3.0189 1.4281 3.7278 5.7153 | lr 8.3e-04 | norm 107.6163 | dt 0.194
type train | step 6200 | loss 2.9211 1.4210 3.7160 5.6459 | lr 8.3e-04 | norm 127.3637 | dt 0.174
type train | step 6210 | loss 3.0653 1.4490 3.7188 5.6043 | lr 8.3e-04 | norm 113.1833 | dt 0.180
type train | step 6220 | loss 3.0754 1.4896 3.7632 5.7303 | lr 8.3e-04 | norm 114.3544 | dt 0.166
type train | step 6230 | loss 3.0533 1.4358 3.7686 5.7312 | lr 8.3e-04 | norm 108.7604 | dt 0.198
type train | step 6240 | loss 3.1669 1.5375 3.8095 5.8491 | lr 8.3e-04 | norm 99.7244 | dt 0.193
type train | step 6250 | loss 3.0173 1.4697 3.7680 5.7160 | lr 8.3e-04 | norm 100.0037 | dt 0.161
type train | step 6260 | loss 3.0130 1.4686 3.7157 5.7497 | lr 8.3e-04 | norm 128.9765 | dt 0.200
type train | step 6270 | loss 2.9633 1.4458 3.6531 5.6260 | lr 8.3e-04 | norm 118.2798 | dt 0.173
type train | step 6280 | loss 3.0089 1.4903 3.6933 5.7941 | lr 8.3e-04 | norm 135.5052 | dt 0.196
type train | step 6290 | loss 2.9426 1.4519 3.7495 5.7869 | lr 8.3e-04 | norm 113.7337 | dt 0.167
type train | step 6300 | loss 3.0457 1.5061 3.7631 5.8292 | lr 8.3e-04 | norm 121.4782 | dt 0.164
type train | step 6310 | loss 2.9806 1.4902 3.7490 5.7417 | lr 8.3e-04 | norm 117.2486 | dt 0.204
type train | step 6320 | loss 2.9443 1.4801 3.7555 5.6935 | lr 8.3e-04 | norm 110.2189 | dt 0.165
type train | step 6330 | loss 2.9213 1.4761 3.6771 5.5749 | lr 8.3e-04 | norm 100.0883 | dt 0.196
type train | step 6340 | loss 2.9870 1.5162 3.8020 5.7687 | lr 8.3e-04 | norm 99.6106 | dt 0.167
type train | step 6350 | loss 2.9561 1.4687 3.7320 5.7048 | lr 8.2e-04 | norm 129.6814 | dt 0.198
type train | step 6360 | loss 2.8712 1.4924 3.7419 5.7733 | lr 8.2e-04 | norm 102.5361 | dt 0.196
type train | step 6370 | loss 2.9214 1.4828 3.7252 5.7350 | lr 8.2e-04 | norm 116.1644 | dt 0.196
type train | step 6380 | loss 2.9379 1.4934 3.8043 5.8965 | lr 8.2e-04 | norm 143.4213 | dt 0.167
type train | step 6390 | loss 2.9291 1.4684 3.6971 5.6638 | lr 8.2e-04 | norm 111.3512 | dt 0.170
type train | step 6400 | loss 2.8941 1.5113 3.7802 5.8272 | lr 8.2e-04 | norm 137.2085 | dt 0.166
type train | step 6410 | loss 2.8870 1.4828 3.7677 5.8231 | lr 8.2e-04 | norm 131.7308 | dt 0.165
type train | step 6420 | loss 2.8275 1.4706 3.7711 5.7983 | lr 8.2e-04 | norm 109.9485 | dt 0.173
type train | step 6430 | loss 2.8860 1.4863 3.7283 5.8051 | lr 8.2e-04 | norm 117.8508 | dt 0.202
type train | step 6440 | loss 2.7758 1.4395 3.6975 5.6762 | lr 8.2e-04 | norm 110.2465 | dt 0.202
type train | step 6450 | loss 2.8779 1.4583 3.7199 5.7423 | lr 8.2e-04 | norm 106.1318 | dt 0.180
type train | step 6460 | loss 2.8703 1.4353 3.7083 5.6568 | lr 8.2e-04 | norm 109.2648 | dt 0.178
type train | step 6470 | loss 2.7987 1.4357 3.8167 5.7874 | lr 8.2e-04 | norm 119.6050 | dt 0.215
type train | step 6480 | loss 2.8457 1.4821 3.8495 5.9941 | lr 8.2e-04 | norm 121.7124 | dt 0.182
type train | step 6490 | loss 2.8675 1.4338 3.6672 5.8134 | lr 8.2e-04 | norm 121.0725 | dt 0.172
type train | step 6500 | loss 2.8273 1.4224 3.7554 5.7697 | lr 8.2e-04 | norm 115.9237 | dt 0.221
type train | step 6510 | loss 2.8363 1.4627 3.7872 5.7943 | lr 8.2e-04 | norm 104.1012 | dt 0.168
type train | step 6520 | loss 2.8490 1.4368 3.7961 5.7829 | lr 8.1e-04 | norm 100.2340 | dt 0.183
type train | step 6530 | loss 2.8063 1.4359 3.7913 5.8555 | lr 8.1e-04 | norm 116.4658 | dt 0.164
type train | step 6540 | loss 2.8433 1.4197 3.7597 5.8274 | lr 8.1e-04 | norm 105.2694 | dt 0.173
type train | step 6550 | loss 2.9081 1.4316 3.7705 5.8642 | lr 8.1e-04 | norm 100.0734 | dt 0.187
type train | step 6560 | loss 2.8285 1.3927 3.7213 5.7530 | lr 8.1e-04 | norm 129.6931 | dt 0.212
type train | step 6570 | loss 2.8489 1.4599 3.7741 5.8458 | lr 8.1e-04 | norm 113.7473 | dt 0.167
type train | step 6580 | loss 2.8167 1.4424 3.7595 5.8561 | lr 8.1e-04 | norm 112.8148 | dt 0.199
type train | step 6590 | loss 2.7651 1.4206 3.7487 5.7853 | lr 8.1e-04 | norm 122.4963 | dt 0.201
type train | step 6600 | loss 2.8477 1.4414 3.7564 5.9088 | lr 8.1e-04 | norm 117.6303 | dt 0.172
type train | step 6610 | loss 2.8210 1.4488 3.7234 5.8621 | lr 8.1e-04 | norm 119.5394 | dt 0.182
type train | step 6620 | loss 2.8154 1.4214 3.7525 5.8454 | lr 8.1e-04 | norm 115.3370 | dt 0.177
type train | step 6630 | loss 2.8862 1.4443 3.7823 6.0059 | lr 8.1e-04 | norm 99.6303 | dt 0.168
type train | step 6640 | loss 2.8056 1.4104 3.7438 5.7745 | lr 8.1e-04 | norm 104.8820 | dt 0.170
type train | step 6650 | loss 2.8002 1.4235 3.7396 5.8915 | lr 8.1e-04 | norm 101.3590 | dt 0.202
type train | step 6660 | loss 2.9174 1.4780 3.7485 6.0254 | lr 8.1e-04 | norm 107.7787 | dt 0.168
type train | step 6670 | loss 2.8099 1.4501 3.7304 5.8986 | lr 8.1e-04 | norm 106.7560 | dt 0.201
type train | step 6680 | loss 2.8360 1.4128 3.7504 5.8692 | lr 8.1e-04 | norm 110.5851 | dt 0.169
type train | step 6690 | loss 2.8574 1.4327 3.7520 5.9688 | lr 8.0e-04 | norm 112.9969 | dt 0.207
type train | step 6700 | loss 2.8265 1.3827 3.7194 5.8780 | lr 8.0e-04 | norm 105.8002 | dt 0.205
type train | step 6710 | loss 2.8383 1.3919 3.7447 5.9081 | lr 8.0e-04 | norm 120.8875 | dt 0.180
type train | step 6720 | loss 2.7768 1.4036 3.7184 5.9239 | lr 8.0e-04 | norm 120.3003 | dt 0.187
type train | step 6730 | loss 2.7838 1.4041 3.7071 5.9494 | lr 8.0e-04 | norm 117.5976 | dt 0.204
type train | step 6740 | loss 2.7914 1.3664 3.7396 5.8745 | lr 8.0e-04 | norm 101.9290 | dt 0.184
type train | step 6750 | loss 2.8578 1.4583 3.7965 6.0755 | lr 8.0e-04 | norm 98.8061 | dt 0.170
type train | step 6760 | loss 2.7944 1.3989 3.7133 5.8582 | lr 8.0e-04 | norm 112.3237 | dt 0.174
type train | step 6770 | loss 2.8084 1.3592 3.7193 5.8766 | lr 8.0e-04 | norm 118.3144 | dt 0.181
type train | step 6780 | loss 2.8601 1.3834 3.7224 5.8899 | lr 8.0e-04 | norm 119.9495 | dt 0.204
type train | step 6790 | loss 2.8756 1.4230 3.7437 6.0825 | lr 8.0e-04 | norm 132.6384 | dt 0.172
type train | step 6800 | loss 2.8371 1.3808 3.7387 5.9043 | lr 8.0e-04 | norm 109.5804 | dt 0.212
type train | step 6810 | loss 2.7928 1.3586 3.7155 5.8834 | lr 8.0e-04 | norm 118.5022 | dt 0.200
type train | step 6820 | loss 2.8537 1.3837 3.7350 5.8814 | lr 8.0e-04 | norm 113.6567 | dt 0.166
type train | step 6830 | loss 2.8801 1.4256 3.7466 6.0093 | lr 8.0e-04 | norm 119.4651 | dt 0.176
type train | step 6840 | loss 2.8312 1.4000 3.7687 6.0217 | lr 8.0e-04 | norm 114.4560 | dt 0.169
type train | step 6850 | loss 2.9463 1.4893 3.8143 6.1449 | lr 7.9e-04 | norm 127.3977 | dt 0.213
type train | step 6860 | loss 2.8287 1.4082 3.7771 6.0138 | lr 7.9e-04 | norm 113.3347 | dt 0.199
type train | step 6870 | loss 2.8414 1.4183 3.7600 5.9554 | lr 7.9e-04 | norm 118.1411 | dt 0.197
type train | step 6880 | loss 2.8626 1.3931 3.6877 5.8356 | lr 7.9e-04 | norm 114.1374 | dt 0.170
type train | step 6890 | loss 2.8480 1.4402 3.7590 5.9878 | lr 7.9e-04 | norm 95.2489 | dt 0.191
type train | step 6900 | loss 2.7941 1.3823 3.7711 6.0270 | lr 7.9e-04 | norm 112.0902 | dt 0.168
type train | step 6910 | loss 2.8845 1.4346 3.7607 6.1155 | lr 7.9e-04 | norm 136.4431 | dt 0.166
type train | step 6920 | loss 2.8900 1.4209 3.7481 6.0277 | lr 7.9e-04 | norm 122.3076 | dt 0.206
type train | step 6930 | loss 2.8808 1.4024 3.7581 5.9416 | lr 7.9e-04 | norm 118.9901 | dt 0.182
type train | step 6940 | loss 2.9067 1.3915 3.6669 5.8665 | lr 7.9e-04 | norm 115.9167 | dt 0.188
type train | step 6950 | loss 2.9183 1.4374 3.7358 6.0314 | lr 7.9e-04 | norm 121.2913 | dt 0.220
type train | step 6960 | loss 2.8561 1.3746 3.7573 5.9668 | lr 7.9e-04 | norm 105.7621 | dt 0.204
type train | step 6970 | loss 2.7456 1.4064 3.7623 5.9825 | lr 7.9e-04 | norm 106.4856 | dt 0.168
type train | step 6980 | loss 2.8116 1.4081 3.7404 6.0150 | lr 7.9e-04 | norm 121.1631 | dt 0.202
type train | step 6990 | loss 2.8657 1.4197 3.8100 6.1022 | lr 7.9e-04 | norm 103.8290 | dt 0.174
type train | step 7000 | loss 2.8702 1.4160 3.7249 5.9328 | lr 7.9e-04 | norm 105.3474 | dt 0.175
type train | step 7010 | loss 2.8298 1.4437 3.7644 6.0128 | lr 7.8e-04 | norm 117.6853 | dt 0.196
type train | step 7020 | loss 2.8819 1.4087 3.7659 6.0829 | lr 7.8e-04 | norm 129.1662 | dt 0.215
type train | step 7030 | loss 2.8807 1.4069 3.7651 6.0410 | lr 7.8e-04 | norm 120.5176 | dt 0.179
type train | step 7040 | loss 2.8444 1.4291 3.7187 6.0207 | lr 7.8e-04 | norm 112.8389 | dt 0.198
type train | step 7050 | loss 2.8785 1.4016 3.7226 5.9512 | lr 7.8e-04 | norm 113.6291 | dt 0.200
type train | step 7060 | loss 2.8802 1.4020 3.7077 5.9704 | lr 7.8e-04 | norm 104.8651 | dt 0.197
type train | step 7070 | loss 2.9197 1.3932 3.6900 5.9065 | lr 7.8e-04 | norm 109.1511 | dt 0.189
type train | step 7080 | loss 2.8918 1.3928 3.7849 5.9871 | lr 7.8e-04 | norm 95.7616 | dt 0.166
type train | step 7090 | loss 2.9851 1.4546 3.8278 6.1897 | lr 7.8e-04 | norm 114.1288 | dt 0.184
type train | step 7100 | loss 2.8745 1.4064 3.6852 5.9567 | lr 7.8e-04 | norm 111.6821 | dt 0.168
type train | step 7110 | loss 2.8694 1.3854 3.7273 5.9319 | lr 7.8e-04 | norm 102.6016 | dt 0.201
type train | step 7120 | loss 2.8942 1.4134 3.7488 6.0311 | lr 7.8e-04 | norm 108.1689 | dt 0.201
type train | step 7130 | loss 2.8443 1.4172 3.7819 5.9574 | lr 7.8e-04 | norm 108.5426 | dt 0.211
type train | step 7140 | loss 2.8609 1.4219 3.7717 5.9728 | lr 7.8e-04 | norm 99.8146 | dt 0.167
type train | step 7150 | loss 2.8941 1.4009 3.7492 5.9578 | lr 7.8e-04 | norm 102.4161 | dt 0.173
type train | step 7160 | loss 2.8701 1.4380 3.7563 5.9727 | lr 7.8e-04 | norm 107.6440 | dt 0.201
type train | step 7170 | loss 2.8578 1.3755 3.7381 5.8828 | lr 7.7e-04 | norm 130.2965 | dt 0.199
type train | step 7180 | loss 2.8470 1.4417 3.7827 6.0243 | lr 7.7e-04 | norm 131.0835 | dt 0.200
type train | step 7190 | loss 2.8228 1.4434 3.7589 5.9321 | lr 7.7e-04 | norm 113.3646 | dt 0.165
type train | step 7200 | loss 2.8204 1.4115 3.7571 5.9267 | lr 7.7e-04 | norm 108.9693 | dt 0.167
type train | step 7210 | loss 2.9164 1.4251 3.7855 5.9796 | lr 7.7e-04 | norm 107.8430 | dt 0.172
type train | step 7220 | loss 2.8017 1.3967 3.7121 5.9363 | lr 7.7e-04 | norm 111.8990 | dt 0.177
type train | step 7230 | loss 2.8167 1.4009 3.6802 5.9460 | lr 7.7e-04 | norm 121.4008 | dt 0.201
type train | step 7240 | loss 2.8765 1.4357 3.7554 6.0297 | lr 7.7e-04 | norm 100.1117 | dt 0.164
type train | step 7250 | loss 2.8298 1.3919 3.7054 5.8917 | lr 7.7e-04 | norm 113.1946 | dt 0.203
type train | step 7260 | loss 2.7843 1.4095 3.7947 5.9672 | lr 7.7e-04 | norm 116.0947 | dt 0.176
type train | step 7270 | loss 2.8528 1.4169 3.7755 6.0403 | lr 7.7e-04 | norm 109.6412 | dt 0.194
type train | step 7280 | loss 2.8070 1.4184 3.7491 5.9858 | lr 7.7e-04 | norm 125.9998 | dt 0.162
type train | step 7290 | loss 2.7873 1.3994 3.7511 5.8536 | lr 7.7e-04 | norm 108.6694 | dt 0.213
type train | step 7300 | loss 2.7999 1.4294 3.7364 6.0037 | lr 7.7e-04 | norm 140.6039 | dt 0.200
type train | step 7310 | loss 2.7656 1.3854 3.6909 5.8678 | lr 7.7e-04 | norm 127.8693 | dt 0.168
type train | step 7320 | loss 2.8490 1.4090 3.7456 5.8952 | lr 7.7e-04 | norm 105.2965 | dt 0.177
type train | step 7330 | loss 2.8030 1.4006 3.7316 5.9272 | lr 7.6e-04 | norm 105.8427 | dt 0.183
type train | step 7340 | loss 2.8180 1.3990 3.7132 5.9207 | lr 7.6e-04 | norm 106.8476 | dt 0.181
type train | step 7350 | loss 2.7877 1.3729 3.7100 5.8348 | lr 7.6e-04 | norm 125.2096 | dt 0.162
type train | step 7360 | loss 2.7942 1.4400 3.7779 6.0753 | lr 7.6e-04 | norm 126.8097 | dt 0.200
type train | step 7370 | loss 2.7827 1.3807 3.7062 5.7660 | lr 7.6e-04 | norm 107.3141 | dt 0.166
type train | step 7380 | loss 2.7783 1.3667 3.7139 5.8184 | lr 7.6e-04 | norm 135.7737 | dt 0.168
type train | step 7390 | loss 2.7887 1.3891 3.7292 5.8028 | lr 7.6e-04 | norm 118.7153 | dt 0.167
type train | step 7400 | loss 2.7508 1.4385 3.7449 5.9703 | lr 7.6e-04 | norm 120.6752 | dt 0.213
type train | step 7410 | loss 2.6665 1.4207 3.7383 5.8029 | lr 7.6e-04 | norm 137.7887 | dt 0.170
type train | step 7420 | loss 2.6177 1.3923 3.6687 5.7370 | lr 7.6e-04 | norm 118.6511 | dt 0.181
type train | step 7430 | loss 2.6769 1.4164 3.7340 5.6954 | lr 7.6e-04 | norm 104.6601 | dt 0.184
type train | step 7440 | loss 2.7548 1.4391 3.7606 5.8485 | lr 7.6e-04 | norm 118.4081 | dt 0.211
type train | step 7450 | loss 2.7319 1.3830 3.7364 5.8626 | lr 7.6e-04 | norm 146.3561 | dt 0.182
type train | step 7460 | loss 2.8350 1.4880 3.8096 5.9708 | lr 7.6e-04 | norm 131.4366 | dt 0.165
type train | step 7470 | loss 2.7190 1.4080 3.7698 5.7809 | lr 7.6e-04 | norm 117.6364 | dt 0.204
type train | step 7480 | loss 2.6962 1.4005 3.7344 5.7814 | lr 7.5e-04 | norm 123.3318 | dt 0.197
type train | step 7490 | loss 2.6515 1.3923 3.6699 5.6134 | lr 7.5e-04 | norm 122.4768 | dt 0.196
type train | step 7500 | loss 2.7315 1.4312 3.7284 5.7872 | lr 7.5e-04 | norm 122.8500 | dt 0.185
type train | step 7510 | loss 2.6730 1.3799 3.7647 5.7959 | lr 7.5e-04 | norm 106.7031 | dt 0.166
type train | step 7520 | loss 2.7383 1.4337 3.7504 5.8626 | lr 7.5e-04 | norm 148.1970 | dt 0.199
type train | step 7530 | loss 2.6477 1.4026 3.7007 5.7657 | lr 7.5e-04 | norm 118.6013 | dt 0.179
type train | step 7540 | loss 2.5587 1.3951 3.7576 5.6998 | lr 7.5e-04 | norm 131.9639 | dt 0.223
type train | step 7550 | loss 2.5653 1.3942 3.6864 5.5863 | lr 7.5e-04 | norm 111.0940 | dt 0.234
type train | step 7560 | loss 2.6273 1.4517 3.7821 5.7532 | lr 7.5e-04 | norm 104.5561 | dt 0.162
type train | step 7570 | loss 2.6163 1.4001 3.7390 5.6834 | lr 7.5e-04 | norm 115.4623 | dt 0.166
type train | step 7580 | loss 2.5940 1.4308 3.7299 5.6892 | lr 7.5e-04 | norm 110.3748 | dt 0.199
type train | step 7590 | loss 2.5984 1.4284 3.7233 5.6834 | lr 7.5e-04 | norm 117.5120 | dt 0.169
type train | step 7600 | loss 2.5619 1.4432 3.7634 5.7763 | lr 7.5e-04 | norm 109.2792 | dt 0.231
type train | step 7610 | loss 2.5721 1.4134 3.7121 5.5688 | lr 7.5e-04 | norm 102.1165 | dt 0.219
type train | step 7620 | loss 2.6649 1.4683 3.7608 5.6534 | lr 7.5e-04 | norm 119.5702 | dt 0.214
type train | step 7630 | loss 2.5783 1.4420 3.7566 5.6786 | lr 7.4e-04 | norm 108.2448 | dt 0.201
type train | step 7640 | loss 2.5292 1.4291 3.7605 5.6943 | lr 7.4e-04 | norm 129.3259 | dt 0.205
type train | step 7650 | loss 2.5014 1.4347 3.7256 5.6731 | lr 7.4e-04 | norm 135.1413 | dt 0.202
type train | step 7660 | loss 2.5401 1.4028 3.6884 5.5769 | lr 7.4e-04 | norm 141.9996 | dt 0.169
type train | step 7670 | loss 2.6180 1.4090 3.6962 5.5949 | lr 7.4e-04 | norm 145.9360 | dt 0.199
type train | step 7680 | loss 2.5654 1.3938 3.7322 5.5261 | lr 7.4e-04 | norm 139.6333 | dt 0.177
type train | step 7690 | loss 2.5849 1.4099 3.7827 5.6187 | lr 7.4e-04 | norm 125.4228 | dt 0.175
type train | step 7700 | loss 2.6340 1.4556 3.8534 5.7407 | lr 7.4e-04 | norm 112.2314 | dt 0.199
type train | step 7710 | loss 2.5438 1.4144 3.7060 5.5805 | lr 7.4e-04 | norm 115.5406 | dt 0.201
type train | step 7720 | loss 2.5383 1.3995 3.7252 5.5455 | lr 7.4e-04 | norm 140.5470 | dt 0.179
type train | step 7730 | loss 2.5127 1.4270 3.7636 5.6003 | lr 7.4e-04 | norm 131.7465 | dt 0.200
type train | step 7740 | loss 2.5016 1.4044 3.7728 5.4900 | lr 7.4e-04 | norm 100.0137 | dt 0.165
type train | step 7750 | loss 2.5115 1.4119 3.7477 5.5783 | lr 7.4e-04 | norm 131.4581 | dt 0.206
type train | step 7760 | loss 2.5306 1.4041 3.7360 5.5190 | lr 7.4e-04 | norm 112.5814 | dt 0.190
type train | step 7770 | loss 2.5323 1.4171 3.7508 5.6010 | lr 7.4e-04 | norm 156.2716 | dt 0.192
type train | step 7780 | loss 2.5161 1.3710 3.6920 5.4279 | lr 7.3e-04 | norm 140.2242 | dt 0.221
type train | step 7790 | loss 2.6121 1.4315 3.7654 5.5194 | lr 7.3e-04 | norm 117.0192 | dt 0.219
type train | step 7800 | loss 2.5928 1.4213 3.7301 5.4862 | lr 7.3e-04 | norm 116.9353 | dt 0.168
type train | step 7810 | loss 2.5639 1.4109 3.7418 5.4358 | lr 7.3e-04 | norm 103.9528 | dt 0.187
type train | step 7820 | loss 2.5929 1.4416 3.7574 5.5367 | lr 7.3e-04 | norm 121.0983 | dt 0.213
type train | step 7830 | loss 2.5468 1.4243 3.7159 5.5138 | lr 7.3e-04 | norm 145.9269 | dt 0.174
type train | step 7840 | loss 2.5268 1.4157 3.7153 5.4579 | lr 7.3e-04 | norm 124.1619 | dt 0.176
type train | step 7850 | loss 2.5477 1.4555 3.7377 5.5918 | lr 7.3e-04 | norm 137.6104 | dt 0.172
type train | step 7860 | loss 2.4772 1.4025 3.7250 5.4247 | lr 7.3e-04 | norm 131.1256 | dt 0.201
type train | step 7870 | loss 2.5484 1.4238 3.7717 5.4776 | lr 7.3e-04 | norm 122.5150 | dt 0.205
type train | step 7880 | loss 2.5187 1.4480 3.7762 5.5114 | lr 7.3e-04 | norm 133.8710 | dt 0.174
type train | step 7890 | loss 2.4659 1.4372 3.7213 5.5373 | lr 7.3e-04 | norm 137.1917 | dt 0.224
type train | step 7900 | loss 2.4491 1.3931 3.7579 5.3510 | lr 7.3e-04 | norm 121.2135 | dt 0.195
type train | step 7910 | loss 2.4849 1.4268 3.7429 5.4957 | lr 7.3e-04 | norm 135.7248 | dt 0.214
type train | step 7920 | loss 2.4114 1.3904 3.6935 5.3562 | lr 7.3e-04 | norm 110.3861 | dt 0.191
type train | step 7930 | loss 2.4496 1.4196 3.7190 5.4036 | lr 7.2e-04 | norm 117.5625 | dt 0.176
type train | step 7940 | loss 2.3894 1.4035 3.7352 5.4088 | lr 7.2e-04 | norm 109.7288 | dt 0.196
type train | step 7950 | loss 2.4748 1.4186 3.7000 5.4270 | lr 7.2e-04 | norm 123.2757 | dt 0.189
type train | step 7960 | loss 2.4105 1.3920 3.7120 5.3324 | lr 7.2e-04 | norm 136.5956 | dt 0.173
type train | step 7970 | loss 2.5400 1.4645 3.7754 5.5255 | lr 7.2e-04 | norm 110.4039 | dt 0.216
type train | step 7980 | loss 2.3731 1.4135 3.7029 5.2651 | lr 7.2e-04 | norm 123.9501 | dt 0.184
type train | step 7990 | loss 2.3394 1.3995 3.7185 5.3485 | lr 7.2e-04 | norm 142.3115 | dt 0.193
type train | step 8000 | loss 2.3520 1.4201 3.7221 5.3363 | lr 7.2e-04 | norm 136.3508 | dt 0.221
type train | step 8010 | loss 2.4282 1.4618 3.7495 5.4948 | lr 7.2e-04 | norm 123.5099 | dt 0.230
type train | step 8020 | loss 2.4329 1.4160 3.7127 5.3064 | lr 7.2e-04 | norm 112.3336 | dt 0.205
type train | step 8030 | loss 2.3327 1.4013 3.6680 5.2969 | lr 7.2e-04 | norm 125.6610 | dt 0.213
type train | step 8040 | loss 2.3825 1.4062 3.6670 5.2862 | lr 7.2e-04 | norm 115.1377 | dt 0.169
type train | step 8050 | loss 2.3576 1.4395 3.7537 5.4333 | lr 7.2e-04 | norm 130.1139 | dt 0.205
type train | step 8060 | loss 2.3955 1.4087 3.7592 5.3786 | lr 7.2e-04 | norm 117.0706 | dt 0.204
type train | step 8070 | loss 2.4529 1.5041 3.8086 5.5016 | lr 7.2e-04 | norm 123.2018 | dt 0.163
type train | step 8080 | loss 2.4422 1.4523 3.7436 5.3760 | lr 7.1e-04 | norm 149.4193 | dt 0.171
type train | step 8090 | loss 2.3185 1.4265 3.7015 5.3610 | lr 7.1e-04 | norm 122.9692 | dt 0.177
type train | step 8100 | loss 2.3319 1.3967 3.7139 5.2116 | lr 7.1e-04 | norm 117.3957 | dt 0.183
type train | step 8110 | loss 2.4025 1.4625 3.7347 5.3731 | lr 7.1e-04 | norm 103.9791 | dt 0.188
type train | step 8120 | loss 2.3205 1.4063 3.7380 5.4762 | lr 7.1e-04 | norm 150.5610 | dt 0.202
type train | step 8130 | loss 2.4024 1.4525 3.7425 5.4702 | lr 7.1e-04 | norm 146.6324 | dt 0.183
type train | step 8140 | loss 2.3621 1.4376 3.7364 5.3955 | lr 7.1e-04 | norm 131.6909 | dt 0.177
type train | step 8150 | loss 2.3427 1.4294 3.7493 5.3027 | lr 7.1e-04 | norm 123.4421 | dt 0.206
type train | step 8160 | loss 2.3228 1.4031 3.6765 5.2294 | lr 7.1e-04 | norm 124.8028 | dt 0.182
type train | step 8170 | loss 2.3922 1.4510 3.7733 5.4310 | lr 7.1e-04 | norm 143.4059 | dt 0.165
type train | step 8180 | loss 2.3916 1.4223 3.7171 5.3425 | lr 7.1e-04 | norm 142.8205 | dt 0.212
type train | step 8190 | loss 2.3741 1.4379 3.7224 5.3654 | lr 7.1e-04 | norm 116.1704 | dt 0.207
type train | step 8200 | loss 2.3620 1.4367 3.7038 5.3400 | lr 7.1e-04 | norm 142.8109 | dt 0.206
type train | step 8210 | loss 2.3904 1.4486 3.7464 5.4222 | lr 7.1e-04 | norm 116.2712 | dt 0.195
type train | step 8220 | loss 2.3509 1.4166 3.7188 5.2659 | lr 7.1e-04 | norm 126.7980 | dt 0.175
type train | step 8230 | loss 2.3489 1.4684 3.7538 5.3515 | lr 7.0e-04 | norm 125.8014 | dt 0.199
type train | step 8240 | loss 2.3882 1.4309 3.7567 5.3982 | lr 7.0e-04 | norm 122.3059 | dt 0.199
type train | step 8250 | loss 2.3161 1.4225 3.7529 5.4080 | lr 7.0e-04 | norm 139.9881 | dt 0.165
type train | step 8260 | loss 2.3058 1.4292 3.7147 5.3944 | lr 7.0e-04 | norm 130.2148 | dt 0.192
type train | step 8270 | loss 2.3024 1.4040 3.7133 5.2733 | lr 7.0e-04 | norm 120.5480 | dt 0.215
type train | step 8280 | loss 2.3467 1.4043 3.6875 5.3020 | lr 7.0e-04 | norm 135.2734 | dt 0.202
type train | step 8290 | loss 2.2893 1.3818 3.7065 5.2392 | lr 7.0e-04 | norm 130.9854 | dt 0.188
type train | step 8300 | loss 2.3057 1.4080 3.7741 5.3809 | lr 7.0e-04 | norm 158.3917 | dt 0.186
type train | step 8310 | loss 2.3589 1.4548 3.8130 5.4690 | lr 7.0e-04 | norm 131.0351 | dt 0.167
type train | step 8320 | loss 2.2902 1.4084 3.7007 5.3065 | lr 7.0e-04 | norm 120.1394 | dt 0.179
type train | step 8330 | loss 2.3561 1.4131 3.7166 5.2502 | lr 7.0e-04 | norm 124.9408 | dt 0.169
type train | step 8340 | loss 2.4208 1.4406 3.7461 5.3289 | lr 7.0e-04 | norm 136.8558 | dt 0.182
type train | step 8350 | loss 2.3402 1.4106 3.7750 5.2543 | lr 7.0e-04 | norm 127.8138 | dt 0.223
type train | step 8360 | loss 2.3928 1.4163 3.7612 5.3590 | lr 7.0e-04 | norm 129.9136 | dt 0.168
type train | step 8370 | loss 2.3162 1.4024 3.7132 5.2790 | lr 6.9e-04 | norm 110.0658 | dt 0.203
type train | step 8380 | loss 2.3289 1.4316 3.7290 5.3342 | lr 6.9e-04 | norm 121.2876 | dt 0.216
type train | step 8390 | loss 2.3008 1.3862 3.7019 5.1582 | lr 6.9e-04 | norm 119.2816 | dt 0.205
type train | step 8400 | loss 2.3454 1.4518 3.7414 5.3473 | lr 6.9e-04 | norm 151.8961 | dt 0.192
type train | step 8410 | loss 2.3290 1.4247 3.7429 5.3202 | lr 6.9e-04 | norm 125.5120 | dt 0.167
type train | step 8420 | loss 2.3378 1.4221 3.7349 5.2995 | lr 6.9e-04 | norm 133.7731 | dt 0.176
type train | step 8430 | loss 2.3384 1.4484 3.7510 5.3836 | lr 6.9e-04 | norm 142.8750 | dt 0.209
type train | step 8440 | loss 2.2930 1.4266 3.6930 5.3489 | lr 6.9e-04 | norm 127.9005 | dt 0.167
type train | step 8450 | loss 2.3310 1.4230 3.6996 5.2900 | lr 6.9e-04 | norm 120.6628 | dt 0.195
type train | step 8460 | loss 2.3616 1.4303 3.7511 5.4429 | lr 6.9e-04 | norm 120.6071 | dt 0.226
type train | step 8470 | loss 2.2686 1.3955 3.6897 5.3089 | lr 6.9e-04 | norm 136.5851 | dt 0.199
type train | step 8480 | loss 2.3352 1.4319 3.7445 5.3526 | lr 6.9e-04 | norm 119.5707 | dt 0.169
type train | step 8490 | loss 2.3756 1.4230 3.7374 5.3889 | lr 6.9e-04 | norm 106.1217 | dt 0.196
type train | step 8500 | loss 2.3097 1.4120 3.6897 5.3877 | lr 6.9e-04 | norm 132.5209 | dt 0.197
type train | step 8510 | loss 2.3818 1.3824 3.7490 5.2586 | lr 6.8e-04 | norm 121.0375 | dt 0.204
type train | step 8520 | loss 2.3164 1.4151 3.7237 5.3866 | lr 6.8e-04 | norm 133.5401 | dt 0.167
type train | step 8530 | loss 2.2611 1.3720 3.6609 5.3168 | lr 6.8e-04 | norm 129.8177 | dt 0.167
type train | step 8540 | loss 2.3330 1.3992 3.7225 5.3187 | lr 6.8e-04 | norm 112.9234 | dt 0.205
type train | step 8550 | loss 2.3391 1.4106 3.7146 5.3399 | lr 6.8e-04 | norm 120.5781 | dt 0.214
type train | step 8560 | loss 2.3167 1.4184 3.6799 5.2879 | lr 6.8e-04 | norm 112.5490 | dt 0.189
type train | step 8570 | loss 2.2998 1.4136 3.7139 5.2719 | lr 6.8e-04 | norm 138.9869 | dt 0.177
type train | step 8580 | loss 2.4353 1.4870 3.7780 5.4560 | lr 6.8e-04 | norm 115.4595 | dt 0.210
type train | step 8590 | loss 2.3233 1.4125 3.6406 5.2017 | lr 6.8e-04 | norm 111.3242 | dt 0.187
type train | step 8600 | loss 2.3060 1.3982 3.7110 5.2847 | lr 6.8e-04 | norm 152.3571 | dt 0.201
type train | step 8610 | loss 2.3327 1.4066 3.6813 5.2560 | lr 6.8e-04 | norm 146.9800 | dt 0.202
type train | step 8620 | loss 2.3598 1.4443 3.7095 5.4053 | lr 6.8e-04 | norm 125.4657 | dt 0.165
type train | step 8630 | loss 2.3373 1.4090 3.7415 5.2309 | lr 6.8e-04 | norm 138.3161 | dt 0.175
type train | step 8640 | loss 2.2942 1.3873 3.6650 5.2216 | lr 6.8e-04 | norm 123.1634 | dt 0.209
type train | step 8650 | loss 2.2896 1.4207 3.6815 5.2056 | lr 6.8e-04 | norm 139.7160 | dt 0.169
type train | step 8660 | loss 2.3370 1.4476 3.7189 5.3191 | lr 6.7e-04 | norm 136.7719 | dt 0.207
type train | step 8670 | loss 2.2905 1.4032 3.7358 5.3384 | lr 6.7e-04 | norm 130.9151 | dt 0.180
type train | step 8680 | loss 2.3830 1.4978 3.7918 5.5165 | lr 6.7e-04 | norm 135.5755 | dt 0.188
type train | step 8690 | loss 2.2938 1.3987 3.7579 5.2772 | lr 6.7e-04 | norm 126.1370 | dt 0.190
type train | step 8700 | loss 2.3104 1.4188 3.6988 5.3440 | lr 6.7e-04 | norm 129.2446 | dt 0.189
type train | step 8710 | loss 2.2479 1.3959 3.6704 5.1974 | lr 6.7e-04 | norm 123.2742 | dt 0.194
type train | step 8720 | loss 2.2861 1.4294 3.7283 5.3169 | lr 6.7e-04 | norm 120.3196 | dt 0.168
type train | step 8730 | loss 2.2876 1.3948 3.7635 5.3542 | lr 6.7e-04 | norm 114.9653 | dt 0.187
type train | step 8740 | loss 2.3711 1.4385 3.7579 5.3780 | lr 6.7e-04 | norm 118.4934 | dt 0.196
type train | step 8750 | loss 2.3168 1.4032 3.7356 5.3907 | lr 6.7e-04 | norm 134.5816 | dt 0.170
type train | step 8760 | loss 2.2713 1.3912 3.7359 5.2703 | lr 6.7e-04 | norm 145.4447 | dt 0.203
type train | step 8770 | loss 2.2280 1.3812 3.6697 5.1888 | lr 6.7e-04 | norm 133.9035 | dt 0.195
type train | step 8780 | loss 2.2803 1.4481 3.7644 5.3549 | lr 6.7e-04 | norm 129.7144 | dt 0.181
type train | step 8790 | loss 2.2764 1.4033 3.7358 5.2952 | lr 6.7e-04 | norm 128.6560 | dt 0.207
type train | step 8800 | loss 2.2288 1.4200 3.7261 5.2627 | lr 6.6e-04 | norm 133.2794 | dt 0.211
type train | step 8810 | loss 2.2723 1.4265 3.7087 5.2619 | lr 6.6e-04 | norm 119.6394 | dt 0.208
type train | step 8820 | loss 2.3952 1.4474 3.7733 5.4209 | lr 6.6e-04 | norm 130.5872 | dt 0.168
type train | step 8830 | loss 2.2084 1.3995 3.7175 5.2377 | lr 6.6e-04 | norm 119.7364 | dt 0.196
type train | step 8840 | loss 2.2694 1.4348 3.7627 5.3004 | lr 6.6e-04 | norm 122.5658 | dt 0.182
type train | step 8850 | loss 2.2723 1.3966 3.7390 5.3522 | lr 6.6e-04 | norm 124.2226 | dt 0.196
type train | step 8860 | loss 2.2254 1.3939 3.7417 5.3524 | lr 6.6e-04 | norm 126.5434 | dt 0.200
type train | step 8870 | loss 2.2126 1.3973 3.7251 5.3502 | lr 6.6e-04 | norm 141.9034 | dt 0.191
type train | step 8880 | loss 2.1797 1.3921 3.7080 5.2619 | lr 6.6e-04 | norm 126.8211 | dt 0.205
type train | step 8890 | loss 2.2019 1.3977 3.7137 5.2852 | lr 6.6e-04 | norm 125.2195 | dt 0.200
type train | step 8900 | loss 2.2483 1.3813 3.7049 5.2013 | lr 6.6e-04 | norm 122.9637 | dt 0.174
type train | step 8910 | loss 2.2144 1.3835 3.7546 5.3220 | lr 6.6e-04 | norm 124.0753 | dt 0.204
type train | step 8920 | loss 2.2595 1.4268 3.7955 5.4638 | lr 6.6e-04 | norm 159.3614 | dt 0.196
type train | step 8930 | loss 2.2105 1.3861 3.6829 5.3454 | lr 6.6e-04 | norm 141.6184 | dt 0.211
type train | step 8940 | loss 2.2571 1.3658 3.7085 5.2414 | lr 6.5e-04 | norm 113.4905 | dt 0.195
type train | step 8950 | loss 2.2445 1.3876 3.7272 5.3390 | lr 6.5e-04 | norm 126.1614 | dt 0.195
type train | step 8960 | loss 2.2399 1.3810 3.7744 5.2653 | lr 6.5e-04 | norm 118.5055 | dt 0.177
type train | step 8970 | loss 2.2512 1.3912 3.7447 5.3580 | lr 6.5e-04 | norm 109.8519 | dt 0.181
type train | step 8980 | loss 2.2614 1.3954 3.7305 5.3108 | lr 6.5e-04 | norm 132.1500 | dt 0.223
type train | step 8990 | loss 2.2646 1.4069 3.7340 5.3068 | lr 6.5e-04 | norm 118.5797 | dt 0.198
type train | step 9000 | loss 2.2882 1.3668 3.6946 5.2223 | lr 6.5e-04 | norm 133.7595 | dt 0.199
type train | step 9010 | loss 2.2884 1.4117 3.7311 5.3747 | lr 6.5e-04 | norm 143.7473 | dt 0.210
type train | step 9020 | loss 2.2940 1.4136 3.6997 5.2964 | lr 6.5e-04 | norm 120.4188 | dt 0.202
type train | step 9030 | loss 2.3519 1.3981 3.7224 5.3172 | lr 6.5e-04 | norm 105.8931 | dt 0.179
type train | step 9040 | loss 2.3174 1.4319 3.7223 5.3241 | lr 6.5e-04 | norm 123.3300 | dt 0.200
type train | step 9050 | loss 2.2799 1.4124 3.6774 5.3810 | lr 6.5e-04 | norm 124.6177 | dt 0.207
type train | step 9060 | loss 2.3015 1.3944 3.6792 5.3345 | lr 6.5e-04 | norm 137.4621 | dt 0.170
type train | step 9070 | loss 2.3272 1.4378 3.7210 5.4209 | lr 6.5e-04 | norm 118.6783 | dt 0.198
type train | step 9080 | loss 2.2474 1.3912 3.7179 5.2635 | lr 6.4e-04 | norm 113.6990 | dt 0.193
type train | step 9090 | loss 2.3246 1.4051 3.7796 5.3741 | lr 6.4e-04 | norm 153.5902 | dt 0.179
type train | step 9100 | loss 2.3349 1.4354 3.7454 5.4051 | lr 6.4e-04 | norm 142.2731 | dt 0.174
type train | step 9110 | loss 2.3101 1.4387 3.7239 5.4145 | lr 6.4e-04 | norm 153.5857 | dt 0.168
type train | step 9120 | loss 2.3269 1.3962 3.7234 5.2600 | lr 6.4e-04 | norm 144.7325 | dt 0.202
type train | step 9130 | loss 2.3030 1.4207 3.7074 5.3753 | lr 6.4e-04 | norm 139.0347 | dt 0.196
type train | step 9140 | loss 2.3136 1.3808 3.6875 5.2491 | lr 6.4e-04 | norm 114.9825 | dt 0.176
type train | step 9150 | loss 2.3499 1.4228 3.7146 5.3797 | lr 6.4e-04 | norm 151.3764 | dt 0.213
type train | step 9160 | loss 2.3258 1.4152 3.7164 5.3306 | lr 6.4e-04 | norm 125.2167 | dt 0.174
type train | step 9170 | loss 2.3607 1.3932 3.7023 5.3554 | lr 6.4e-04 | norm 122.6294 | dt 0.189
type train | step 9180 | loss 2.3964 1.3582 3.6685 5.2161 | lr 6.4e-04 | norm 137.4332 | dt 0.208
type train | step 9190 | loss 2.5930 1.4429 3.7542 5.4981 | lr 6.4e-04 | norm 120.5395 | dt 0.193
type train | step 9200 | loss 2.4349 1.3766 3.6564 5.2122 | lr 6.4e-04 | norm 121.8451 | dt 0.196
type train | step 9210 | loss 2.4590 1.3531 3.6844 5.2742 | lr 6.4e-04 | norm 129.2184 | dt 0.212
type train | step 9220 | loss 2.4453 1.3768 3.6978 5.3076 | lr 6.3e-04 | norm 121.0015 | dt 0.199
type train | step 9230 | loss 2.4763 1.4224 3.7688 5.4869 | lr 6.3e-04 | norm 129.4841 | dt 0.209
type train | step 9240 | loss 2.4730 1.3631 3.7134 5.2852 | lr 6.3e-04 | norm 139.0546 | dt 0.188
type train | step 9250 | loss 2.4723 1.3705 3.6703 5.2639 | lr 6.3e-04 | norm 112.0229 | dt 0.201
type train | step 9260 | loss 2.4584 1.3854 3.7077 5.2171 | lr 6.3e-04 | norm 128.5119 | dt 0.173
type train | step 9270 | loss 2.4821 1.4147 3.7444 5.3645 | lr 6.3e-04 | norm 109.9058 | dt 0.204
type train | step 9280 | loss 2.4512 1.3856 3.7212 5.3510 | lr 6.3e-04 | norm 119.4912 | dt 0.197
type train | step 9290 | loss 2.5525 1.4958 3.8027 5.5333 | lr 6.3e-04 | norm 140.9982 | dt 0.195
type train | step 9300 | loss 2.4644 1.3972 3.7248 5.3450 | lr 6.3e-04 | norm 121.8525 | dt 0.169
type train | step 9310 | loss 2.6088 1.4021 3.6607 5.3278 | lr 6.3e-04 | norm 120.6109 | dt 0.205
type train | step 9320 | loss 2.5992 1.3910 3.6863 5.1799 | lr 6.3e-04 | norm 110.7223 | dt 0.185
type train | step 9330 | loss 2.6206 1.4131 3.7074 5.3675 | lr 6.3e-04 | norm 131.4729 | dt 0.210
type train | step 9340 | loss 2.5935 1.3760 3.7628 5.4098 | lr 6.3e-04 | norm 127.6549 | dt 0.196
type train | step 9350 | loss 2.6789 1.4409 3.7966 5.4776 | lr 6.2e-04 | norm 104.3180 | dt 0.213
type train | step 9360 | loss 2.5879 1.3905 3.7169 5.3596 | lr 6.2e-04 | norm 118.7834 | dt 0.183
type train | step 9370 | loss 2.5693 1.3731 3.7168 5.3014 | lr 6.2e-04 | norm 129.5281 | dt 0.170
type train | step 9380 | loss 2.5279 1.3711 3.6481 5.2592 | lr 6.2e-04 | norm 131.4746 | dt 0.168
type train | step 9390 | loss 2.5773 1.4131 3.7538 5.3866 | lr 6.2e-04 | norm 130.0338 | dt 0.178
type train | step 9400 | loss 2.5329 1.3654 3.7261 5.3427 | lr 6.2e-04 | norm 133.8892 | dt 0.181
type train | step 9410 | loss 2.5374 1.3809 3.7249 5.3140 | lr 6.2e-04 | norm 115.1804 | dt 0.171
type train | step 9420 | loss 2.5694 1.3964 3.6841 5.3176 | lr 6.2e-04 | norm 115.9909 | dt 0.216
type train | step 9430 | loss 2.5532 1.4104 3.7415 5.4755 | lr 6.2e-04 | norm 106.5041 | dt 0.204
type train | step 9440 | loss 2.4742 1.3925 3.6858 5.2850 | lr 6.2e-04 | norm 133.2660 | dt 0.169
type train | step 9450 | loss 2.4557 1.4413 3.7317 5.3366 | lr 6.2e-04 | norm 113.7511 | dt 0.169
type train | step 9460 | loss 2.4622 1.4306 3.7659 5.4423 | lr 6.2e-04 | norm 135.1754 | dt 0.175
type train | step 9470 | loss 2.4008 1.3969 3.7288 5.4319 | lr 6.2e-04 | norm 132.5995 | dt 0.173
type train | step 9480 | loss 2.4073 1.4248 3.6920 5.4283 | lr 6.2e-04 | norm 139.4428 | dt 0.207
type train | step 9490 | loss 2.3532 1.4111 3.7141 5.3371 | lr 6.1e-04 | norm 119.8273 | dt 0.217
type train | step 9500 | loss 2.4127 1.4137 3.6976 5.3674 | lr 6.1e-04 | norm 136.3707 | dt 0.200
type train | step 9510 | loss 2.3978 1.4117 3.6966 5.3475 | lr 6.1e-04 | norm 141.9563 | dt 0.199
type train | step 9520 | loss 2.4019 1.4370 3.7503 5.4244 | lr 6.1e-04 | norm 126.4861 | dt 0.168
type train | step 9530 | loss 2.4692 1.4559 3.7740 5.5468 | lr 6.1e-04 | norm 112.8938 | dt 0.164
type train | step 9540 | loss 2.3917 1.4067 3.6920 5.4280 | lr 6.1e-04 | norm 134.9176 | dt 0.181
type train | step 9550 | loss 2.4087 1.3684 3.7296 5.3620 | lr 6.1e-04 | norm 126.0919 | dt 0.174
type train | step 9560 | loss 2.4642 1.4221 3.7145 5.3890 | lr 6.1e-04 | norm 105.7718 | dt 0.204
type train | step 9570 | loss 2.4556 1.4031 3.7583 5.3473 | lr 6.1e-04 | norm 116.9490 | dt 0.185
type train | step 9580 | loss 2.4665 1.3815 3.7765 5.4133 | lr 6.1e-04 | norm 108.2978 | dt 0.208
type train | step 9590 | loss 2.4251 1.3709 3.7278 5.3974 | lr 6.1e-04 | norm 122.6208 | dt 0.187
type train | step 9600 | loss 2.4549 1.3928 3.7209 5.4185 | lr 6.1e-04 | norm 102.0255 | dt 0.200
type train | step 9610 | loss 2.4951 1.3460 3.6929 5.2834 | lr 6.1e-04 | norm 113.0701 | dt 0.208
type train | step 9620 | loss 2.4063 1.3809 3.6718 5.4643 | lr 6.1e-04 | norm 113.7304 | dt 0.172
type train | step 9630 | loss 2.4514 1.3877 3.6976 5.3957 | lr 6.0e-04 | norm 104.5337 | dt 0.180
type train | step 9640 | loss 2.3857 1.3681 3.7627 5.4049 | lr 6.0e-04 | norm 122.3016 | dt 0.175
type train | step 9650 | loss 2.3976 1.3952 3.7648 5.4597 | lr 6.0e-04 | norm 119.4989 | dt 0.170
type train | step 9660 | loss 2.3894 1.3959 3.6606 5.4443 | lr 6.0e-04 | norm 121.2914 | dt 0.201
type train | step 9670 | loss 2.3208 1.3653 3.7191 5.4216 | lr 6.0e-04 | norm 112.3844 | dt 0.203
type train | step 9680 | loss 2.3823 1.4128 3.7528 5.5566 | lr 6.0e-04 | norm 128.4127 | dt 0.193
type train | step 9690 | loss 2.3090 1.3944 3.6684 5.4242 | lr 6.0e-04 | norm 128.4270 | dt 0.197
type train | step 9700 | loss 2.3008 1.4069 3.7312 5.4876 | lr 6.0e-04 | norm 123.6090 | dt 0.177
type train | step 9710 | loss 2.3985 1.4351 3.6853 5.5147 | lr 6.0e-04 | norm 114.4957 | dt 0.189
type train | step 9720 | loss 2.3498 1.4234 3.6794 5.5206 | lr 6.0e-04 | norm 124.4947 | dt 0.198
type train | step 9730 | loss 2.3555 1.3921 3.7033 5.3610 | lr 6.0e-04 | norm 96.8894 | dt 0.218
type train | step 9740 | loss 2.3136 1.4144 3.6892 5.5135 | lr 6.0e-04 | norm 121.0217 | dt 0.169
type train | step 9750 | loss 2.3655 1.3804 3.6611 5.3756 | lr 6.0e-04 | norm 102.5283 | dt 0.204
type train | step 9760 | loss 2.4202 1.4116 3.6867 5.4255 | lr 6.0e-04 | norm 121.7361 | dt 0.181
type train | step 9770 | loss 2.4240 1.4039 3.6790 5.4577 | lr 5.9e-04 | norm 130.3942 | dt 0.167
type train | step 9780 | loss 2.4768 1.4038 3.6662 5.4797 | lr 5.9e-04 | norm 110.6028 | dt 0.193
type train | step 9790 | loss 2.4430 1.3648 3.6839 5.3534 | lr 5.9e-04 | norm 107.6791 | dt 0.210
type train | step 9800 | loss 2.5724 1.4183 3.7472 5.5817 | lr 5.9e-04 | norm 97.9751 | dt 0.202
type train | step 9810 | loss 2.4998 1.3685 3.6759 5.3000 | lr 5.9e-04 | norm 105.0624 | dt 0.182
type train | step 9820 | loss 2.4923 1.3451 3.6772 5.3873 | lr 5.9e-04 | norm 105.9404 | dt 0.203
type train | step 9830 | loss 2.5011 1.3377 3.6887 5.4131 | lr 5.9e-04 | norm 112.8775 | dt 0.202
type train | step 9840 | loss 2.6213 1.3972 3.6959 5.6131 | lr 5.9e-04 | norm 121.8233 | dt 0.176
type train | step 9850 | loss 2.5612 1.3422 3.7091 5.3635 | lr 5.9e-04 | norm 104.2351 | dt 0.198
type train | step 9860 | loss 2.6201 1.3318 3.6584 5.3713 | lr 5.9e-04 | norm 121.1481 | dt 0.169
type train | step 9870 | loss 2.6875 1.3450 3.6707 5.3244 | lr 5.9e-04 | norm 119.0735 | dt 0.175
type train | step 9880 | loss 2.6863 1.3744 3.7076 5.4368 | lr 5.9e-04 | norm 114.8976 | dt 0.210
type train | step 9890 | loss 2.6356 1.3544 3.7470 5.4709 | lr 5.9e-04 | norm 122.6031 | dt 0.195
type train | step 9900 | loss 2.7042 1.4369 3.7697 5.5682 | lr 5.8e-04 | norm 108.3845 | dt 0.211
type train | step 9910 | loss 2.6514 1.3517 3.7234 5.4261 | lr 5.8e-04 | norm 109.0796 | dt 0.199
type train | step 9920 | loss 2.6766 1.3573 3.6975 5.4577 | lr 5.8e-04 | norm 128.9256 | dt 0.203
type train | step 9930 | loss 2.6343 1.3380 3.6557 5.2906 | lr 5.8e-04 | norm 125.1555 | dt 0.199
type train | step 9940 | loss 2.6669 1.3961 3.7235 5.4695 | lr 5.8e-04 | norm 147.5423 | dt 0.175
type train | step 9950 | loss 2.6278 1.3598 3.7424 5.4999 | lr 5.8e-04 | norm 122.7294 | dt 0.199
type train | step 9960 | loss 2.7071 1.4246 3.7463 5.5565 | lr 5.8e-04 | norm 119.3373 | dt 0.176
type train | step 9970 | loss 2.6909 1.3896 3.7090 5.4396 | lr 5.8e-04 | norm 96.7992 | dt 0.200
type train | step 9980 | loss 2.6092 1.3781 3.7277 5.3900 | lr 5.8e-04 | norm 122.4657 | dt 0.170
type train | step 9990 | loss 2.5335 1.3737 3.6870 5.3316 | lr 5.8e-04 | norm 107.1246 | dt 0.200
type train | step 10000 | loss 2.6204 1.4030 3.7577 5.4970 | lr 5.8e-04 | norm 121.8362 | dt 0.187
type train | step 10010 | loss 2.6776 1.3654 3.7060 5.3953 | lr 5.8e-04 | norm 113.7366 | dt 0.176
type train | step 10020 | loss 2.6089 1.3719 3.7324 5.4275 | lr 5.8e-04 | norm 122.9119 | dt 0.178
type train | step 10030 | loss 2.6500 1.3749 3.7169 5.4036 | lr 5.8e-04 | norm 99.4381 | dt 0.177
type train | step 10040 | loss 2.6730 1.3920 3.7565 5.5512 | lr 5.7e-04 | norm 136.6888 | dt 0.213
type train | step 10050 | loss 2.6023 1.3560 3.6865 5.3532 | lr 5.7e-04 | norm 123.0590 | dt 0.210
type train | step 10060 | loss 2.5947 1.4108 3.7476 5.4339 | lr 5.7e-04 | norm 113.9341 | dt 0.202
type train | step 10070 | loss 2.6484 1.3817 3.7531 5.4251 | lr 5.7e-04 | norm 118.4678 | dt 0.203
type train | step 10080 | loss 2.6854 1.3843 3.6919 5.4431 | lr 5.7e-04 | norm 106.8758 | dt 0.191
type train | step 10090 | loss 2.5466 1.4005 3.6962 5.4361 | lr 5.7e-04 | norm 105.8126 | dt 0.176
type train | step 10100 | loss 2.5126 1.3838 3.6997 5.3655 | lr 5.7e-04 | norm 118.5026 | dt 0.201
type train | step 10110 | loss 2.5587 1.3858 3.6750 5.3675 | lr 5.7e-04 | norm 111.4115 | dt 0.169
type train | step 10120 | loss 2.5213 1.3742 3.6654 5.3256 | lr 5.7e-04 | norm 108.6932 | dt 0.167
type train | step 10130 | loss 2.5316 1.3945 3.7564 5.4128 | lr 5.7e-04 | norm 105.8745 | dt 0.179
type train | step 10140 | loss 2.5943 1.4355 3.7910 5.5631 | lr 5.7e-04 | norm 112.3506 | dt 0.194
type train | step 10150 | loss 2.4482 1.4028 3.6697 5.4278 | lr 5.7e-04 | norm 138.7940 | dt 0.164
type train | step 10160 | loss 2.5133 1.3766 3.7025 5.3366 | lr 5.7e-04 | norm 122.8993 | dt 0.182
type train | step 10170 | loss 2.4925 1.4447 3.7016 5.3975 | lr 5.7e-04 | norm 103.5454 | dt 0.205
type train | step 10180 | loss 2.5049 1.4065 3.7452 5.3559 | lr 5.6e-04 | norm 105.8029 | dt 0.171
type train | step 10190 | loss 2.4780 1.4116 3.7137 5.4107 | lr 5.6e-04 | norm 118.6293 | dt 0.210
type train | step 10200 | loss 2.4375 1.3850 3.7189 5.4127 | lr 5.6e-04 | norm 117.6846 | dt 0.167
type train | step 10210 | loss 2.4680 1.4173 3.7201 5.4118 | lr 5.6e-04 | norm 110.8961 | dt 0.167
type train | step 10220 | loss 2.5053 1.3988 3.6784 5.3004 | lr 5.6e-04 | norm 120.7684 | dt 0.215
type train | step 10230 | loss 2.5683 1.4194 3.6992 5.4596 | lr 5.6e-04 | norm 115.4116 | dt 0.173
type train | step 10240 | loss 2.6278 1.4132 3.7294 5.3563 | lr 5.6e-04 | norm 96.1597 | dt 0.168
type train | step 10250 | loss 2.6525 1.3678 3.6901 5.3802 | lr 5.6e-04 | norm 109.3322 | dt 0.198
type train | step 10260 | loss 2.6706 1.3895 3.7444 5.4529 | lr 5.6e-04 | norm 120.7293 | dt 0.194
type train | step 10270 | loss 2.7149 1.3748 3.6806 5.4062 | lr 5.6e-04 | norm 128.8040 | dt 0.206
type train | step 10280 | loss 2.8180 1.3419 3.7013 5.3752 | lr 5.6e-04 | norm 119.6751 | dt 0.187
type train | step 10290 | loss 2.9421 1.3687 3.7136 5.4804 | lr 5.6e-04 | norm 114.9168 | dt 0.194
type train | step 10300 | loss 2.8846 1.3190 3.6889 5.3430 | lr 5.6e-04 | norm 110.0402 | dt 0.168
type train | step 10310 | loss 2.8920 1.3558 3.7250 5.4117 | lr 5.5e-04 | norm 110.9104 | dt 0.206
type train | step 10320 | loss 2.9754 1.3906 3.7498 5.4962 | lr 5.5e-04 | norm 118.9179 | dt 0.209
type train | step 10330 | loss 3.0137 1.3544 3.6972 5.4204 | lr 5.5e-04 | norm 122.8068 | dt 0.168
type train | step 10340 | loss 2.9750 1.3339 3.7245 5.3138 | lr 5.5e-04 | norm 105.8330 | dt 0.213
type train | step 10350 | loss 2.9856 1.3547 3.6989 5.4441 | lr 5.5e-04 | norm 146.8402 | dt 0.210
type train | step 10360 | loss 2.8878 1.3159 3.6767 5.3241 | lr 5.5e-04 | norm 129.5154 | dt 0.174
type train | step 10370 | loss 2.9538 1.3312 3.7075 5.3525 | lr 5.5e-04 | norm 126.9439 | dt 0.216
type train | step 10380 | loss 2.9109 1.3481 3.6986 5.3895 | lr 5.5e-04 | norm 113.6476 | dt 0.208
type train | step 10390 | loss 2.8296 1.3387 3.6804 5.4000 | lr 5.5e-04 | norm 112.8938 | dt 0.225
type train | step 10400 | loss 2.8328 1.3221 3.6839 5.2934 | lr 5.5e-04 | norm 134.3564 | dt 0.195
type train | step 10410 | loss 2.9471 1.3993 3.7475 5.5443 | lr 5.5e-04 | norm 114.0509 | dt 0.218
type train | step 10420 | loss 2.7781 1.3328 3.6746 5.2447 | lr 5.5e-04 | norm 118.0679 | dt 0.203
type train | step 10430 | loss 2.7530 1.3317 3.6753 5.3143 | lr 5.5e-04 | norm 114.1314 | dt 0.169
type train | step 10440 | loss 2.8098 1.3339 3.6892 5.3536 | lr 5.5e-04 | norm 115.3622 | dt 0.193
type train | step 10450 | loss 2.8698 1.3889 3.6808 5.4840 | lr 5.4e-04 | norm 121.7654 | dt 0.219
type train | step 10460 | loss 2.8447 1.3425 3.7074 5.2906 | lr 5.4e-04 | norm 127.1702 | dt 0.196
type train | step 10470 | loss 2.7627 1.3185 3.6556 5.2600 | lr 5.4e-04 | norm 95.8095 | dt 0.199
type train | step 10480 | loss 2.7572 1.3299 3.6662 5.2266 | lr 5.4e-04 | norm 116.6047 | dt 0.206
type train | step 10490 | loss 2.8715 1.3970 3.7266 5.3436 | lr 5.4e-04 | norm 103.1802 | dt 0.211
type train | step 10500 | loss 2.7724 1.3632 3.7131 5.3902 | lr 5.4e-04 | norm 131.0288 | dt 0.197
type train | step 10510 | loss 2.8786 1.4483 3.7606 5.5174 | lr 5.4e-04 | norm 135.7642 | dt 0.216
type train | step 10520 | loss 2.8117 1.3857 3.7094 5.3604 | lr 5.4e-04 | norm 126.2687 | dt 0.206
type train | step 10530 | loss 2.6959 1.3901 3.6640 5.3677 | lr 5.4e-04 | norm 125.8454 | dt 0.179
type train | step 10540 | loss 2.6495 1.3559 3.6201 5.2248 | lr 5.4e-04 | norm 123.8255 | dt 0.197
type train | step 10550 | loss 2.7428 1.4065 3.6808 5.3703 | lr 5.4e-04 | norm 107.1183 | dt 0.209
type train | step 10560 | loss 2.7006 1.3855 3.7424 5.4139 | lr 5.4e-04 | norm 121.0406 | dt 0.184
type train | step 10570 | loss 2.8116 1.4294 3.7115 5.4556 | lr 5.4e-04 | norm 113.5078 | dt 0.194
type train | step 10580 | loss 2.7557 1.3883 3.7160 5.3955 | lr 5.3e-04 | norm 120.2725 | dt 0.182
type train | step 10590 | loss 2.6722 1.3832 3.7008 5.3221 | lr 5.3e-04 | norm 123.9591 | dt 0.175
type train | step 10600 | loss 2.5779 1.3779 3.6395 5.2579 | lr 5.3e-04 | norm 132.7943 | dt 0.183
type train | step 10610 | loss 2.6445 1.4073 3.7271 5.3963 | lr 5.3e-04 | norm 123.3177 | dt 0.199
type train | step 10620 | loss 2.5705 1.3655 3.7012 5.3248 | lr 5.3e-04 | norm 116.5128 | dt 0.199
type train | step 10630 | loss 2.5418 1.3926 3.7117 5.3727 | lr 5.3e-04 | norm 122.8597 | dt 0.203
type train | step 10640 | loss 2.6439 1.3902 3.6924 5.3473 | lr 5.3e-04 | norm 117.3774 | dt 0.199
type train | step 10650 | loss 2.6762 1.3966 3.7504 5.4618 | lr 5.3e-04 | norm 116.1914 | dt 0.170
type train | step 10660 | loss 2.5905 1.3511 3.6693 5.3093 | lr 5.3e-04 | norm 137.8557 | dt 0.208
type train | step 10670 | loss 2.5802 1.4169 3.7096 5.3670 | lr 5.3e-04 | norm 117.8384 | dt 0.184
type train | step 10680 | loss 2.5793 1.3860 3.7234 5.3659 | lr 5.3e-04 | norm 105.7440 | dt 0.190
type train | step 10690 | loss 2.6556 1.3849 3.6937 5.4273 | lr 5.3e-04 | norm 145.2345 | dt 0.211
type train | step 10700 | loss 2.5754 1.3893 3.7028 5.3972 | lr 5.3e-04 | norm 123.9228 | dt 0.196
type train | step 10710 | loss 2.5330 1.3707 3.6837 5.2996 | lr 5.3e-04 | norm 116.2001 | dt 0.173
type train | step 10720 | loss 2.4884 1.3800 3.6825 5.3066 | lr 5.2e-04 | norm 100.8420 | dt 0.191
type train | step 10730 | loss 2.5268 1.3493 3.6848 5.2662 | lr 5.2e-04 | norm 119.2253 | dt 0.199
type train | step 10740 | loss 2.5125 1.3750 3.7363 5.3635 | lr 5.2e-04 | norm 107.6938 | dt 0.208
type train | step 10750 | loss 2.5536 1.4042 3.7849 5.5189 | lr 5.2e-04 | norm 135.4303 | dt 0.205
type train | step 10760 | loss 2.4809 1.3573 3.6718 5.3418 | lr 5.2e-04 | norm 125.8008 | dt 0.169
type train | step 10770 | loss 2.5118 1.3469 3.6533 5.2672 | lr 5.2e-04 | norm 119.2250 | dt 0.200
type train | step 10780 | loss 2.5445 1.3803 3.7262 5.3238 | lr 5.2e-04 | norm 111.1903 | dt 0.200
type train | step 10790 | loss 2.5420 1.3614 3.7375 5.2914 | lr 5.2e-04 | norm 118.4604 | dt 0.201
type train | step 10800 | loss 2.4956 1.3689 3.7388 5.3536 | lr 5.2e-04 | norm 106.8730 | dt 0.167
type train | step 10810 | loss 2.5346 1.3547 3.6891 5.3230 | lr 5.2e-04 | norm 104.4165 | dt 0.181
type train | step 10820 | loss 2.5498 1.3765 3.6992 5.3427 | lr 5.2e-04 | norm 116.7144 | dt 0.182
type train | step 10830 | loss 2.5430 1.3161 3.6424 5.2042 | lr 5.2e-04 | norm 118.8641 | dt 0.200
type train | step 10840 | loss 2.5303 1.3694 3.6955 5.3494 | lr 5.2e-04 | norm 124.9506 | dt 0.203
type train | step 10850 | loss 2.5107 1.3646 3.6889 5.3055 | lr 5.2e-04 | norm 121.1466 | dt 0.177
type train | step 10860 | loss 2.5261 1.3475 3.6814 5.2891 | lr 5.1e-04 | norm 127.0960 | dt 0.172
type train | step 10870 | loss 2.5684 1.3708 3.7484 5.3396 | lr 5.1e-04 | norm 115.1252 | dt 0.208
type train | step 10880 | loss 2.5239 1.3644 3.6657 5.3142 | lr 5.1e-04 | norm 130.8044 | dt 0.218
type train | step 10890 | loss 2.5288 1.3424 3.6922 5.3099 | lr 5.1e-04 | norm 133.3098 | dt 0.206
type train | step 10900 | loss 2.5168 1.3920 3.6817 5.4435 | lr 5.1e-04 | norm 135.7978 | dt 0.205
type train | step 10910 | loss 2.4798 1.3582 3.7099 5.2876 | lr 5.1e-04 | norm 128.2165 | dt 0.201
type train | step 10920 | loss 2.4339 1.3869 3.7683 5.3485 | lr 5.1e-04 | norm 114.6202 | dt 0.181
type train | step 10930 | loss 2.5042 1.4061 3.7725 5.3720 | lr 5.1e-04 | norm 105.2462 | dt 0.201
type train | step 10940 | loss 2.4192 1.3948 3.6890 5.3573 | lr 5.1e-04 | norm 117.9218 | dt 0.188
type train | step 10950 | loss 2.4109 1.3696 3.7326 5.2425 | lr 5.1e-04 | norm 112.9665 | dt 0.194
type train | step 10960 | loss 2.4156 1.3817 3.6926 5.3454 | lr 5.1e-04 | norm 126.9982 | dt 0.166
type train | step 10970 | loss 2.3623 1.3288 3.6971 5.2203 | lr 5.1e-04 | norm 113.1617 | dt 0.204
type train | step 10980 | loss 2.4383 1.3745 3.7033 5.2670 | lr 5.1e-04 | norm 109.9631 | dt 0.184
type train | step 10990 | loss 2.3936 1.3766 3.6742 5.3035 | lr 5.0e-04 | norm 137.1610 | dt 0.207
type train | step 11000 | loss 2.3449 1.3644 3.6624 5.3346 | lr 5.0e-04 | norm 126.9964 | dt 0.182
type train | step 11010 | loss 2.4030 1.3338 3.6808 5.2123 | lr 5.0e-04 | norm 128.8029 | dt 0.207
type train | step 11020 | loss 2.5209 1.4256 3.7499 5.4122 | lr 5.0e-04 | norm 108.5232 | dt 0.176
type train | step 11030 | loss 2.3823 1.3393 3.6807 5.1739 | lr 5.0e-04 | norm 138.2274 | dt 0.199
type train | step 11040 | loss 2.3881 1.3315 3.6680 5.2122 | lr 5.0e-04 | norm 113.3176 | dt 0.204
type train | step 11050 | loss 2.4374 1.3468 3.6625 5.2255 | lr 5.0e-04 | norm 108.9054 | dt 0.196
type train | step 11060 | loss 2.4743 1.3916 3.7102 5.4030 | lr 5.0e-04 | norm 118.4775 | dt 0.193
type train | step 11070 | loss 2.4736 1.3514 3.6994 5.1924 | lr 5.0e-04 | norm 108.4739 | dt 0.204
type train | step 11080 | loss 2.3979 1.3413 3.6682 5.2160 | lr 5.0e-04 | norm 136.1790 | dt 0.186
type train | step 11090 | loss 2.4219 1.3589 3.6770 5.1664 | lr 5.0e-04 | norm 117.0545 | dt 0.191
type train | step 11100 | loss 2.4966 1.3838 3.7276 5.2625 | lr 5.0e-04 | norm 111.5944 | dt 0.174
type train | step 11110 | loss 2.5088 1.3454 3.7140 5.2857 | lr 5.0e-04 | norm 129.4420 | dt 0.201
type train | step 11120 | loss 2.6472 1.4261 3.7454 5.4166 | lr 5.0e-04 | norm 123.0489 | dt 0.193
type train | step 11130 | loss 2.6581 1.3561 3.7205 5.2308 | lr 4.9e-04 | norm 116.7741 | dt 0.206
type train | step 11140 | loss 2.5849 1.3467 3.6746 5.2656 | lr 4.9e-04 | norm 129.4632 | dt 0.215
type train | step 11150 | loss 2.5544 1.3212 3.6605 5.1168 | lr 4.9e-04 | norm 104.9945 | dt 0.174
type train | step 11160 | loss 2.6623 1.3698 3.6912 5.2709 | lr 4.9e-04 | norm 113.4944 | dt 0.190
type train | step 11170 | loss 2.7454 1.3341 3.7252 5.3105 | lr 4.9e-04 | norm 111.2689 | dt 0.212
type train | step 11180 | loss 2.8148 1.3996 3.7353 5.3682 | lr 4.9e-04 | norm 127.6546 | dt 0.210
type train | step 11190 | loss 2.8076 1.3569 3.7052 5.2877 | lr 4.9e-04 | norm 113.5463 | dt 0.202
type train | step 11200 | loss 2.7199 1.3363 3.7081 5.2052 | lr 4.9e-04 | norm 129.2054 | dt 0.210
type train | step 11210 | loss 2.7013 1.3273 3.6299 5.1435 | lr 4.9e-04 | norm 136.4306 | dt 0.201
type train | step 11220 | loss 2.7322 1.3838 3.7326 5.2828 | lr 4.9e-04 | norm 121.3241 | dt 0.200
type train | step 11230 | loss 2.7041 1.3360 3.7042 5.1979 | lr 4.9e-04 | norm 127.4842 | dt 0.170
type train | step 11240 | loss 2.7199 1.3530 3.7002 5.2251 | lr 4.9e-04 | norm 118.7905 | dt 0.173
type train | step 11250 | loss 2.7561 1.3666 3.6457 5.2268 | lr 4.9e-04 | norm 108.1669 | dt 0.190
type train | step 11260 | loss 2.8063 1.3829 3.7280 5.3577 | lr 4.9e-04 | norm 125.6915 | dt 0.217
type train | step 11270 | loss 2.7987 1.3370 3.7070 5.1570 | lr 4.8e-04 | norm 105.8474 | dt 0.192
type train | step 11280 | loss 2.8622 1.3903 3.7212 5.2496 | lr 4.8e-04 | norm 117.3248 | dt 0.185
type train | step 11290 | loss 3.0234 1.3633 3.7250 5.2536 | lr 4.8e-04 | norm 121.4592 | dt 0.211
type train | step 11300 | loss 3.0560 1.3521 3.7135 5.2766 | lr 4.8e-04 | norm 110.1875 | dt 0.180
type train | step 11310 | loss 3.1157 1.3781 3.6728 5.2904 | lr 4.8e-04 | norm 135.1430 | dt 0.211
type train | step 11320 | loss 3.1609 1.3425 3.6884 5.1716 | lr 4.8e-04 | norm 120.0687 | dt 0.202
type train | step 11330 | loss 3.2602 1.3546 3.6814 5.1847 | lr 4.8e-04 | norm 118.0512 | dt 0.200
type train | step 11340 | loss 3.3125 1.3346 3.6824 5.1362 | lr 4.8e-04 | norm 110.7078 | dt 0.209
type train | step 11350 | loss 3.3891 1.3463 3.7427 5.2431 | lr 4.8e-04 | norm 114.2599 | dt 0.198
type train | step 11360 | loss 3.5559 1.3885 3.7766 5.4038 | lr 4.8e-04 | norm 129.6867 | dt 0.197
type train | step 11370 | loss 3.5608 1.3379 3.6671 5.2272 | lr 4.8e-04 | norm 117.0291 | dt 0.166
type train | step 11380 | loss 3.7738 1.3318 3.6823 5.1927 | lr 4.8e-04 | norm 107.8424 | dt 0.205
type train | step 11390 | loss 4.0301 1.3519 3.7120 5.2227 | lr 4.8e-04 | norm 102.1073 | dt 0.211
type train | step 11400 | loss 4.2650 1.3398 3.7395 5.1911 | lr 4.8e-04 | norm 117.8215 | dt 0.205
type train | step 11410 | loss 4.2754 1.3583 3.7270 5.2633 | lr 4.7e-04 | norm 121.5620 | dt 0.204
type train | step 11420 | loss 4.2311 1.3546 3.7033 5.2522 | lr 4.7e-04 | norm 124.3598 | dt 0.205
type train | step 11430 | loss 4.0415 1.3633 3.7126 5.2444 | lr 4.7e-04 | norm 112.2199 | dt 0.174
type train | step 11440 | loss 3.8440 1.3088 3.6763 5.1265 | lr 4.7e-04 | norm 108.7541 | dt 0.205
type train | step 11450 | loss 3.7167 1.3567 3.7207 5.2638 | lr 4.7e-04 | norm 114.9444 | dt 0.207
type train | step 11460 | loss 3.6124 1.3457 3.7166 5.2242 | lr 4.7e-04 | norm 119.0677 | dt 0.180
type train | step 11470 | loss 3.4226 1.3345 3.7091 5.2060 | lr 4.7e-04 | norm 117.8758 | dt 0.187
type train | step 11480 | loss 3.3512 1.3466 3.7335 5.2598 | lr 4.7e-04 | norm 129.4984 | dt 0.202
type train | step 11490 | loss 3.2616 1.3394 3.6728 5.2284 | lr 4.7e-04 | norm 103.7154 | dt 0.207
type train | step 11500 | loss 3.2541 1.3438 3.6876 5.2116 | lr 4.7e-04 | norm 112.5660 | dt 0.186
type train | step 11510 | loss 3.2068 1.3754 3.7189 5.3454 | lr 4.7e-04 | norm 90.9630 | dt 0.196
type train | step 11520 | loss 3.0729 1.3338 3.6834 5.2063 | lr 4.7e-04 | norm 126.5513 | dt 0.212
type train | step 11530 | loss 3.0403 1.3640 3.7372 5.2529 | lr 4.7e-04 | norm 102.4884 | dt 0.205
type train | step 11540 | loss 3.1354 1.3956 3.7350 5.3021 | lr 4.6e-04 | norm 116.7519 | dt 0.210
type train | step 11550 | loss 3.1027 1.3685 3.6857 5.2920 | lr 4.6e-04 | norm 128.0545 | dt 0.177
type train | step 11560 | loss 3.0790 1.3367 3.7106 5.1905 | lr 4.6e-04 | norm 113.8120 | dt 0.178
type train | step 11570 | loss 3.1499 1.3707 3.6908 5.3016 | lr 4.6e-04 | norm 119.9291 | dt 0.206
type train | step 11580 | loss 3.0204 1.3207 3.6664 5.1672 | lr 4.6e-04 | norm 98.3856 | dt 0.170
type train | step 11590 | loss 3.0718 1.3541 3.6992 5.2416 | lr 4.6e-04 | norm 106.2422 | dt 0.217
type train | step 11600 | loss 3.0162 1.3589 3.6889 5.2502 | lr 4.6e-04 | norm 128.8044 | dt 0.173
type train | step 11610 | loss 2.9866 1.3533 3.6717 5.2503 | lr 4.6e-04 | norm 106.4101 | dt 0.180
type train | step 11620 | loss 2.9852 1.3250 3.6738 5.1405 | lr 4.6e-04 | norm 114.2684 | dt 0.200
type train | step 11630 | loss 3.0703 1.4121 3.7388 5.4039 | lr 4.6e-04 | norm 135.4388 | dt 0.170
type train | step 11640 | loss 2.9438 1.3353 3.6631 5.1102 | lr 4.6e-04 | norm 115.0466 | dt 0.209
type train | step 11650 | loss 2.8986 1.3374 3.6707 5.1403 | lr 4.6e-04 | norm 112.4284 | dt 0.207
type train | step 11660 | loss 2.9340 1.3422 3.6811 5.1662 | lr 4.6e-04 | norm 117.9523 | dt 0.186
type train | step 11670 | loss 2.9907 1.3824 3.6957 5.3499 | lr 4.6e-04 | norm 135.9186 | dt 0.192
type train | step 11680 | loss 2.9162 1.3573 3.7330 5.1420 | lr 4.5e-04 | norm 112.3815 | dt 0.231
type train | step 11690 | loss 2.8263 1.3328 3.6531 5.1287 | lr 4.5e-04 | norm 127.8680 | dt 0.209
type train | step 11700 | loss 2.8800 1.3340 3.6117 5.0912 | lr 4.5e-04 | norm 119.0760 | dt 0.183
type train | step 11710 | loss 2.9613 1.3867 3.7231 5.2080 | lr 4.5e-04 | norm 113.3179 | dt 0.174
type train | step 11720 | loss 2.8958 1.3505 3.6774 5.2214 | lr 4.5e-04 | norm 117.2532 | dt 0.167
type train | step 11730 | loss 3.0660 1.4419 3.7314 5.3716 | lr 4.5e-04 | norm 137.9887 | dt 0.190
type train | step 11740 | loss 3.0249 1.3640 3.7017 5.1781 | lr 4.5e-04 | norm 124.2533 | dt 0.209
type train | step 11750 | loss 3.0415 1.3578 3.6903 5.1845 | lr 4.5e-04 | norm 112.9513 | dt 0.198
type train | step 11760 | loss 3.0300 1.3345 3.6453 5.0325 | lr 4.5e-04 | norm 107.3600 | dt 0.213
type train | step 11770 | loss 3.1513 1.3852 3.6432 5.1874 | lr 4.5e-04 | norm 114.9199 | dt 0.194
type train | step 11780 | loss 3.2093 1.3498 3.7084 5.2500 | lr 4.5e-04 | norm 128.7518 | dt 0.207
type train | step 11790 | loss 3.4045 1.3834 3.7021 5.3191 | lr 4.5e-04 | norm 124.1494 | dt 0.171
type train | step 11800 | loss 3.4435 1.3353 3.6879 5.2451 | lr 4.5e-04 | norm 108.6595 | dt 0.215
type train | step 11810 | loss 3.5270 1.3292 3.6995 5.1442 | lr 4.5e-04 | norm 117.3904 | dt 0.184
type train | step 11820 | loss 3.6926 1.3087 3.6325 5.0746 | lr 4.4e-04 | norm 136.3355 | dt 0.184
type train | step 11830 | loss 3.9713 1.3523 3.7243 5.2263 | lr 4.4e-04 | norm 113.9635 | dt 0.177
type train | step 11840 | loss 4.2533 1.3311 3.6882 5.1160 | lr 4.4e-04 | norm 111.9266 | dt 0.201
type train | step 11850 | loss 4.4905 1.3365 3.7004 5.1654 | lr 4.4e-04 | norm 125.2392 | dt 0.165
type train | step 11860 | loss 4.7994 1.3405 3.6547 5.1855 | lr 4.4e-04 | norm 111.0521 | dt 0.178
type train | step 11870 | loss 5.0204 1.3706 3.7566 5.2729 | lr 4.4e-04 | norm 120.0109 | dt 0.197
type train | step 11880 | loss 5.1132 1.3329 3.7079 5.0865 | lr 4.4e-04 | norm 120.2224 | dt 0.197
type train | step 11890 | loss 5.2734 1.3831 3.6933 5.1841 | lr 4.4e-04 | norm 113.8038 | dt 0.223
type train | step 11900 | loss 5.3606 1.3590 3.6980 5.1910 | lr 4.4e-04 | norm 106.6601 | dt 0.186
type train | step 11910 | loss 5.3940 1.3582 3.6740 5.1989 | lr 4.4e-04 | norm 109.3699 | dt 0.218
type train | step 11920 | loss 5.3678 1.3821 3.7460 5.1944 | lr 4.4e-04 | norm 117.2743 | dt 0.215
type train | step 11930 | loss 5.3784 1.3309 3.6621 5.0899 | lr 4.4e-04 | norm 106.1580 | dt 0.171
type train | step 11940 | loss 5.3025 1.3518 3.7080 5.1424 | lr 4.4e-04 | norm 117.8755 | dt 0.202
type train | step 11950 | loss 5.2234 1.3460 3.6338 5.0672 | lr 4.4e-04 | norm 133.6479 | dt 0.219
type train | step 11960 | loss 5.2087 1.3356 3.7254 5.1731 | lr 4.3e-04 | norm 102.7620 | dt 0.169
type train | step 11970 | loss 5.3605 1.3875 3.8123 5.3387 | lr 4.3e-04 | norm 127.3519 | dt 0.215
type train | step 11980 | loss 5.3010 1.3485 3.5960 5.1605 | lr 4.3e-04 | norm 107.9440 | dt 0.186
type train | step 11990 | loss 5.3432 1.3156 3.6825 5.0752 | lr 4.3e-04 | norm 111.5953 | dt 0.206
type train | step 12000 | loss 5.2810 1.3660 3.7101 5.1696 | lr 4.3e-04 | norm 120.8981 | dt 0.203
type train | step 12010 | loss 5.3858 1.3699 3.7430 5.1081 | lr 4.3e-04 | norm 112.2464 | dt 0.209
type train | step 12020 | loss 5.3945 1.3586 3.7263 5.1974 | lr 4.3e-04 | norm 122.1419 | dt 0.181
type train | step 12030 | loss 5.6050 1.3551 3.7017 5.1608 | lr 4.3e-04 | norm 114.9285 | dt 0.221
type train | step 12040 | loss 5.6524 1.3696 3.7083 5.1554 | lr 4.3e-04 | norm 115.0790 | dt 0.215
type train | step 12050 | loss 5.6612 1.3103 3.6729 5.0362 | lr 4.3e-04 | norm 119.2761 | dt 0.179
type train | step 12060 | loss 5.6498 1.3829 3.7162 5.1872 | lr 4.3e-04 | norm 105.2911 | dt 0.206
type train | step 12070 | loss 5.5000 1.3767 3.7155 5.1158 | lr 4.3e-04 | norm 97.3325 | dt 0.205
type train | step 12080 | loss 5.2976 1.3452 3.7055 5.1108 | lr 4.3e-04 | norm 106.6792 | dt 0.204
type train | step 12090 | loss 5.1486 1.3650 3.7286 5.1695 | lr 4.3e-04 | norm 112.0805 | dt 0.164
type train | step 12100 | loss 5.0533 1.3687 3.6686 5.1743 | lr 4.2e-04 | norm 105.7450 | dt 0.225
type train | step 12110 | loss 4.9052 1.3261 3.6823 5.1294 | lr 4.2e-04 | norm 114.8420 | dt 0.177
type train | step 12120 | loss 4.7860 1.3782 3.7142 5.3006 | lr 4.2e-04 | norm 123.8942 | dt 0.205
type train | step 12130 | loss 4.5463 1.3414 3.6782 5.1121 | lr 4.2e-04 | norm 113.9954 | dt 0.174
type train | step 12140 | loss 4.5657 1.3509 3.7305 5.1696 | lr 4.2e-04 | norm 101.7502 | dt 0.201
type train | step 12150 | loss 4.6805 1.3813 3.7299 5.2345 | lr 4.2e-04 | norm 104.7189 | dt 0.197
type train | step 12160 | loss 4.5370 1.3643 3.6811 5.2018 | lr 4.2e-04 | norm 110.6401 | dt 0.197
type train | step 12170 | loss 4.5239 1.3260 3.7080 5.0725 | lr 4.2e-04 | norm 97.7678 | dt 0.186
type train | step 12180 | loss 4.4801 1.3706 3.6862 5.2472 | lr 4.2e-04 | norm 115.6329 | dt 0.201
type train | step 12190 | loss 4.4011 1.3262 3.6613 5.0840 | lr 4.2e-04 | norm 100.3261 | dt 0.211
type train | step 12200 | loss 4.3206 1.3460 3.6938 5.1434 | lr 4.2e-04 | norm 110.8448 | dt 0.169
type train | step 12210 | loss 4.3061 1.3711 3.6842 5.1648 | lr 4.2e-04 | norm 107.4908 | dt 0.168
type train | step 12220 | loss 4.3239 1.3806 3.6687 5.1851 | lr 4.2e-04 | norm 106.5740 | dt 0.227
type train | step 12230 | loss 4.2242 1.3300 3.6686 5.0669 | lr 4.2e-04 | norm 106.0548 | dt 0.169
type train | step 12240 | loss 4.2715 1.3829 3.7338 5.2991 | lr 4.2e-04 | norm 111.6763 | dt 0.207
type train | step 12250 | loss 4.1136 1.3094 3.6586 5.0476 | lr 4.1e-04 | norm 104.6374 | dt 0.189
type train | step 12260 | loss 4.1021 1.2745 3.6659 5.0669 | lr 4.1e-04 | norm 110.6304 | dt 0.226
type train | step 12270 | loss 4.0370 1.2930 3.6763 5.0999 | lr 4.1e-04 | norm 110.7764 | dt 0.177
type train | step 12280 | loss 4.0956 1.3410 3.6979 5.2803 | lr 4.1e-04 | norm 116.6508 | dt 0.207
type train | step 12290 | loss 4.0522 1.2923 3.6942 5.0935 | lr 4.1e-04 | norm 95.6767 | dt 0.204
type train | step 12300 | loss 3.9161 1.2642 3.6554 5.1134 | lr 4.1e-04 | norm 103.7044 | dt 0.181
type train | step 12310 | loss 3.9483 1.2843 3.6685 5.0899 | lr 4.1e-04 | norm 108.9971 | dt 0.201
type train | step 12320 | loss 4.0138 1.3210 3.7185 5.1785 | lr 4.1e-04 | norm 101.5826 | dt 0.179
type train | step 12330 | loss 3.9295 1.2916 3.7176 5.1962 | lr 4.1e-04 | norm 105.7269 | dt 0.189
type train | step 12340 | loss 4.0074 1.3740 3.7569 5.2927 | lr 4.1e-04 | norm 114.2208 | dt 0.188
type train | step 12350 | loss 3.9275 1.3096 3.7178 5.1475 | lr 4.1e-04 | norm 102.6974 | dt 0.201
type train | step 12360 | loss 3.8503 1.3075 3.6909 5.1832 | lr 4.1e-04 | norm 97.7605 | dt 0.170
type train | step 12370 | loss 3.8343 1.2956 3.6498 5.0252 | lr 4.1e-04 | norm 98.4833 | dt 0.170
type train | step 12380 | loss 3.8527 1.3219 3.7002 5.1741 | lr 4.1e-04 | norm 103.7101 | dt 0.171
type train | step 12390 | loss 3.7782 1.3042 3.7231 5.2546 | lr 4.0e-04 | norm 115.4645 | dt 0.202
type train | step 12400 | loss 3.8527 1.3551 3.7142 5.3130 | lr 4.0e-04 | norm 106.2744 | dt 0.216
type train | step 12410 | loss 3.7472 1.3296 3.6984 5.1981 | lr 4.0e-04 | norm 107.5825 | dt 0.190
type train | step 12420 | loss 3.6463 1.3046 3.7006 5.1358 | lr 4.0e-04 | norm 95.3051 | dt 0.197
type train | step 12430 | loss 3.6130 1.3003 3.6795 5.0585 | lr 4.0e-04 | norm 112.9876 | dt 0.169
type train | step 12440 | loss 3.6927 1.3582 3.7428 5.1957 | lr 4.0e-04 | norm 100.2288 | dt 0.198
type train | step 12450 | loss 3.6176 1.3138 3.6997 5.1364 | lr 4.0e-04 | norm 112.0847 | dt 0.211
type train | step 12460 | loss 3.5902 1.3269 3.6731 5.1722 | lr 4.0e-04 | norm 93.9672 | dt 0.191
type train | step 12470 | loss 3.6755 1.3229 3.6788 5.1599 | lr 4.0e-04 | norm 99.5747 | dt 0.172
type train | step 12480 | loss 3.6548 1.3349 3.7490 5.3027 | lr 4.0e-04 | norm 107.3536 | dt 0.206
type train | step 12490 | loss 3.6319 1.2914 3.6285 5.0943 | lr 4.0e-04 | norm 112.9569 | dt 0.192
type train | step 12500 | loss 3.6156 1.3438 3.6760 5.1835 | lr 4.0e-04 | norm 98.4529 | dt 0.201
type train | step 12510 | loss 3.6691 1.3269 3.7238 5.1969 | lr 4.0e-04 | norm 117.2875 | dt 0.198
type train | step 12520 | loss 3.5929 1.3198 3.7159 5.2306 | lr 4.0e-04 | norm 111.0980 | dt 0.216
type train | step 12530 | loss 3.6339 1.3300 3.6949 5.2051 | lr 3.9e-04 | norm 109.0268 | dt 0.217
type train | step 12540 | loss 3.5546 1.3111 3.6767 5.1048 | lr 3.9e-04 | norm 105.7329 | dt 0.181
type train | step 12550 | loss 3.5605 1.3348 3.6697 5.1353 | lr 3.9e-04 | norm 118.1438 | dt 0.221
type train | step 12560 | loss 3.5248 1.3402 3.6724 5.0707 | lr 3.9e-04 | norm 101.0150 | dt 0.214
type train | step 12570 | loss 3.5154 1.3270 3.7345 5.1885 | lr 3.9e-04 | norm 101.4896 | dt 0.186
type train | step 12580 | loss 3.6391 1.3634 3.7693 5.3230 | lr 3.9e-04 | norm 107.1713 | dt 0.203
type train | step 12590 | loss 3.5350 1.3176 3.6592 5.2034 | lr 3.9e-04 | norm 129.3982 | dt 0.212
type train | step 12600 | loss 3.4489 1.2855 3.6741 5.1046 | lr 3.9e-04 | norm 89.8455 | dt 0.174
type train | step 12610 | loss 3.4706 1.3190 3.7037 5.1926 | lr 3.9e-04 | norm 132.2361 | dt 0.203
type train | step 12620 | loss 3.4294 1.3284 3.7312 5.1072 | lr 3.9e-04 | norm 102.5667 | dt 0.207
type train | step 12630 | loss 3.3993 1.3155 3.7197 5.1868 | lr 3.9e-04 | norm 107.9742 | dt 0.170
type train | step 12640 | loss 3.4218 1.3070 3.6939 5.1391 | lr 3.9e-04 | norm 96.5566 | dt 0.187
type train | step 12650 | loss 3.4197 1.3346 3.7032 5.1887 | lr 3.9e-04 | norm 102.7689 | dt 0.223
type train | step 12660 | loss 3.3992 1.2878 3.6674 5.0481 | lr 3.9e-04 | norm 104.2316 | dt 0.210
type train | step 12670 | loss 3.4810 1.3438 3.7109 5.2216 | lr 3.9e-04 | norm 101.9033 | dt 0.188
type train | step 12680 | loss 3.4596 1.3142 3.7097 5.1177 | lr 3.8e-04 | norm 107.6163 | dt 0.208
type train | step 12690 | loss 3.3867 1.3075 3.7027 5.1616 | lr 3.8e-04 | norm 119.9028 | dt 0.170
type train | step 12700 | loss 3.3893 1.3379 3.7242 5.1764 | lr 3.8e-04 | norm 121.1847 | dt 0.168
type train | step 12710 | loss 3.3816 1.3159 3.6653 5.1514 | lr 3.8e-04 | norm 109.4354 | dt 0.165
type train | step 12720 | loss 3.3244 1.3064 3.6793 5.1424 | lr 3.8e-04 | norm 106.3684 | dt 0.175
type train | step 12730 | loss 3.4104 1.3586 3.7102 5.2977 | lr 3.8e-04 | norm 108.7640 | dt 0.197
type train | step 12740 | loss 3.2888 1.2919 3.6752 5.1115 | lr 3.8e-04 | norm 102.5418 | dt 0.213
type train | step 12750 | loss 3.2372 1.3213 3.7281 5.1801 | lr 3.8e-04 | norm 109.6140 | dt 0.202
type train | step 12760 | loss 3.3604 1.3637 3.7266 5.2341 | lr 3.8e-04 | norm 107.2584 | dt 0.167
type train | step 12770 | loss 3.3030 1.3308 3.6784 5.2029 | lr 3.8e-04 | norm 108.6720 | dt 0.176
type train | step 12780 | loss 3.3180 1.2934 3.7041 5.1073 | lr 3.8e-04 | norm 121.8110 | dt 0.210
type train | step 12790 | loss 3.2874 1.3262 3.6831 5.2528 | lr 3.8e-04 | norm 110.4980 | dt 0.216
type train | step 12800 | loss 3.2022 1.2902 3.6601 5.1007 | lr 3.8e-04 | norm 116.3470 | dt 0.172
type train | step 12810 | loss 3.2853 1.3111 3.6910 5.1690 | lr 3.8e-04 | norm 104.5610 | dt 0.177
type train | step 12820 | loss 3.2412 1.3087 3.6799 5.1644 | lr 3.8e-04 | norm 97.9627 | dt 0.175
type train | step 12830 | loss 3.2405 1.3117 3.6617 5.2159 | lr 3.7e-04 | norm 111.5015 | dt 0.179
type train | step 12840 | loss 3.1736 1.2733 3.6640 5.1016 | lr 3.7e-04 | norm 114.5866 | dt 0.206
type train | step 12850 | loss 3.3213 1.3591 3.7329 5.3318 | lr 3.7e-04 | norm 107.6768 | dt 0.211
type train | step 12860 | loss 3.1408 1.2871 3.6556 5.0499 | lr 3.7e-04 | norm 103.9850 | dt 0.171
type train | step 12870 | loss 3.1504 1.2845 3.6667 5.1137 | lr 3.7e-04 | norm 109.0929 | dt 0.186
type train | step 12880 | loss 3.1069 1.3011 3.6689 5.1179 | lr 3.7e-04 | norm 114.3816 | dt 0.189
type train | step 12890 | loss 3.2570 1.3684 3.6817 5.3036 | lr 3.7e-04 | norm 103.8484 | dt 0.222
type train | step 12900 | loss 3.1784 1.3012 3.6941 5.0859 | lr 3.7e-04 | norm 103.3586 | dt 0.171
type train | step 12910 | loss 3.0990 1.2917 3.6528 5.1275 | lr 3.7e-04 | norm 95.2265 | dt 0.188
type train | step 12920 | loss 3.1007 1.3065 3.6659 5.0646 | lr 3.7e-04 | norm 97.2631 | dt 0.204
type train | step 12930 | loss 3.1290 1.3330 3.7165 5.1761 | lr 3.7e-04 | norm 96.3750 | dt 0.189
type train | step 12940 | loss 3.1789 1.3143 3.7166 5.1800 | lr 3.7e-04 | norm 108.7731 | dt 0.206
type train | step 12950 | loss 3.2380 1.4131 3.7533 5.3283 | lr 3.7e-04 | norm 104.7859 | dt 0.180
type train | step 12960 | loss 3.1047 1.3206 3.7138 5.1453 | lr 3.7e-04 | norm 107.1963 | dt 0.188
type train | step 12970 | loss 3.1145 1.3114 3.6878 5.1893 | lr 3.7e-04 | norm 105.1256 | dt 0.199
type train | step 12980 | loss 3.0398 1.2839 3.6459 5.0149 | lr 3.6e-04 | norm 107.8533 | dt 0.215
type train | step 12990 | loss 3.1293 1.3437 3.6963 5.1672 | lr 3.6e-04 | norm 110.0410 | dt 0.177
type train | step 13000 | loss 3.1409 1.3011 3.7193 5.2188 | lr 3.6e-04 | norm 118.8424 | dt 0.183
type train | step 13010 | loss 3.2223 1.3649 3.7100 5.2886 | lr 3.6e-04 | norm 110.7717 | dt 0.212
type train | step 13020 | loss 3.1530 1.3421 3.6937 5.2260 | lr 3.6e-04 | norm 101.3405 | dt 0.168
type train | step 13030 | loss 3.0605 1.3243 3.7019 5.1113 | lr 3.6e-04 | norm 102.6595 | dt 0.179
type train | step 13040 | loss 3.0112 1.2908 3.6405 5.0430 | lr 3.6e-04 | norm 99.7078 | dt 0.186
type train | step 13050 | loss 3.0953 1.3590 3.7247 5.2048 | lr 3.6e-04 | norm 98.3606 | dt 0.197
type train | step 13060 | loss 3.1276 1.3226 3.6949 5.1088 | lr 3.6e-04 | norm 114.0290 | dt 0.170
type train | step 13070 | loss 3.1044 1.3331 3.6872 5.1691 | lr 3.6e-04 | norm 111.0580 | dt 0.212
type train | step 13080 | loss 3.1979 1.3288 3.6749 5.1670 | lr 3.6e-04 | norm 118.8095 | dt 0.176
type train | step 13090 | loss 3.2225 1.3677 3.7317 5.2726 | lr 3.6e-04 | norm 97.6892 | dt 0.170
type train | step 13100 | loss 3.1671 1.3241 3.6685 5.0857 | lr 3.6e-04 | norm 116.5302 | dt 0.177
type train | step 13110 | loss 3.2273 1.3514 3.7091 5.1855 | lr 3.6e-04 | norm 108.4781 | dt 0.221
type train | step 13120 | loss 3.2615 1.3352 3.7151 5.1932 | lr 3.6e-04 | norm 108.5218 | dt 0.220
type train | step 13130 | loss 3.2506 1.3319 3.7108 5.1857 | lr 3.5e-04 | norm 97.7854 | dt 0.180
type train | step 13140 | loss 3.2017 1.3343 3.6869 5.1966 | lr 3.5e-04 | norm 101.4702 | dt 0.171
type train | step 13150 | loss 3.1708 1.3040 3.6727 5.0977 | lr 3.5e-04 | norm 100.9447 | dt 0.212
type train | step 13160 | loss 3.1569 1.3139 3.6666 5.1353 | lr 3.5e-04 | norm 103.9639 | dt 0.221
type train | step 13170 | loss 3.0930 1.3059 3.6698 5.0543 | lr 3.5e-04 | norm 110.9860 | dt 0.182
type train | step 13180 | loss 3.0556 1.3151 3.7362 5.1670 | lr 3.5e-04 | norm 102.4178 | dt 0.211
type train | step 13190 | loss 3.1679 1.3665 3.7666 5.3268 | lr 3.5e-04 | norm 94.9319 | dt 0.167
type train | step 13200 | loss 3.0838 1.3019 3.6577 5.1576 | lr 3.5e-04 | norm 112.8423 | dt 0.198
type train | step 13210 | loss 3.0635 1.2821 3.6722 5.1176 | lr 3.5e-04 | norm 105.1153 | dt 0.206
type train | step 13220 | loss 3.0765 1.3238 3.6989 5.1835 | lr 3.5e-04 | norm 101.6623 | dt 0.168
type train | step 13230 | loss 3.0345 1.2955 3.7285 5.1410 | lr 3.5e-04 | norm 89.0420 | dt 0.201
type train | step 13240 | loss 3.0010 1.3035 3.7159 5.2005 | lr 3.5e-04 | norm 115.0970 | dt 0.172
type train | step 13250 | loss 3.0427 1.3030 3.6902 5.1947 | lr 3.5e-04 | norm 114.0980 | dt 0.213
type train | step 13260 | loss 3.0032 1.3057 3.6974 5.1926 | lr 3.5e-04 | norm 100.0159 | dt 0.183
type train | step 13270 | loss 3.0012 1.2659 3.6630 5.0488 | lr 3.5e-04 | norm 102.2429 | dt 0.176
type train | step 13280 | loss 3.0681 1.3273 3.7071 5.2015 | lr 3.4e-04 | norm 106.3533 | dt 0.222
type train | step 13290 | loss 2.9977 1.3152 3.7057 5.1320 | lr 3.4e-04 | norm 95.1713 | dt 0.190
type train | step 13300 | loss 2.9762 1.2984 3.6979 5.1239 | lr 3.4e-04 | norm 105.5793 | dt 0.200
type train | step 13310 | loss 2.9727 1.3199 3.7206 5.2000 | lr 3.4e-04 | norm 99.1482 | dt 0.203
type train | step 13320 | loss 2.9438 1.3123 3.6594 5.1700 | lr 3.4e-04 | norm 99.3229 | dt 0.203
type train | step 13330 | loss 2.9529 1.3129 3.6746 5.1732 | lr 3.4e-04 | norm 120.4495 | dt 0.183
type train | step 13340 | loss 2.9882 1.3423 3.7058 5.3038 | lr 3.4e-04 | norm 107.0808 | dt 0.196
type train | step 13350 | loss 2.8836 1.2873 3.6723 5.1544 | lr 3.4e-04 | norm 113.5956 | dt 0.203
type train | step 13360 | loss 2.8941 1.3029 3.7242 5.1733 | lr 3.4e-04 | norm 99.9746 | dt 0.210
type train | step 13370 | loss 3.0284 1.3365 3.7210 5.2719 | lr 3.4e-04 | norm 102.7639 | dt 0.202
type train | step 13380 | loss 2.9154 1.3054 3.6740 5.2298 | lr 3.4e-04 | norm 108.4879 | dt 0.202
type train | step 13390 | loss 2.8736 1.2723 3.7009 5.0979 | lr 3.4e-04 | norm 98.9202 | dt 0.178
type train | step 13400 | loss 2.9451 1.3188 3.6789 5.2609 | lr 3.4e-04 | norm 118.6823 | dt 0.167
type train | step 13410 | loss 2.8602 1.2700 3.6562 5.1243 | lr 3.4e-04 | norm 100.3365 | dt 0.199
type train | step 13420 | loss 2.8870 1.2815 3.6880 5.1657 | lr 3.4e-04 | norm 115.6352 | dt 0.186
type train | step 13430 | loss 2.8641 1.2958 3.6765 5.1688 | lr 3.3e-04 | norm 103.7423 | dt 0.174
type train | step 13440 | loss 2.9023 1.3138 3.6605 5.1897 | lr 3.3e-04 | norm 96.6394 | dt 0.215
type train | step 13450 | loss 2.8700 1.2644 3.6627 5.0671 | lr 3.3e-04 | norm 101.1371 | dt 0.197
type train | step 13460 | loss 2.9562 1.3364 3.7261 5.3415 | lr 3.3e-04 | norm 95.8726 | dt 0.179
type train | step 13470 | loss 2.8137 1.2706 3.6515 5.0402 | lr 3.3e-04 | norm 93.4046 | dt 0.176
type train | step 13480 | loss 2.7795 1.2577 3.6613 5.1023 | lr 3.3e-04 | norm 105.9223 | dt 0.184
type train | step 13490 | loss 2.8101 1.2599 3.6701 5.1285 | lr 3.3e-04 | norm 97.9253 | dt 0.204
type train | step 13500 | loss 2.8864 1.3221 3.6911 5.3204 | lr 3.3e-04 | norm 122.7410 | dt 0.174
type train | step 13510 | loss 2.8298 1.2753 3.6868 5.1086 | lr 3.3e-04 | norm 104.9157 | dt 0.229
type train | step 13520 | loss 2.7256 1.2566 3.6493 5.1150 | lr 3.3e-04 | norm 95.5146 | dt 0.194
type train | step 13530 | loss 2.7800 1.2747 3.6628 5.0584 | lr 3.3e-04 | norm 96.4675 | dt 0.182
type train | step 13540 | loss 2.8181 1.3038 3.7121 5.1841 | lr 3.3e-04 | norm 96.3196 | dt 0.213
type train | step 13550 | loss 2.7881 1.2863 3.7125 5.2093 | lr 3.3e-04 | norm 104.9238 | dt 0.212
type train | step 13560 | loss 2.8901 1.3657 3.7493 5.3322 | lr 3.3e-04 | norm 104.3223 | dt 0.205
type train | step 13570 | loss 2.7740 1.2872 3.7104 5.1361 | lr 3.3e-04 | norm 109.6165 | dt 0.204
type train | step 13580 | loss 2.7242 1.2903 3.6855 5.1817 | lr 3.3e-04 | norm 101.0675 | dt 0.190
type train | step 13590 | loss 2.6594 1.2557 3.6441 5.0224 | lr 3.2e-04 | norm 103.0625 | dt 0.178
type train | step 13600 | loss 2.7680 1.2957 3.6939 5.1895 | lr 3.2e-04 | norm 92.6169 | dt 0.197
type train | step 13610 | loss 2.7373 1.2767 3.7170 5.2485 | lr 3.2e-04 | norm 113.2189 | dt 0.193
type train | step 13620 | loss 2.7650 1.3253 3.7079 5.3139 | lr 3.2e-04 | norm 106.1649 | dt 0.181
type train | step 13630 | loss 2.7558 1.2911 3.6940 5.1977 | lr 3.2e-04 | norm 99.2748 | dt 0.199
type train | step 13640 | loss 2.6849 1.2769 3.6994 5.1287 | lr 3.2e-04 | norm 105.0751 | dt 0.204
type train | step 13650 | loss 2.6323 1.2710 3.6380 5.0616 | lr 3.2e-04 | norm 97.9703 | dt 0.201
type train | step 13660 | loss 2.7219 1.3081 3.7288 5.2223 | lr 3.2e-04 | norm 108.7594 | dt 0.179
type train | step 13670 | loss 2.6549 1.2697 3.6949 5.1062 | lr 3.2e-04 | norm 113.0930 | dt 0.187
type train | step 13680 | loss 2.6399 1.2815 3.6866 5.1640 | lr 3.2e-04 | norm 115.9886 | dt 0.204
type train | step 13690 | loss 2.7216 1.2793 3.6744 5.1612 | lr 3.2e-04 | norm 104.6959 | dt 0.199
type train | step 13700 | loss 2.7155 1.3090 3.7306 5.2912 | lr 3.2e-04 | norm 109.0364 | dt 0.200
type train | step 13710 | loss 2.6313 1.2564 3.6662 5.1155 | lr 3.2e-04 | norm 103.7932 | dt 0.201
type train | step 13720 | loss 2.6657 1.2893 3.7066 5.2271 | lr 3.2e-04 | norm 110.1711 | dt 0.168
type train | step 13730 | loss 2.7479 1.2748 3.7132 5.2476 | lr 3.2e-04 | norm 100.7826 | dt 0.199
type train | step 13740 | loss 2.6723 1.2763 3.7078 5.2176 | lr 3.2e-04 | norm 96.1768 | dt 0.208
type train | step 13750 | loss 2.6973 1.2822 3.6838 5.2198 | lr 3.1e-04 | norm 96.9956 | dt 0.174
type train | step 13760 | loss 2.6261 1.2546 3.6706 5.1300 | lr 3.1e-04 | norm 97.9471 | dt 0.173
type train | step 13770 | loss 2.6992 1.2746 3.6618 5.1502 | lr 3.1e-04 | norm 112.0999 | dt 0.210
type train | step 13780 | loss 2.6654 1.2554 3.6725 5.0762 | lr 3.1e-04 | norm 111.1168 | dt 0.213
type train | step 13790 | loss 2.6610 1.2568 3.7564 5.2290 | lr 3.1e-04 | norm 118.0652 | dt 0.207
type train | step 13800 | loss 2.7357 1.3124 3.7170 5.3499 | lr 3.1e-04 | norm 106.7664 | dt 0.201
type train | step 13810 | loss 2.6842 1.2603 3.6711 5.1904 | lr 3.1e-04 | norm 103.4451 | dt 0.207
type train | step 13820 | loss 2.7284 1.2360 3.6426 5.1299 | lr 3.1e-04 | norm 95.2164 | dt 0.180
type train | step 13830 | loss 2.7281 1.2709 3.6589 5.1959 | lr 3.1e-04 | norm 108.4547 | dt 0.181
type train | step 13840 | loss 2.7194 1.2559 3.6898 5.1355 | lr 3.1e-04 | norm 92.8214 | dt 0.200
type train | step 13850 | loss 2.7145 1.2676 3.6863 5.2285 | lr 3.1e-04 | norm 105.0308 | dt 0.206
type train | step 13860 | loss 2.7415 1.2563 3.6637 5.1908 | lr 3.1e-04 | norm 99.0825 | dt 0.201
type train | step 13870 | loss 2.8088 1.2757 3.6854 5.2256 | lr 3.1e-04 | norm 118.3058 | dt 0.199
type train | step 13880 | loss 2.7688 1.2350 3.6462 5.0893 | lr 3.1e-04 | norm 118.0943 | dt 0.177
type train | step 13890 | loss 2.8116 1.2797 3.7051 5.2678 | lr 3.1e-04 | norm 108.4192 | dt 0.216
type train | step 13900 | loss 2.8461 1.2629 3.7108 5.1884 | lr 3.1e-04 | norm 107.2871 | dt 0.202
type train | step 13910 | loss 2.8042 1.2604 3.6634 5.1815 | lr 3.0e-04 | norm 97.2183 | dt 0.200
type train | step 13920 | loss 2.8159 1.2755 3.7270 5.2173 | lr 3.0e-04 | norm 95.6498 | dt 0.205
type train | step 13930 | loss 2.7608 1.2612 3.6623 5.2167 | lr 3.0e-04 | norm 94.3856 | dt 0.199
type train | step 13940 | loss 2.7096 1.2499 3.6400 5.1794 | lr 3.0e-04 | norm 98.6046 | dt 0.211
type train | step 13950 | loss 2.7923 1.2816 3.6827 5.3338 | lr 3.0e-04 | norm 93.5272 | dt 0.187
type train | step 13960 | loss 2.7249 1.2354 3.6660 5.1701 | lr 3.0e-04 | norm 95.8774 | dt 0.201
type train | step 13970 | loss 2.7455 1.2539 3.7064 5.2280 | lr 3.0e-04 | norm 86.6942 | dt 0.220
type train | step 13980 | loss 2.8372 1.2921 3.7284 5.2711 | lr 3.0e-04 | norm 109.6325 | dt 0.208
type train | step 13990 | loss 2.7912 1.2613 3.6596 5.2745 | lr 3.0e-04 | norm 103.2969 | dt 0.200
type train | step 14000 | loss 2.8154 1.2277 3.6883 5.1423 | lr 3.0e-04 | norm 104.2693 | dt 0.206
type train | step 14010 | loss 2.8101 1.2799 3.6794 5.2690 | lr 3.0e-04 | norm 114.4670 | dt 0.194
type train | step 14020 | loss 2.7634 1.2234 3.6399 5.1696 | lr 3.0e-04 | norm 91.4963 | dt 0.204
type train | step 14030 | loss 2.8354 1.2460 3.6733 5.2056 | lr 3.0e-04 | norm 99.2901 | dt 0.209
type train | step 14040 | loss 2.8338 1.2495 3.6555 5.2125 | lr 3.0e-04 | norm 98.8353 | dt 0.201
type train | step 14050 | loss 2.8638 1.2493 3.6659 5.2428 | lr 3.0e-04 | norm 100.6437 | dt 0.210
type train | step 14060 | loss 2.8887 1.2079 3.6750 5.1066 | lr 3.0e-04 | norm 105.5280 | dt 0.168
type train | step 14070 | loss 2.9964 1.2893 3.7347 5.3929 | lr 2.9e-04 | norm 99.0429 | dt 0.190
type train | step 14080 | loss 2.9391 1.2276 3.6253 5.0887 | lr 2.9e-04 | norm 102.1903 | dt 0.180
type train | step 14090 | loss 2.9736 1.2049 3.6563 5.1610 | lr 2.9e-04 | norm 103.9605 | dt 0.168
type train | step 14100 | loss 3.1051 1.2199 3.6600 5.1888 | lr 2.9e-04 | norm 103.3522 | dt 0.182
type train | step 14110 | loss 3.2185 1.2771 3.6597 5.3651 | lr 2.9e-04 | norm 108.8375 | dt 0.198
type train | step 14120 | loss 3.2971 1.2294 3.6776 5.1442 | lr 2.9e-04 | norm 102.3507 | dt 0.171
type train | step 14130 | loss 3.3374 1.2074 3.5870 5.1348 | lr 2.9e-04 | norm 96.7079 | dt 0.217
type train | step 14140 | loss 3.4187 1.2263 3.6595 5.1051 | lr 2.9e-04 | norm 103.7529 | dt 0.166
type train | step 14150 | loss 3.5903 1.2675 3.6725 5.2272 | lr 2.9e-04 | norm 88.9031 | dt 0.186
type train | step 14160 | loss 3.5616 1.2365 3.6983 5.2499 | lr 2.9e-04 | norm 100.6071 | dt 0.202
type train | step 14170 | loss 3.7310 1.3188 3.7460 5.3955 | lr 2.9e-04 | norm 99.3653 | dt 0.194
type train | step 14180 | loss 3.7270 1.2358 3.7186 5.1927 | lr 2.9e-04 | norm 108.6125 | dt 0.196
type train | step 14190 | loss 3.7747 1.2372 3.6719 5.2329 | lr 2.9e-04 | norm 99.2050 | dt 0.200
type train | step 14200 | loss 3.7355 1.2116 3.6105 5.0761 | lr 2.9e-04 | norm 96.8318 | dt 0.238
type train | step 14210 | loss 3.9651 1.2540 3.6893 5.2372 | lr 2.9e-04 | norm 95.3672 | dt 0.225
type train | step 14220 | loss 3.9905 1.2247 3.7012 5.3034 | lr 2.9e-04 | norm 108.2136 | dt 0.230
type train | step 14230 | loss 4.1565 1.2751 3.7104 5.3478 | lr 2.9e-04 | norm 99.7609 | dt 0.214
type train | step 14240 | loss 4.2290 1.2412 3.6729 5.2689 | lr 2.8e-04 | norm 102.2767 | dt 0.193
type train | step 14250 | loss 4.2592 1.2244 3.6811 5.1852 | lr 2.8e-04 | norm 106.3319 | dt 0.214
type train | step 14260 | loss 4.2324 1.2072 3.6275 5.1110 | lr 2.8e-04 | norm 94.8005 | dt 0.190
type train | step 14270 | loss 4.3829 1.2623 3.7026 5.2669 | lr 2.8e-04 | norm 109.0350 | dt 0.182
type train | step 14280 | loss 4.4117 1.2155 3.6930 5.1694 | lr 2.8e-04 | norm 103.6826 | dt 0.200
type train | step 14290 | loss 4.4824 1.2333 3.6636 5.1949 | lr 2.8e-04 | norm 99.0204 | dt 0.171
type train | step 14300 | loss 4.6256 1.2376 3.6727 5.2137 | lr 2.8e-04 | norm 101.6954 | dt 0.177
type train | step 14310 | loss 4.7537 1.2667 3.6981 5.3377 | lr 2.8e-04 | norm 91.8462 | dt 0.224
type train | step 14320 | loss 4.8426 1.2140 3.6582 5.1481 | lr 2.8e-04 | norm 108.0454 | dt 0.237
type train | step 14330 | loss 5.0315 1.2608 3.7032 5.2233 | lr 2.8e-04 | norm 103.3826 | dt 0.183
type train | step 14340 | loss 5.1705 1.2431 3.7278 5.2641 | lr 2.8e-04 | norm 108.0199 | dt 0.170
type train | step 14350 | loss 5.2039 1.2368 3.6759 5.2640 | lr 2.8e-04 | norm 112.7141 | dt 0.218
type train | step 14360 | loss 5.3593 1.2401 3.6572 5.2502 | lr 2.8e-04 | norm 98.9494 | dt 0.204
type train | step 14370 | loss 5.5115 1.2182 3.6219 5.1505 | lr 2.8e-04 | norm 93.9116 | dt 0.171
type train | step 14380 | loss 5.6796 1.2234 3.6707 5.1769 | lr 2.8e-04 | norm 92.0157 | dt 0.180
type train | step 14390 | loss 5.7475 1.2218 3.6621 5.1089 | lr 2.8e-04 | norm 106.9853 | dt 0.204
type train | step 14400 | loss 5.8543 1.2364 3.7071 5.2463 | lr 2.8e-04 | norm 97.4087 | dt 0.201
type train | step 14410 | loss 6.0318 1.2842 3.7643 5.3959 | lr 2.7e-04 | norm 110.4636 | dt 0.172
type train | step 14420 | loss 6.0138 1.2450 3.6464 5.2013 | lr 2.7e-04 | norm 105.0744 | dt 0.184
type train | step 14430 | loss 6.1326 1.2117 3.6465 5.1510 | lr 2.7e-04 | norm 100.7352 | dt 0.178
type train | step 14440 | loss 6.2554 1.2333 3.6789 5.2210 | lr 2.7e-04 | norm 113.0000 | dt 0.209
type train | step 14450 | loss 6.3345 1.2282 3.7261 5.1649 | lr 2.7e-04 | norm 105.8337 | dt 0.204
type train | step 14460 | loss 6.4257 1.2365 3.7401 5.2411 | lr 2.7e-04 | norm 107.4625 | dt 0.202
type train | step 14470 | loss 6.5700 1.2196 3.6857 5.2109 | lr 2.7e-04 | norm 102.7784 | dt 0.170
type train | step 14480 | loss 6.8356 1.2384 3.6701 5.2450 | lr 2.7e-04 | norm 108.1183 | dt 0.204
type train | step 14490 | loss 7.0994 1.1913 3.6536 5.0879 | lr 2.7e-04 | norm 110.7880 | dt 0.218
type train | step 14500 | loss 7.4414 1.2443 3.6915 5.2641 | lr 2.7e-04 | norm 112.5776 | dt 0.181
type train | step 14510 | loss 7.8993 1.2306 3.6902 5.1929 | lr 2.7e-04 | norm 108.4690 | dt 0.173
type train | step 14520 | loss 8.3955 1.2206 3.6830 5.2011 | lr 2.7e-04 | norm 122.2963 | dt 0.221
type train | step 14530 | loss 8.9602 1.2363 3.6795 5.2335 | lr 2.7e-04 | norm 129.5784 | dt 0.221
type train | step 14540 | loss 9.5155 1.2275 3.6582 5.2157 | lr 2.7e-04 | norm 111.2822 | dt 0.183
type train | step 14550 | loss 10.3500 1.2266 3.6493 5.2027 | lr 2.7e-04 | norm 125.3901 | dt 0.220
type train | step 14560 | loss 10.8637 1.2605 3.7149 5.3393 | lr 2.7e-04 | norm 119.1163 | dt 0.217
type train | step 14570 | loss 11.5139 1.2137 3.6607 5.1585 | lr 2.7e-04 | norm 126.9973 | dt 0.197
type train | step 14580 | loss 11.9913 1.2350 3.7137 5.2477 | lr 2.6e-04 | norm 128.4226 | dt 0.218
type train | step 14590 | loss 12.8194 1.2650 3.7227 5.2817 | lr 2.6e-04 | norm 126.6829 | dt 0.181
type train | step 14600 | loss 13.3583 1.2464 3.6630 5.2638 | lr 2.6e-04 | norm 131.1452 | dt 0.207
type train | step 14610 | loss 14.1949 1.2014 3.6926 5.1488 | lr 2.6e-04 | norm 136.5493 | dt 0.209
type train | step 14620 | loss 14.8323 1.2474 3.6615 5.2842 | lr 2.6e-04 | norm 139.3382 | dt 0.187
type train | step 14630 | loss 15.3315 1.2128 3.6404 5.1663 | lr 2.6e-04 | norm 136.4743 | dt 0.198
type train | step 14640 | loss 16.1279 1.2287 3.6437 5.1841 | lr 2.6e-04 | norm 133.2042 | dt 0.179
type train | step 14650 | loss 16.8546 1.2251 3.6614 5.2176 | lr 2.6e-04 | norm 145.3532 | dt 0.182
type train | step 14660 | loss 17.3082 1.2323 3.6126 5.2240 | lr 2.6e-04 | norm 139.7091 | dt 0.198
type train | step 14670 | loss 17.7503 1.1976 3.6558 5.1110 | lr 2.6e-04 | norm 141.2573 | dt 0.198
type train | step 14680 | loss 18.3037 1.2682 3.7280 5.3757 | lr 2.6e-04 | norm 142.8669 | dt 0.202
type train | step 14690 | loss 18.5401 1.2095 3.6518 5.0765 | lr 2.6e-04 | norm 138.6148 | dt 0.205
type train | step 14700 | loss 18.7762 1.1888 3.6172 5.1446 | lr 2.6e-04 | norm 135.1041 | dt 0.202
type train | step 14710 | loss 19.1552 1.1958 3.6633 5.1579 | lr 2.6e-04 | norm 135.8368 | dt 0.197
type train | step 14720 | loss 19.3660 1.2472 3.6770 5.3256 | lr 2.6e-04 | norm 134.9713 | dt 0.186
type train | step 14730 | loss 19.6269 1.2155 3.6753 5.1322 | lr 2.6e-04 | norm 135.7147 | dt 0.194
type train | step 14740 | loss 19.6190 1.2045 3.6618 5.1164 | lr 2.6e-04 | norm 126.4717 | dt 0.198
type train | step 14750 | loss 19.6930 1.2088 3.6546 5.0690 | lr 2.6e-04 | norm 129.5092 | dt 0.200
type train | step 14760 | loss 20.1462 1.2344 3.6867 5.1959 | lr 2.5e-04 | norm 124.6828 | dt 0.190
type train | step 14770 | loss 20.1472 1.2120 3.6595 5.2109 | lr 2.5e-04 | norm 134.5241 | dt 0.179
type train | step 14780 | loss 20.4850 1.3084 3.7322 5.3447 | lr 2.5e-04 | norm 132.1203 | dt 0.199
type train | step 14790 | loss 20.6839 1.2306 3.6888 5.1594 | lr 2.5e-04 | norm 133.2116 | dt 0.199
type train | step 14800 | loss 20.7698 1.2283 3.6479 5.1908 | lr 2.5e-04 | norm 128.2655 | dt 0.203
type train | step 14810 | loss 20.7107 1.2065 3.6443 5.0406 | lr 2.5e-04 | norm 133.4123 | dt 0.204
type train | step 14820 | loss 20.9950 1.2535 3.6595 5.1907 | lr 2.5e-04 | norm 135.7173 | dt 0.199
type train | step 14830 | loss 20.8794 1.2199 3.7244 5.2539 | lr 2.5e-04 | norm 139.3482 | dt 0.205
type train | step 14840 | loss 20.8519 1.2606 3.7107 5.2985 | lr 2.5e-04 | norm 128.8222 | dt 0.202
type train | step 14850 | loss 20.8467 1.2308 3.6694 5.2348 | lr 2.5e-04 | norm 136.2334 | dt 0.185
type train | step 14860 | loss 20.9538 1.2191 3.6931 5.1354 | lr 2.5e-04 | norm 132.8580 | dt 0.173
type train | step 14870 | loss 20.9182 1.1990 3.6309 5.0687 | lr 2.5e-04 | norm 138.4834 | dt 0.197
type train | step 14880 | loss 20.9148 1.2466 3.7173 5.2097 | lr 2.5e-04 | norm 126.3063 | dt 0.196
type train | step 14890 | loss 20.4691 1.2049 3.6891 5.1271 | lr 2.5e-04 | norm 132.0523 | dt 0.169
type train | step 14900 | loss 20.3455 1.2162 3.6676 5.1518 | lr 2.5e-04 | norm 128.7614 | dt 0.204
type train | step 14910 | loss 20.0904 1.2148 3.6611 5.1583 | lr 2.5e-04 | norm 123.8685 | dt 0.218
type train | step 14920 | loss 19.9254 1.2394 3.7236 5.2832 | lr 2.5e-04 | norm 127.5253 | dt 0.184
type train | step 14930 | loss 19.1955 1.1961 3.6494 5.0974 | lr 2.5e-04 | norm 136.2715 | dt 0.203
type train | step 14940 | loss 18.9924 1.2388 3.7003 5.1779 | lr 2.4e-04 | norm 130.8042 | dt 0.201
type train | step 14950 | loss 18.3603 1.2265 3.7076 5.2159 | lr 2.4e-04 | norm 125.6957 | dt 0.194
type train | step 14960 | loss 18.0336 1.2239 3.6931 5.2101 | lr 2.4e-04 | norm 124.4794 | dt 0.184
type train | step 14970 | loss 17.9410 1.2201 3.6701 5.2012 | lr 2.4e-04 | norm 117.8564 | dt 0.172
type train | step 14980 | loss 17.4766 1.1976 3.6681 5.1007 | lr 2.4e-04 | norm 112.8544 | dt 0.206
type train | step 14990 | loss 16.9890 1.2084 3.6466 5.1278 | lr 2.4e-04 | norm 119.1064 | dt 0.201
type train | step 15000 | loss 16.9566 1.1983 3.6615 5.0507 | lr 2.4e-04 | norm 117.4076 | dt 0.170
type train | step 15010 | loss 16.2995 1.2051 3.7501 5.1996 | lr 2.4e-04 | norm 117.4186 | dt 0.204
type train | step 15020 | loss 16.2194 1.2437 3.7600 5.3323 | lr 2.4e-04 | norm 118.9238 | dt 0.200
type train | step 15030 | loss 15.9413 1.1977 3.6513 5.1593 | lr 2.4e-04 | norm 116.9759 | dt 0.168
type train | step 15040 | loss 15.7991 1.1763 3.6565 5.0933 | lr 2.4e-04 | norm 103.9777 | dt 0.215
type train | step 15050 | loss 15.3261 1.2070 3.6971 5.1531 | lr 2.4e-04 | norm 118.4112 | dt 0.186
type train | step 15060 | loss 14.9765 1.1961 3.7060 5.0960 | lr 2.4e-04 | norm 96.6366 | dt 0.208
type train | step 15070 | loss 14.5283 1.2034 3.7035 5.1852 | lr 2.4e-04 | norm 106.2345 | dt 0.180
type train | step 15080 | loss 14.2108 1.1926 3.6350 5.1392 | lr 2.4e-04 | norm 100.9567 | dt 0.180
type train | step 15090 | loss 13.9768 1.2144 3.6439 5.1915 | lr 2.4e-04 | norm 99.8546 | dt 0.174
type train | step 15100 | loss 13.6130 1.1729 3.6146 5.0219 | lr 2.4e-04 | norm 106.8087 | dt 0.188
type train | step 15110 | loss 13.3418 1.2250 3.6712 5.1938 | lr 2.4e-04 | norm 89.1239 | dt 0.173
type train | step 15120 | loss 12.9188 1.2054 3.6849 5.1326 | lr 2.4e-04 | norm 103.3566 | dt 0.206
type train | step 15130 | loss 12.4596 1.1904 3.6678 5.1295 | lr 2.3e-04 | norm 97.0855 | dt 0.201
type train | step 15140 | loss 12.4189 1.2055 3.6697 5.1653 | lr 2.3e-04 | norm 108.4718 | dt 0.178
type train | step 15150 | loss 11.8091 1.1990 3.6576 5.1603 | lr 2.3e-04 | norm 104.7046 | dt 0.165
type train | step 15160 | loss 11.6030 1.1953 3.6550 5.1253 | lr 2.3e-04 | norm 104.8179 | dt 0.207
type train | step 15170 | loss 11.4197 1.2328 3.7058 5.2713 | lr 2.3e-04 | norm 93.7896 | dt 0.202
type train | step 15180 | loss 11.0651 1.1865 3.6664 5.0948 | lr 2.3e-04 | norm 101.4813 | dt 0.191
type train | step 15190 | loss 10.7005 1.2130 3.6841 5.1851 | lr 2.3e-04 | norm 100.0724 | dt 0.179
type train | step 15200 | loss 10.7994 1.2418 3.7120 5.1993 | lr 2.3e-04 | norm 95.3300 | dt 0.216
type train | step 15210 | loss 10.5352 1.2218 3.6593 5.1887 | lr 2.3e-04 | norm 93.7142 | dt 0.215
type train | step 15220 | loss 10.4244 1.1886 3.7006 5.0685 | lr 2.3e-04 | norm 94.4985 | dt 0.170
type train | step 15230 | loss 10.2537 1.2197 3.6329 5.2186 | lr 2.3e-04 | norm 100.3768 | dt 0.203
type train | step 15240 | loss 9.9529 1.1873 3.6553 5.0879 | lr 2.3e-04 | norm 91.3899 | dt 0.194
type train | step 15250 | loss 10.0247 1.2005 3.6793 5.1251 | lr 2.3e-04 | norm 92.9907 | dt 0.200
type train | step 15260 | loss 9.8463 1.2066 3.6683 5.1324 | lr 2.3e-04 | norm 97.8406 | dt 0.171
type train | step 15270 | loss 9.6688 1.2094 3.6498 5.1562 | lr 2.3e-04 | norm 101.0775 | dt 0.202
type train | step 15280 | loss 9.4823 1.1790 3.6562 5.0317 | lr 2.3e-04 | norm 96.4581 | dt 0.200
type train | step 15290 | loss 9.6760 1.2500 3.7174 5.2971 | lr 2.3e-04 | norm 113.3982 | dt 0.197
type train | step 15300 | loss 9.2570 1.1904 3.6441 5.0040 | lr 2.3e-04 | norm 98.3157 | dt 0.202
type train | step 15310 | loss 9.1492 1.1694 3.6543 5.0736 | lr 2.3e-04 | norm 101.3760 | dt 0.202
type train | step 15320 | loss 9.1142 1.1866 3.6631 5.0704 | lr 2.2e-04 | norm 94.7358 | dt 0.192
type train | step 15330 | loss 9.3138 1.2311 3.6833 5.2630 | lr 2.2e-04 | norm 113.5641 | dt 0.180
type train | step 15340 | loss 9.1060 1.1918 3.6804 5.0498 | lr 2.2e-04 | norm 95.7150 | dt 0.203
type train | step 15350 | loss 8.9249 1.1719 3.6431 5.0554 | lr 2.2e-04 | norm 96.4059 | dt 0.169
type train | step 15360 | loss 8.8091 1.1904 3.6534 5.0071 | lr 2.2e-04 | norm 91.8372 | dt 0.201
type train | step 15370 | loss 8.9556 1.2211 3.7101 5.1268 | lr 2.2e-04 | norm 91.1786 | dt 0.197
type train | step 15380 | loss 8.6235 1.1912 3.7100 5.1563 | lr 2.2e-04 | norm 99.3808 | dt 0.205
type train | step 15390 | loss 8.9660 1.2802 3.7653 5.2736 | lr 2.2e-04 | norm 97.7203 | dt 0.169
type train | step 15400 | loss 8.6109 1.1983 3.6960 5.0865 | lr 2.2e-04 | norm 98.5274 | dt 0.182
type train | step 15410 | loss 8.2891 1.2098 3.6632 5.1192 | lr 2.2e-04 | norm 85.4190 | dt 0.201
type train | step 15420 | loss 8.0007 1.1791 3.6459 4.9644 | lr 2.2e-04 | norm 93.7504 | dt 0.201
type train | step 15430 | loss 8.2513 1.2247 3.6619 5.1206 | lr 2.2e-04 | norm 97.9192 | dt 0.215
type train | step 15440 | loss 7.9362 1.1908 3.6590 5.1805 | lr 2.2e-04 | norm 102.8799 | dt 0.190
type train | step 15450 | loss 8.0669 1.2443 3.7079 5.2244 | lr 2.2e-04 | norm 98.1742 | dt 0.203
type train | step 15460 | loss 7.9327 1.2192 3.6793 5.1463 | lr 2.2e-04 | norm 91.9535 | dt 0.178
type train | step 15470 | loss 7.6272 1.2044 3.6845 5.0673 | lr 2.2e-04 | norm 96.8454 | dt 0.170
type train | step 15480 | loss 7.4694 1.1925 3.6304 4.9872 | lr 2.2e-04 | norm 99.3865 | dt 0.207
type train | step 15490 | loss 7.6582 1.2383 3.7028 5.1337 | lr 2.2e-04 | norm 98.5102 | dt 0.189
type train | step 15500 | loss 7.4397 1.1966 3.6833 5.0370 | lr 2.2e-04 | norm 95.7449 | dt 0.206
type train | step 15510 | loss 7.3368 1.2061 3.6352 5.0701 | lr 2.2e-04 | norm 101.7824 | dt 0.198
type train | step 15520 | loss 7.3981 1.2083 3.6818 5.0712 | lr 2.2e-04 | norm 98.3552 | dt 0.165
type train | step 15530 | loss 7.3135 1.2231 3.7145 5.1859 | lr 2.1e-04 | norm 94.3463 | dt 0.199
type train | step 15540 | loss 7.1193 1.1897 3.6190 5.0078 | lr 2.1e-04 | norm 95.5274 | dt 0.211
type train | step 15550 | loss 7.0209 1.2219 3.6464 5.0799 | lr 2.1e-04 | norm 95.2843 | dt 0.194
type train | step 15560 | loss 7.0691 1.2036 3.7020 5.1094 | lr 2.1e-04 | norm 91.7431 | dt 0.191
type train | step 15570 | loss 6.9824 1.2071 3.6794 5.1070 | lr 2.1e-04 | norm 93.0930 | dt 0.200
type train | step 15580 | loss 6.8438 1.2072 3.6858 5.0961 | lr 2.1e-04 | norm 92.8396 | dt 0.184
type train | step 15590 | loss 6.7875 1.1861 3.6351 5.0171 | lr 2.1e-04 | norm 100.8844 | dt 0.170
type train | step 15600 | loss 6.5951 1.2010 3.6608 5.0467 | lr 2.1e-04 | norm 95.1880 | dt 0.222
type train | step 15610 | loss 6.6380 1.1895 3.6663 4.9724 | lr 2.1e-04 | norm 110.1828 | dt 0.191
type train | step 15620 | loss 6.4318 1.2058 3.7283 5.1109 | lr 2.1e-04 | norm 99.7643 | dt 0.171
type train | step 15630 | loss 6.5837 1.2371 3.7518 5.2357 | lr 2.1e-04 | norm 107.5832 | dt 0.174
type train | step 15640 | loss 6.3730 1.2086 3.6536 5.0644 | lr 2.1e-04 | norm 102.4466 | dt 0.200
type train | step 15650 | loss 6.3898 1.1799 3.6314 4.9884 | lr 2.1e-04 | norm 85.1835 | dt 0.223
type train | step 15660 | loss 6.3168 1.2158 3.6934 5.0690 | lr 2.1e-04 | norm 102.5577 | dt 0.187
type train | step 15670 | loss 6.3832 1.2037 3.7244 5.0135 | lr 2.1e-04 | norm 86.6515 | dt 0.165
type train | step 15680 | loss 6.1780 1.2082 3.7114 5.0984 | lr 2.1e-04 | norm 98.0302 | dt 0.198
type train | step 15690 | loss 6.0895 1.1868 3.6846 5.0518 | lr 2.1e-04 | norm 95.3203 | dt 0.178
type train | step 15700 | loss 6.1701 1.2151 3.6824 5.0972 | lr 2.1e-04 | norm 99.3980 | dt 0.182
type train | step 15710 | loss 6.0503 1.1706 3.6521 4.9396 | lr 2.1e-04 | norm 97.1257 | dt 0.216
type train | step 15720 | loss 6.1105 1.2243 3.6899 5.0982 | lr 2.1e-04 | norm 92.0537 | dt 0.215
type train | step 15730 | loss 6.0175 1.2044 3.6795 5.0591 | lr 2.0e-04 | norm 101.8555 | dt 0.187
type train | step 15740 | loss 5.9118 1.1964 3.6776 5.0625 | lr 2.0e-04 | norm 104.6000 | dt 0.206
type train | step 15750 | loss 5.9257 1.2086 3.7004 5.0813 | lr 2.0e-04 | norm 100.8433 | dt 0.169
type train | step 15760 | loss 5.8286 1.1995 3.6236 5.0639 | lr 2.0e-04 | norm 91.7142 | dt 0.196
type train | step 15770 | loss 5.7980 1.1900 3.6562 5.0498 | lr 2.0e-04 | norm 100.6437 | dt 0.168
type train | step 15780 | loss 5.8551 1.2224 3.6808 5.1791 | lr 2.0e-04 | norm 97.1592 | dt 0.217
type train | step 15790 | loss 5.6221 1.1876 3.6577 5.0222 | lr 2.0e-04 | norm 105.6708 | dt 0.214
type train | step 15800 | loss 5.5186 1.2015 3.6514 5.0850 | lr 2.0e-04 | norm 94.8749 | dt 0.181
type train | step 15810 | loss 5.6767 1.2339 3.7010 5.1111 | lr 2.0e-04 | norm 95.2068 | dt 0.184
type train | step 15820 | loss 5.5270 1.2165 3.6241 5.0962 | lr 2.0e-04 | norm 89.4625 | dt 0.190
type train | step 15830 | loss 5.5197 1.1780 3.6488 4.9974 | lr 2.0e-04 | norm 92.1032 | dt 0.181
type train | step 15840 | loss 5.4022 1.2122 3.6739 5.1306 | lr 2.0e-04 | norm 99.3760 | dt 0.188
type train | step 15850 | loss 5.2597 1.1721 3.6439 4.9945 | lr 2.0e-04 | norm 96.2928 | dt 0.190
type train | step 15860 | loss 5.3106 1.1974 3.6848 5.0434 | lr 2.0e-04 | norm 100.7550 | dt 0.194
type train | step 15870 | loss 5.3293 1.1936 3.6618 5.0468 | lr 2.0e-04 | norm 101.3951 | dt 0.216
type train | step 15880 | loss 5.4153 1.1974 3.6155 5.0524 | lr 2.0e-04 | norm 94.9174 | dt 0.209
type train | step 15890 | loss 5.4134 1.1612 3.6893 4.9435 | lr 2.0e-04 | norm 98.4754 | dt 0.206
type train | step 15900 | loss 5.5055 1.2376 3.7190 5.1950 | lr 2.0e-04 | norm 110.9949 | dt 0.204
type train | step 15910 | loss 5.2113 1.1796 3.6129 4.9145 | lr 2.0e-04 | norm 104.7379 | dt 0.184
type train | step 15920 | loss 5.2177 1.1593 3.6281 4.9841 | lr 2.0e-04 | norm 101.8541 | dt 0.201
type train | step 15930 | loss 5.1844 1.1700 3.6559 4.9826 | lr 2.0e-04 | norm 92.9656 | dt 0.200
type train | step 15940 | loss 5.2566 1.2190 3.6802 5.1651 | lr 2.0e-04 | norm 101.0846 | dt 0.203
type train | step 15950 | loss 5.1409 1.1823 3.6168 4.9668 | lr 1.9e-04 | norm 98.5907 | dt 0.176
type train | step 15960 | loss 5.0007 1.1600 3.6183 4.9531 | lr 1.9e-04 | norm 91.9033 | dt 0.209
type train | step 15970 | loss 4.9826 1.1789 3.6498 4.9166 | lr 1.9e-04 | norm 95.3161 | dt 0.187
type train | step 15980 | loss 5.0690 1.2060 3.6690 5.0323 | lr 1.9e-04 | norm 93.0226 | dt 0.192
type train | step 15990 | loss 4.9717 1.1714 3.7044 5.0483 | lr 1.9e-04 | norm 105.4796 | dt 0.174
type train | step 16000 | loss 5.1402 1.2615 3.6804 5.1672 | lr 1.9e-04 | norm 88.8238 | dt 0.178
type train | step 16010 | loss 4.9463 1.1865 3.6875 5.0028 | lr 1.9e-04 | norm 100.9139 | dt 0.191
type train | step 16020 | loss 4.7779 1.1852 3.6630 5.0388 | lr 1.9e-04 | norm 104.7346 | dt 0.207
type train | step 16030 | loss 4.6294 1.1639 3.6066 4.8691 | lr 1.9e-04 | norm 90.1302 | dt 0.211
type train | step 16040 | loss 4.7813 1.2070 3.6720 5.0428 | lr 1.9e-04 | norm 93.3091 | dt 0.188
type train | step 16050 | loss 4.6778 1.1703 3.7142 5.0881 | lr 1.9e-04 | norm 103.0722 | dt 0.188
type train | step 16060 | loss 4.8021 1.2211 3.6664 5.1287 | lr 1.9e-04 | norm 95.4744 | dt 0.185
type train | step 16070 | loss 4.7493 1.1955 3.6554 5.0561 | lr 1.9e-04 | norm 85.2987 | dt 0.170
type train | step 16080 | loss 4.5552 1.1808 3.6736 5.0005 | lr 1.9e-04 | norm 110.1156 | dt 0.168
type train | step 16090 | loss 4.4664 1.1668 3.6284 4.9251 | lr 1.9e-04 | norm 94.5639 | dt 0.178
type train | step 16100 | loss 4.6245 1.2115 3.7164 5.0290 | lr 1.9e-04 | norm 95.0377 | dt 0.171
type train | step 16110 | loss 4.5538 1.1775 3.6866 4.9714 | lr 1.9e-04 | norm 96.4318 | dt 0.180
type train | step 16120 | loss 4.4956 1.1868 3.6387 4.9856 | lr 1.9e-04 | norm 88.8936 | dt 0.193
type train | step 16130 | loss 4.5644 1.1860 3.6501 5.0011 | lr 1.9e-04 | norm 93.0537 | dt 0.187
type train | step 16140 | loss 4.4964 1.2064 3.6942 5.1167 | lr 1.9e-04 | norm 98.1652 | dt 0.172
type train | step 16150 | loss 4.4618 1.1659 3.6470 4.9305 | lr 1.9e-04 | norm 105.2563 | dt 0.219
type train | step 16160 | loss 4.4417 1.2028 3.6809 5.0153 | lr 1.9e-04 | norm 102.7894 | dt 0.194
type train | step 16170 | loss 4.5341 1.1901 3.6859 5.0567 | lr 1.9e-04 | norm 100.0810 | dt 0.204
type train | step 16180 | loss 4.4256 1.1795 3.6970 5.0430 | lr 1.8e-04 | norm 95.7027 | dt 0.199
type train | step 16190 | loss 4.4230 1.1916 3.6646 5.0442 | lr 1.8e-04 | norm 93.7777 | dt 0.176
type train | step 16200 | loss 4.3674 1.1743 3.6600 4.9344 | lr 1.8e-04 | norm 95.6244 | dt 0.178
type train | step 16210 | loss 4.3589 1.1779 3.6349 4.9759 | lr 1.8e-04 | norm 100.9842 | dt 0.206
type train | step 16220 | loss 4.3925 1.1671 3.6555 4.8959 | lr 1.8e-04 | norm 103.2252 | dt 0.169
type train | step 16230 | loss 4.3131 1.1812 3.6881 5.0565 | lr 1.8e-04 | norm 95.8890 | dt 0.171
type train | step 16240 | loss 4.3936 1.2148 3.7446 5.1597 | lr 1.8e-04 | norm 101.8503 | dt 0.175
type train | step 16250 | loss 4.2523 1.1789 3.6462 5.0004 | lr 1.8e-04 | norm 106.4321 | dt 0.205
type train | step 16260 | loss 4.2706 1.1572 3.6591 4.9368 | lr 1.8e-04 | norm 86.3117 | dt 0.175
type train | step 16270 | loss 4.2570 1.1800 3.6927 5.0393 | lr 1.8e-04 | norm 104.5885 | dt 0.169
type train | step 16280 | loss 4.3204 1.1715 3.6997 4.9705 | lr 1.8e-04 | norm 89.8185 | dt 0.175
type train | step 16290 | loss 4.1938 1.1875 3.6994 5.0503 | lr 1.8e-04 | norm 90.3633 | dt 0.210
type train | step 16300 | loss 4.1090 1.1661 3.6287 4.9837 | lr 1.8e-04 | norm 84.4757 | dt 0.214
type train | step 16310 | loss 4.1741 1.1932 3.6487 5.0222 | lr 1.8e-04 | norm 88.7478 | dt 0.178
type train | step 16320 | loss 4.0977 1.1452 3.6541 4.8724 | lr 1.8e-04 | norm 93.0279 | dt 0.166
type train | step 16330 | loss 4.2101 1.1926 3.6841 5.0374 | lr 1.8e-04 | norm 103.3797 | dt 0.218
type train | step 16340 | loss 4.1090 1.1733 3.6665 4.9899 | lr 1.8e-04 | norm 100.2380 | dt 0.178
type train | step 16350 | loss 4.0242 1.1653 3.6696 4.9884 | lr 1.8e-04 | norm 103.4602 | dt 0.190
type train | step 16360 | loss 4.0244 1.1856 3.7075 5.0275 | lr 1.8e-04 | norm 102.7471 | dt 0.175
type train | step 16370 | loss 4.0202 1.1792 3.6612 4.9947 | lr 1.8e-04 | norm 89.4720 | dt 0.183
type train | step 16380 | loss 4.0033 1.1726 3.6392 4.9809 | lr 1.8e-04 | norm 95.6540 | dt 0.207
type train | step 16390 | loss 4.0262 1.1938 3.6850 5.1334 | lr 1.8e-04 | norm 97.7561 | dt 0.213
type train | step 16400 | loss 3.9187 1.1559 3.6622 4.9625 | lr 1.8e-04 | norm 97.2848 | dt 0.205
type train | step 16410 | loss 3.8692 1.1763 3.6840 5.0341 | lr 1.8e-04 | norm 91.0653 | dt 0.204
type train | step 16420 | loss 3.9722 1.2036 3.6977 5.0643 | lr 1.7e-04 | norm 97.8261 | dt 0.179
type train | step 16430 | loss 3.8594 1.1848 3.6315 5.0327 | lr 1.7e-04 | norm 86.8218 | dt 0.166
type train | step 16440 | loss 3.8459 1.1510 3.6895 4.9395 | lr 1.7e-04 | norm 104.2267 | dt 0.208
type train | step 16450 | loss 3.8366 1.1892 3.6430 5.0802 | lr 1.7e-04 | norm 97.4795 | dt 0.221
type train | step 16460 | loss 3.7182 1.1512 3.6595 4.9595 | lr 1.7e-04 | norm 98.4846 | dt 0.191
type train | step 16470 | loss 3.7766 1.1692 3.6103 4.9914 | lr 1.7e-04 | norm 96.2728 | dt 0.187
type train | step 16480 | loss 3.7655 1.1775 3.6062 4.9974 | lr 1.7e-04 | norm 93.3943 | dt 0.176
type train | step 16490 | loss 3.7400 1.1807 3.6573 5.0104 | lr 1.7e-04 | norm 96.2990 | dt 0.171
type train | step 16500 | loss 3.7282 1.1453 3.5718 4.9026 | lr 1.7e-04 | norm 95.6524 | dt 0.214
type train | step 16510 | loss 3.8559 1.2116 3.6589 5.1487 | lr 1.7e-04 | norm 112.5627 | dt 0.179
type train | step 16520 | loss 3.6640 1.1522 3.6587 4.8789 | lr 1.7e-04 | norm 102.7149 | dt 0.205
type train | step 16530 | loss 3.6357 1.1382 3.5864 4.9449 | lr 1.7e-04 | norm 99.9437 | dt 0.205
type train | step 16540 | loss 3.6129 1.1458 3.6343 4.9464 | lr 1.7e-04 | norm 88.6333 | dt 0.203
type train | step 16550 | loss 3.6911 1.1955 3.6267 5.1188 | lr 1.7e-04 | norm 92.8592 | dt 0.173
type train | step 16560 | loss 3.6324 1.1559 3.6785 4.9216 | lr 1.7e-04 | norm 90.8138 | dt 0.171
type train | step 16570 | loss 3.5457 1.1351 3.6066 4.9242 | lr 1.7e-04 | norm 95.4088 | dt 0.199
type train | step 16580 | loss 3.5843 1.1521 3.6074 4.8902 | lr 1.7e-04 | norm 101.5029 | dt 0.208
type train | step 16590 | loss 3.6195 1.1883 3.6308 4.9994 | lr 1.7e-04 | norm 83.3309 | dt 0.207
type train | step 16600 | loss 3.5961 1.1594 3.6920 5.0266 | lr 1.7e-04 | norm 95.9618 | dt 0.197
type train | step 16610 | loss 3.7004 1.2423 3.6814 5.1319 | lr 1.7e-04 | norm 96.5990 | dt 0.197
type train | step 16620 | loss 3.6037 1.1685 3.6499 4.9608 | lr 1.7e-04 | norm 92.2045 | dt 0.189
type train | step 16630 | loss 3.5094 1.1747 3.6643 5.0101 | lr 1.7e-04 | norm 87.3209 | dt 0.171
type train | step 16640 | loss 3.4415 1.1503 3.6379 4.8582 | lr 1.7e-04 | norm 96.0531 | dt 0.199
type train | step 16650 | loss 3.5572 1.1947 3.6669 5.0066 | lr 1.7e-04 | norm 99.1644 | dt 0.200
type train | step 16660 | loss 3.5037 1.1598 3.7420 5.0458 | lr 1.7e-04 | norm 94.1889 | dt 0.199
type train | step 16670 | loss 3.5934 1.2084 3.6800 5.0899 | lr 1.6e-04 | norm 94.3545 | dt 0.199
type train | step 16680 | loss 3.5845 1.1820 3.6930 5.0231 | lr 1.6e-04 | norm 88.9940 | dt 0.199
type train | step 16690 | loss 3.4956 1.1651 3.6535 4.9583 | lr 1.6e-04 | norm 101.6855 | dt 0.201
type train | step 16700 | loss 3.4258 1.1531 3.6242 4.8923 | lr 1.6e-04 | norm 94.8821 | dt 0.174
type train | step 16710 | loss 3.5320 1.1984 3.7583 5.0234 | lr 1.6e-04 | norm 86.7306 | dt 0.193
type train | step 16720 | loss 3.5345 1.1629 3.6650 4.9445 | lr 1.6e-04 | norm 84.2243 | dt 0.178
type train | step 16730 | loss 3.5124 1.1616 3.6681 4.9773 | lr 1.6e-04 | norm 91.1600 | dt 0.199
type train | step 16740 | loss 3.5669 1.1694 3.6570 4.9716 | lr 1.6e-04 | norm 87.9591 | dt 0.185
type train | step 16750 | loss 3.5769 1.1957 3.7162 5.0915 | lr 1.6e-04 | norm 82.7267 | dt 0.169
type train | step 16760 | loss 3.5229 1.1503 3.6153 4.9171 | lr 1.6e-04 | norm 97.9279 | dt 0.177
type train | step 16770 | loss 3.5336 1.1850 3.6893 5.0331 | lr 1.6e-04 | norm 95.2001 | dt 0.212
type train | step 16780 | loss 3.5975 1.1740 3.6859 5.0651 | lr 1.6e-04 | norm 103.6383 | dt 0.182
type train | step 16790 | loss 3.5318 1.1663 3.6801 5.0411 | lr 1.6e-04 | norm 81.1041 | dt 0.187
type train | step 16800 | loss 3.5658 1.1720 3.6504 5.0211 | lr 1.6e-04 | norm 76.7172 | dt 0.173
type train | step 16810 | loss 3.5495 1.1497 3.5974 4.9603 | lr 1.6e-04 | norm 95.6654 | dt 0.184
type train | step 16820 | loss 3.5740 1.1584 3.6002 4.9700 | lr 1.6e-04 | norm 104.5311 | dt 0.196
type train | step 16830 | loss 3.6119 1.1465 3.6392 4.8950 | lr 1.6e-04 | norm 106.4867 | dt 0.170
type train | step 16840 | loss 3.5329 1.1621 3.7041 5.0186 | lr 1.6e-04 | norm 82.4263 | dt 0.204
type train | step 16850 | loss 3.6616 1.1980 3.7052 5.1701 | lr 1.6e-04 | norm 96.5565 | dt 0.199
type train | step 16860 | loss 3.5814 1.1704 3.6506 5.0209 | lr 1.6e-04 | norm 101.6133 | dt 0.181
type train | step 16870 | loss 3.6361 1.1453 3.6476 4.9280 | lr 1.6e-04 | norm 84.2701 | dt 0.169
type train | step 16880 | loss 3.5759 1.1708 3.6313 5.0030 | lr 1.6e-04 | norm 95.0318 | dt 0.185
type train | step 16890 | loss 3.6874 1.1594 3.6719 4.9704 | lr 1.6e-04 | norm 93.1720 | dt 0.189
type train | step 16900 | loss 3.6192 1.1724 3.7140 5.0475 | lr 1.6e-04 | norm 104.0950 | dt 0.182
type train | step 16910 | loss 3.5762 1.1585 3.6345 4.9723 | lr 1.6e-04 | norm 93.6292 | dt 0.171
type train | step 16920 | loss 3.6343 1.1767 3.7268 5.0212 | lr 1.6e-04 | norm 79.0640 | dt 0.198
type train | step 16930 | loss 3.6310 1.1311 3.6203 4.8867 | lr 1.6e-04 | norm 88.0534 | dt 0.196
type train | step 16940 | loss 3.6704 1.1841 3.7264 5.0636 | lr 1.5e-04 | norm 104.8997 | dt 0.180
type train | step 16950 | loss 3.6701 1.1705 3.7051 5.0010 | lr 1.5e-04 | norm 91.5822 | dt 0.170
type train | step 16960 | loss 3.6177 1.1523 3.6857 5.0034 | lr 1.5e-04 | norm 96.9005 | dt 0.187
type train | step 16970 | loss 3.6040 1.1694 3.7022 5.0488 | lr 1.5e-04 | norm 88.1434 | dt 0.189
type train | step 16980 | loss 3.5889 1.1645 3.6300 5.0303 | lr 1.5e-04 | norm 86.6392 | dt 0.197
type train | step 16990 | loss 3.6189 1.1611 3.6217 5.0170 | lr 1.5e-04 | norm 100.3701 | dt 0.192
type train | step 17000 | loss 3.6193 1.1866 3.6739 5.1478 | lr 1.5e-04 | norm 96.0119 | dt 0.201
type train | step 17010 | loss 3.5009 1.1446 3.6422 4.9825 | lr 1.5e-04 | norm 88.5751 | dt 0.204
type train | step 17020 | loss 3.4890 1.1630 3.6936 5.0500 | lr 1.5e-04 | norm 84.2074 | dt 0.176
type train | step 17030 | loss 3.6446 1.1901 3.6915 5.0708 | lr 1.5e-04 | norm 94.1584 | dt 0.207
type train | step 17040 | loss 3.5554 1.1750 3.6586 5.0521 | lr 1.5e-04 | norm 87.5026 | dt 0.202
type train | step 17050 | loss 3.5749 1.1375 3.6539 4.9777 | lr 1.5e-04 | norm 103.9618 | dt 0.170
type train | step 17060 | loss 3.5326 1.1733 3.6585 5.0838 | lr 1.5e-04 | norm 97.0091 | dt 0.197
type train | step 17070 | loss 3.4744 1.1394 3.6191 4.9659 | lr 1.5e-04 | norm 97.9272 | dt 0.182
type train | step 17080 | loss 3.4856 1.1546 3.6538 5.0196 | lr 1.5e-04 | norm 93.0661 | dt 0.191
type train | step 17090 | loss 3.4755 1.1579 3.6415 5.0246 | lr 1.5e-04 | norm 90.4186 | dt 0.195
type train | step 17100 | loss 3.4733 1.1629 3.6568 5.0283 | lr 1.5e-04 | norm 96.5164 | dt 0.194
type train | step 17110 | loss 3.4821 1.1318 3.6374 4.9232 | lr 1.5e-04 | norm 90.1009 | dt 0.184
type train | step 17120 | loss 3.5609 1.2038 3.6869 5.1731 | lr 1.5e-04 | norm 105.7601 | dt 0.198
type train | step 17130 | loss 3.3955 1.1408 3.6117 4.8925 | lr 1.5e-04 | norm 90.1359 | dt 0.213
type train | step 17140 | loss 3.3900 1.1279 3.6527 4.9733 | lr 1.5e-04 | norm 88.9679 | dt 0.201
type train | step 17150 | loss 3.4018 1.1369 3.6717 4.9775 | lr 1.5e-04 | norm 85.3162 | dt 0.167
type train | step 17160 | loss 3.4935 1.1797 3.6864 5.1584 | lr 1.5e-04 | norm 92.2539 | dt 0.173
type train | step 17170 | loss 3.4238 1.1398 3.6940 4.9839 | lr 1.5e-04 | norm 92.7618 | dt 0.195
type train | step 17180 | loss 3.3576 1.1251 3.6442 4.9607 | lr 1.5e-04 | norm 85.8115 | dt 0.186
type train | step 17190 | loss 3.3918 1.1388 3.6566 4.9404 | lr 1.5e-04 | norm 92.1503 | dt 0.180
type train | step 17200 | loss 3.4156 1.1676 3.6519 5.0419 | lr 1.5e-04 | norm 84.2018 | dt 0.200
type train | step 17210 | loss 3.4013 1.1386 3.6587 5.0843 | lr 1.5e-04 | norm 96.6177 | dt 0.169
type train | step 17220 | loss 3.5291 1.2255 3.6798 5.1861 | lr 1.5e-04 | norm 87.0872 | dt 0.184
type train | step 17230 | loss 3.4024 1.1516 3.6986 5.0072 | lr 1.5e-04 | norm 79.1081 | dt 0.209
type train | step 17240 | loss 3.3323 1.1556 3.6318 5.0435 | lr 1.4e-04 | norm 87.5044 | dt 0.187
type train | step 17250 | loss 3.2733 1.1314 3.5778 4.9207 | lr 1.4e-04 | norm 97.4079 | dt 0.189
type train | step 17260 | loss 3.3765 1.1712 3.6120 5.0596 | lr 1.4e-04 | norm 96.2469 | dt 0.171
type train | step 17270 | loss 3.3102 1.1405 3.6854 5.1113 | lr 1.4e-04 | norm 86.1863 | dt 0.217
type train | step 17280 | loss 3.4121 1.1855 3.6788 5.1716 | lr 1.4e-04 | norm 97.0940 | dt 0.200
type train | step 17290 | loss 3.3783 1.1607 3.6203 5.0886 | lr 1.4e-04 | norm 84.6010 | dt 0.205
type train | step 17300 | loss 3.2721 1.1470 3.6435 5.0327 | lr 1.4e-04 | norm 92.1860 | dt 0.170
type train | step 17310 | loss 3.2723 1.1366 3.6200 4.9247 | lr 1.4e-04 | norm 101.1897 | dt 0.174
type train | step 17320 | loss 3.3331 1.1768 3.6929 5.0877 | lr 1.4e-04 | norm 81.5917 | dt 0.228
type train | step 17330 | loss 3.3124 1.1461 3.6828 4.9898 | lr 1.4e-04 | norm 90.0709 | dt 0.175
type train | step 17340 | loss 3.2923 1.1520 3.6548 5.0395 | lr 1.4e-04 | norm 86.4961 | dt 0.178
type train | step 17350 | loss 3.3377 1.1541 3.6356 5.0724 | lr 1.4e-04 | norm 86.4954 | dt 0.199
type train | step 17360 | loss 3.3454 1.1775 3.7101 5.1701 | lr 1.4e-04 | norm 98.6017 | dt 0.207
type train | step 17370 | loss 3.2736 1.1348 3.6464 4.9870 | lr 1.4e-04 | norm 91.8866 | dt 0.178
type train | step 17380 | loss 3.2837 1.1666 3.6807 5.0804 | lr 1.4e-04 | norm 94.7271 | dt 0.209
type train | step 17390 | loss 3.3476 1.1614 3.6975 5.0758 | lr 1.4e-04 | norm 98.4951 | dt 0.205
type train | step 17400 | loss 3.3036 1.1540 3.7015 5.0891 | lr 1.4e-04 | norm 86.7089 | dt 0.204
type train | step 17410 | loss 3.2723 1.1580 3.6797 5.1105 | lr 1.4e-04 | norm 88.6016 | dt 0.187
type train | step 17420 | loss 3.2434 1.1330 3.6568 5.0063 | lr 1.4e-04 | norm 87.9518 | dt 0.204
type train | step 17430 | loss 3.2481 1.1469 3.6480 5.0254 | lr 1.4e-04 | norm 95.5387 | dt 0.199
type train | step 17440 | loss 3.2725 1.1306 3.6643 4.9707 | lr 1.4e-04 | norm 96.7035 | dt 0.201
type train | step 17450 | loss 3.2170 1.1415 3.7160 5.0847 | lr 1.4e-04 | norm 86.6785 | dt 0.200
type train | step 17460 | loss 3.3222 1.1773 3.7513 5.2218 | lr 1.4e-04 | norm 93.3402 | dt 0.207
type train | step 17470 | loss 3.2430 1.1454 3.6088 5.0564 | lr 1.4e-04 | norm 95.2152 | dt 0.165
type train | step 17480 | loss 3.2464 1.1197 3.6604 4.9977 | lr 1.4e-04 | norm 85.2662 | dt 0.202
type train | step 17490 | loss 3.1986 1.1494 3.6788 5.0649 | lr 1.4e-04 | norm 103.5936 | dt 0.202
type train | step 17500 | loss 3.2895 1.1405 3.7028 5.0417 | lr 1.4e-04 | norm 85.7503 | dt 0.184
type train | step 17510 | loss 3.2333 1.1515 3.6551 5.1056 | lr 1.4e-04 | norm 98.6369 | dt 0.182
type train | step 17520 | loss 3.1977 1.1331 3.6382 5.0614 | lr 1.4e-04 | norm 89.3022 | dt 0.222
type train | step 17530 | loss 3.2460 1.1542 3.6115 5.1130 | lr 1.4e-04 | norm 92.3744 | dt 0.165
type train | step 17540 | loss 3.1889 1.1123 3.5930 4.9901 | lr 1.4e-04 | norm 85.4482 | dt 0.178
type train | step 17550 | loss 3.2357 1.1623 3.6275 5.1314 | lr 1.4e-04 | norm 99.2295 | dt 0.177
type train | step 17560 | loss 3.2442 1.1428 3.6803 5.0822 | lr 1.4e-04 | norm 87.9121 | dt 0.203
type train | step 17570 | loss 3.1839 1.1323 3.6032 5.0543 | lr 1.3e-04 | norm 92.1003 | dt 0.193
type train | step 17580 | loss 3.1667 1.1530 3.7261 5.0855 | lr 1.3e-04 | norm 83.8990 | dt 0.170
type train | step 17590 | loss 3.2021 1.1412 3.5682 5.0801 | lr 1.3e-04 | norm 87.0005 | dt 0.200
type train | step 17600 | loss 3.2362 1.1304 3.6646 5.0816 | lr 1.3e-04 | norm 91.0292 | dt 0.192
type train | step 17610 | loss 3.2566 1.1623 3.6544 5.2191 | lr 1.3e-04 | norm 92.2448 | dt 0.192
type train | step 17620 | loss 3.1844 1.1229 3.6171 5.0480 | lr 1.3e-04 | norm 86.3037 | dt 0.201
type train | step 17630 | loss 3.1550 1.1402 3.6753 5.1010 | lr 1.3e-04 | norm 90.2330 | dt 0.165
type train | step 17640 | loss 3.2835 1.1649 3.6420 5.1264 | lr 1.3e-04 | norm 89.9943 | dt 0.176
type train | step 17650 | loss 3.1953 1.1511 3.6773 5.1269 | lr 1.3e-04 | norm 83.1317 | dt 0.188
type train | step 17660 | loss 3.1815 1.1145 3.5932 5.0511 | lr 1.3e-04 | norm 102.2075 | dt 0.184
type train | step 17670 | loss 3.1792 1.1541 3.6413 5.1593 | lr 1.3e-04 | norm 97.8745 | dt 0.198
type train | step 17680 | loss 3.0980 1.1144 3.5863 5.0604 | lr 1.3e-04 | norm 93.0402 | dt 0.170
type train | step 17690 | loss 3.1692 1.1344 3.6763 5.0856 | lr 1.3e-04 | norm 91.9291 | dt 0.190
type train | step 17700 | loss 3.1440 1.1378 3.6524 5.1040 | lr 1.3e-04 | norm 80.2988 | dt 0.177
type train | step 17710 | loss 3.1550 1.1393 3.5928 5.1298 | lr 1.3e-04 | norm 94.3522 | dt 0.209
type train | step 17720 | loss 3.1435 1.1067 3.5827 4.9887 | lr 1.3e-04 | norm 89.2903 | dt 0.187
type train | step 17730 | loss 3.2781 1.1782 3.6421 5.2544 | lr 1.3e-04 | norm 112.2474 | dt 0.212
type train | step 17740 | loss 3.1037 1.1187 3.6273 4.9675 | lr 1.3e-04 | norm 94.6226 | dt 0.189
type train | step 17750 | loss 3.1119 1.1025 3.6067 5.0276 | lr 1.3e-04 | norm 96.3585 | dt 0.204
type train | step 17760 | loss 3.1128 1.1141 3.6271 5.0533 | lr 1.3e-04 | norm 83.0563 | dt 0.202
type train | step 17770 | loss 3.1980 1.1549 3.6063 5.2490 | lr 1.3e-04 | norm 89.3892 | dt 0.176
type train | step 17780 | loss 3.1747 1.1144 3.6875 5.0344 | lr 1.3e-04 | norm 88.0505 | dt 0.188
type train | step 17790 | loss 3.0874 1.0985 3.5751 5.0344 | lr 1.3e-04 | norm 90.7738 | dt 0.176
type train | step 17800 | loss 3.1474 1.1162 3.5941 4.9856 | lr 1.3e-04 | norm 89.8363 | dt 0.172
type train | step 17810 | loss 3.2075 1.1500 3.6465 5.1096 | lr 1.3e-04 | norm 90.7814 | dt 0.212
type train | step 17820 | loss 3.1464 1.1202 3.6670 5.1388 | lr 1.3e-04 | norm 93.1794 | dt 0.182
type train | step 17830 | loss 3.2804 1.2031 3.6938 5.2454 | lr 1.3e-04 | norm 81.7855 | dt 0.175
type train | step 17840 | loss 3.2082 1.1271 3.6321 5.0742 | lr 1.3e-04 | norm 93.5163 | dt 0.171
type train | step 17850 | loss 3.1035 1.1281 3.6665 5.1099 | lr 1.3e-04 | norm 85.0873 | dt 0.217
type train | step 17860 | loss 3.0602 1.1060 3.6267 4.9703 | lr 1.3e-04 | norm 91.8476 | dt 0.184
type train | step 17870 | loss 3.1793 1.1587 3.6670 5.1143 | lr 1.3e-04 | norm 88.2697 | dt 0.170
type train | step 17880 | loss 3.1159 1.1287 3.6988 5.1764 | lr 1.3e-04 | norm 84.2867 | dt 0.234
type train | step 17890 | loss 3.1799 1.1695 3.6633 5.2283 | lr 1.3e-04 | norm 82.1276 | dt 0.212
type train | step 17900 | loss 3.1817 1.1432 3.6634 5.1641 | lr 1.3e-04 | norm 88.2797 | dt 0.216
type train | step 17910 | loss 3.0875 1.1305 3.6457 5.0941 | lr 1.3e-04 | norm 83.2157 | dt 0.196
type train | step 17920 | loss 3.0401 1.1194 3.6221 4.9873 | lr 1.3e-04 | norm 101.1678 | dt 0.221
type train | step 17930 | loss 3.1126 1.1610 3.6945 5.1380 | lr 1.3e-04 | norm 88.7129 | dt 0.215
type train | step 17940 | loss 3.1008 1.1334 3.6712 5.0563 | lr 1.3e-04 | norm 74.9706 | dt 0.178
type train | step 17950 | loss 3.0725 1.1381 3.6565 5.0822 | lr 1.2e-04 | norm 83.3235 | dt 0.180
type train | step 17960 | loss 3.1428 1.1381 3.6240 5.1201 | lr 1.2e-04 | norm 85.9974 | dt 0.171
type train | step 17970 | loss 3.1156 1.1519 3.7103 5.2277 | lr 1.2e-04 | norm 101.9701 | dt 0.203
type train | step 17980 | loss 3.0767 1.1107 3.6413 5.0538 | lr 1.2e-04 | norm 94.8987 | dt 0.212
type train | step 17990 | loss 3.0505 1.1460 3.6827 5.1346 | lr 1.2e-04 | norm 92.8649 | dt 0.197
type train | step 18000 | loss 3.0855 1.1416 3.7012 5.1333 | lr 1.2e-04 | norm 95.9922 | dt 0.197
type train | step 18010 | loss 3.0774 1.1366 3.6597 5.1422 | lr 1.2e-04 | norm 85.7468 | dt 0.200
type train | step 18020 | loss 3.1305 1.1404 3.6758 5.1559 | lr 1.2e-04 | norm 89.1457 | dt 0.169
type train | step 18030 | loss 3.0564 1.1170 3.6509 5.0429 | lr 1.2e-04 | norm 88.8484 | dt 0.222
type train | step 18040 | loss 3.0382 1.1276 3.6157 5.0724 | lr 1.2e-04 | norm 99.4691 | dt 0.224
type train | step 18050 | loss 3.0477 1.1160 3.6509 5.0227 | lr 1.2e-04 | norm 90.5218 | dt 0.188
type train | step 18060 | loss 2.9995 1.1229 3.7098 5.1519 | lr 1.2e-04 | norm 85.6059 | dt 0.176
type train | step 18070 | loss 3.0952 1.1666 3.7228 5.2713 | lr 1.2e-04 | norm 88.5310 | dt 0.169
type train | step 18080 | loss 3.0044 1.1343 3.6333 5.1222 | lr 1.2e-04 | norm 88.5709 | dt 0.167
type train | step 18090 | loss 3.0310 1.1082 3.6546 5.0762 | lr 1.2e-04 | norm 87.4140 | dt 0.214
type train | step 18100 | loss 3.0458 1.1386 3.6818 5.1219 | lr 1.2e-04 | norm 95.2003 | dt 0.178
type train | step 18110 | loss 3.0577 1.1285 3.7074 5.0888 | lr 1.2e-04 | norm 83.2976 | dt 0.221
type train | step 18120 | loss 2.9970 1.1385 3.6883 5.1650 | lr 1.2e-04 | norm 88.1128 | dt 0.193
type train | step 18130 | loss 2.9762 1.1217 3.6453 5.1424 | lr 1.2e-04 | norm 83.6415 | dt 0.220
type train | step 18140 | loss 2.9805 1.1454 3.6759 5.1657 | lr 1.2e-04 | norm 85.0633 | dt 0.183
type train | step 18150 | loss 2.9827 1.1008 3.6417 5.0213 | lr 1.2e-04 | norm 77.6971 | dt 0.183
type train | step 18160 | loss 3.0469 1.1501 3.6612 5.1970 | lr 1.2e-04 | norm 97.2240 | dt 0.223
type train | step 18170 | loss 2.9843 1.1337 3.6829 5.1508 | lr 1.2e-04 | norm 80.2344 | dt 0.179
type train | step 18180 | loss 2.9573 1.1241 3.6746 5.1422 | lr 1.2e-04 | norm 94.8463 | dt 0.209
type train | step 18190 | loss 2.9671 1.1455 3.6960 5.1663 | lr 1.2e-04 | norm 91.7339 | dt 0.216
type train | step 18200 | loss 2.9617 1.1342 3.6444 5.1677 | lr 1.2e-04 | norm 87.6453 | dt 0.195
type train | step 18210 | loss 2.9558 1.1239 3.6534 5.1294 | lr 1.2e-04 | norm 84.3882 | dt 0.193
type train | step 18220 | loss 3.0042 1.1543 3.6874 5.2715 | lr 1.2e-04 | norm 89.1449 | dt 0.182
type train | step 18230 | loss 2.9395 1.1147 3.6583 5.1000 | lr 1.2e-04 | norm 80.7597 | dt 0.189
type train | step 18240 | loss 2.9734 1.1289 3.7106 5.1761 | lr 1.2e-04 | norm 83.9522 | dt 0.186
type train | step 18250 | loss 3.0449 1.1587 3.7006 5.2080 | lr 1.2e-04 | norm 87.2395 | dt 0.202
type train | step 18260 | loss 3.0260 1.1463 3.6410 5.2187 | lr 1.2e-04 | norm 85.7529 | dt 0.218
type train | step 18270 | loss 3.1097 1.1123 3.6808 5.0922 | lr 1.2e-04 | norm 104.4781 | dt 0.209
type train | step 18280 | loss 3.0095 1.1444 3.6476 5.2346 | lr 1.2e-04 | norm 95.7651 | dt 0.206
type train | step 18290 | loss 2.9779 1.1091 3.6379 5.1155 | lr 1.2e-04 | norm 88.8540 | dt 0.203
type train | step 18300 | loss 3.0299 1.1273 3.6646 5.1502 | lr 1.2e-04 | norm 91.9522 | dt 0.167
type train | step 18310 | loss 2.9647 1.1368 3.6508 5.1577 | lr 1.2e-04 | norm 79.3581 | dt 0.177
type train | step 18320 | loss 2.9756 1.1359 3.6400 5.2024 | lr 1.2e-04 | norm 101.0039 | dt 0.218
type train | step 18330 | loss 2.9786 1.0982 3.6434 5.0687 | lr 1.2e-04 | norm 83.3684 | dt 0.210
type train | step 18340 | loss 3.0559 1.1724 3.6973 5.3324 | lr 1.2e-04 | norm 110.6857 | dt 0.173
type train | step 18350 | loss 2.9208 1.1098 3.6351 5.0518 | lr 1.2e-04 | norm 89.4114 | dt 0.189
type train | step 18360 | loss 2.9113 1.0964 3.6493 5.1246 | lr 1.2e-04 | norm 98.7941 | dt 0.175
type train | step 18370 | loss 2.9003 1.1073 3.6288 5.1226 | lr 1.2e-04 | norm 83.6697 | dt 0.188
type train | step 18380 | loss 2.9660 1.1505 3.6773 5.3100 | lr 1.2e-04 | norm 90.8554 | dt 0.187
type train | step 18390 | loss 2.9507 1.1105 3.6802 5.1131 | lr 1.2e-04 | norm 81.6737 | dt 0.188
type train | step 18400 | loss 2.9036 1.0938 3.6252 5.1267 | lr 1.2e-04 | norm 86.9295 | dt 0.168
type train | step 18410 | loss 2.9305 1.1099 3.6271 5.0624 | lr 1.2e-04 | norm 91.6065 | dt 0.223
type train | step 18420 | loss 2.9914 1.1413 3.6826 5.1936 | lr 1.1e-04 | norm 90.8324 | dt 0.205
type train | step 18430 | loss 2.9108 1.1160 3.7551 5.2009 | lr 1.1e-04 | norm 80.3139 | dt 0.205
type train | step 18440 | loss 3.0090 1.2038 3.7224 5.3274 | lr 1.1e-04 | norm 81.2531 | dt 0.201
type train | step 18450 | loss 2.9742 1.1253 3.6868 5.1529 | lr 1.1e-04 | norm 87.3519 | dt 0.190
type train | step 18460 | loss 2.8783 1.1254 3.6716 5.1954 | lr 1.1e-04 | norm 83.6127 | dt 0.204
type train | step 18470 | loss 2.8285 1.1036 3.6052 5.0384 | lr 1.1e-04 | norm 90.2043 | dt 0.188
type train | step 18480 | loss 2.9112 1.1485 3.6686 5.1974 | lr 1.1e-04 | norm 85.6243 | dt 0.171
type train | step 18490 | loss 2.9205 1.1129 3.6511 5.2545 | lr 1.1e-04 | norm 82.8891 | dt 0.178
type train | step 18500 | loss 2.9403 1.1593 3.6693 5.3100 | lr 1.1e-04 | norm 92.3039 | dt 0.199
type train | step 18510 | loss 2.9391 1.1381 3.6473 5.2423 | lr 1.1e-04 | norm 79.0617 | dt 0.184
type train | step 18520 | loss 2.9111 1.1201 3.6429 5.1756 | lr 1.1e-04 | norm 93.0091 | dt 0.222
type train | step 18530 | loss 2.8464 1.1081 3.5910 5.0764 | lr 1.1e-04 | norm 97.5460 | dt 0.201
type train | step 18540 | loss 2.9422 1.1483 3.6931 5.2375 | lr 1.1e-04 | norm 87.1194 | dt 0.204
type train | step 18550 | loss 2.9475 1.1172 3.6333 5.1262 | lr 1.1e-04 | norm 83.9073 | dt 0.216
type train | step 18560 | loss 2.9220 1.1301 3.6383 5.1767 | lr 1.1e-04 | norm 89.9712 | dt 0.205
type train | step 18570 | loss 2.9470 1.1299 3.6467 5.1943 | lr 1.1e-04 | norm 87.4270 | dt 0.205
type train | step 18580 | loss 2.9443 1.1452 3.7146 5.3010 | lr 1.1e-04 | norm 97.6076 | dt 0.205
type train | step 18590 | loss 2.8934 1.1048 3.6199 5.1223 | lr 1.1e-04 | norm 86.9657 | dt 0.196
type train | step 18600 | loss 2.8948 1.1414 3.6910 5.2250 | lr 1.1e-04 | norm 94.7437 | dt 0.171
type train | step 18610 | loss 2.9912 1.1309 3.6900 5.2241 | lr 1.1e-04 | norm 88.1659 | dt 0.194
type train | step 18620 | loss 2.9070 1.1279 3.6705 5.2318 | lr 1.1e-04 | norm 79.0884 | dt 0.177
type train | step 18630 | loss 2.8768 1.1302 3.6664 5.2404 | lr 1.1e-04 | norm 74.1359 | dt 0.187
type train | step 18640 | loss 2.8147 1.1119 3.6545 5.1380 | lr 1.1e-04 | norm 83.7474 | dt 0.182
type train | step 18650 | loss 2.8186 1.1212 3.6289 5.1540 | lr 1.1e-04 | norm 96.9369 | dt 0.227
type train | step 18660 | loss 2.8552 1.1052 3.6502 5.1035 | lr 1.1e-04 | norm 92.2429 | dt 0.212
type train | step 18670 | loss 2.8018 1.1178 3.6886 5.2295 | lr 1.1e-04 | norm 81.7115 | dt 0.210
type train | step 18680 | loss 2.8921 1.1550 3.7233 5.3676 | lr 1.1e-04 | norm 79.7927 | dt 0.207
type train | step 18690 | loss 2.7584 1.1204 3.6065 5.2119 | lr 1.1e-04 | norm 90.6857 | dt 0.211
type train | step 18700 | loss 2.8135 1.0963 3.6385 5.1575 | lr 1.1e-04 | norm 82.4369 | dt 0.211
type train | step 18710 | loss 2.7993 1.1256 3.6346 5.2123 | lr 1.1e-04 | norm 93.7053 | dt 0.179
type train | step 18720 | loss 2.8710 1.1173 3.7011 5.1830 | lr 1.1e-04 | norm 89.7892 | dt 0.168
type train | step 18730 | loss 2.7920 1.1277 3.6952 5.2695 | lr 1.1e-04 | norm 91.8567 | dt 0.179
type train | step 18740 | loss 2.7670 1.1110 3.6469 5.2235 | lr 1.1e-04 | norm 93.1675 | dt 0.188
type train | step 18750 | loss 2.8273 1.1311 3.6722 5.2447 | lr 1.1e-04 | norm 91.6391 | dt 0.198
type train | step 18760 | loss 2.7679 1.0888 3.6101 5.1084 | lr 1.1e-04 | norm 82.5330 | dt 0.177
type train | step 18770 | loss 2.7937 1.1399 3.7000 5.2726 | lr 1.1e-04 | norm 94.4121 | dt 0.178
type train | step 18780 | loss 2.7682 1.1201 3.6874 5.2335 | lr 1.1e-04 | norm 83.0560 | dt 0.170
type train | step 18790 | loss 2.7145 1.1044 3.6517 5.2289 | lr 1.1e-04 | norm 89.1515 | dt 0.184
type train | step 18800 | loss 2.7250 1.1303 3.6660 5.2505 | lr 1.1e-04 | norm 87.3188 | dt 0.223
type train | step 18810 | loss 2.7152 1.1178 3.6001 5.2714 | lr 1.1e-04 | norm 82.2579 | dt 0.212
type train | step 18820 | loss 2.7013 1.1067 3.6478 5.2364 | lr 1.1e-04 | norm 85.1088 | dt 0.184
type train | step 18830 | loss 2.7670 1.1385 3.6873 5.3575 | lr 1.1e-04 | norm 86.6518 | dt 0.208
type train | step 18840 | loss 2.6751 1.1010 3.6459 5.1960 | lr 1.1e-04 | norm 84.0847 | dt 0.211
type train | step 18850 | loss 2.6842 1.1129 3.6877 5.2764 | lr 1.1e-04 | norm 85.9949 | dt 0.193
type train | step 18860 | loss 2.7784 1.1403 3.6798 5.2673 | lr 1.1e-04 | norm 84.7568 | dt 0.182
type train | step 18870 | loss 2.7215 1.1272 3.6435 5.3041 | lr 1.1e-04 | norm 87.8925 | dt 0.170
type train | step 18880 | loss 2.7153 1.0899 3.6753 5.1942 | lr 1.1e-04 | norm 88.2394 | dt 0.208
type train | step 18890 | loss 2.7446 1.1268 3.6431 5.3186 | lr 1.1e-04 | norm 105.8483 | dt 0.204
type train | step 18900 | loss 2.6408 1.0940 3.6196 5.2022 | lr 1.1e-04 | norm 99.2523 | dt 0.169
type train | step 18910 | loss 2.7392 1.1117 3.6197 5.2383 | lr 1.1e-04 | norm 87.6622 | dt 0.203
type train | step 18920 | loss 2.7073 1.1192 3.6225 5.2271 | lr 1.1e-04 | norm 85.7235 | dt 0.183
type train | step 18930 | loss 2.6383 1.1184 3.6491 5.2885 | lr 1.1e-04 | norm 95.2860 | dt 0.198
type train | step 18940 | loss 2.6404 1.0860 3.6569 5.1630 | lr 1.1e-04 | norm 81.2359 | dt 0.208
type train | step 18950 | loss 2.7636 1.1537 3.7136 5.4116 | lr 1.1e-04 | norm 109.7346 | dt 0.204
type train | step 18960 | loss 2.6249 1.0944 3.5731 5.1184 | lr 1.1e-04 | norm 86.2249 | dt 0.215
type train | step 18970 | loss 2.6313 1.0811 3.5762 5.2032 | lr 1.1e-04 | norm 101.7589 | dt 0.202
type train | step 18980 | loss 2.6314 1.0913 3.5784 5.2117 | lr 1.1e-04 | norm 79.0942 | dt 0.203
type train | step 18990 | loss 2.7029 1.1319 3.5924 5.3967 | lr 1.1e-04 | norm 89.9233 | dt 0.171
type train | step 19000 | loss 2.6619 1.0975 3.6006 5.1938 | lr 1.1e-04 | norm 84.0570 | dt 0.205
type train | step 19010 | loss 2.5927 1.0800 3.5770 5.1883 | lr 1.1e-04 | norm 87.5857 | dt 0.215
type train | step 19020 | loss 2.6294 1.0949 3.6170 5.1391 | lr 1.1e-04 | norm 85.4214 | dt 0.180
type train | step 19030 | loss 2.6718 1.1293 3.6302 5.2658 | lr 1.1e-04 | norm 85.6705 | dt 0.187
type train | step 19040 | loss 2.6524 1.0941 3.6493 5.2900 | lr 1.1e-04 | norm 83.6488 | dt 0.195
type train | step 19050 | loss 2.7501 1.1775 3.6863 5.4092 | lr 1.1e-04 | norm 81.4896 | dt 0.206
type train | step 19060 | loss 2.6700 1.1057 3.6436 5.2415 | lr 1.1e-04 | norm 86.8690 | dt 0.175
type train | step 19070 | loss 2.5769 1.1101 3.6810 5.2801 | lr 1.1e-04 | norm 84.4713 | dt 0.205
type train | step 19080 | loss 2.5430 1.0842 3.5598 5.1055 | lr 1.1e-04 | norm 96.1071 | dt 0.215
type train | step 19090 | loss 2.6483 1.1324 3.6213 5.2660 | lr 1.0e-04 | norm 85.9041 | dt 0.181
type train | step 19100 | loss 2.6043 1.1007 3.6822 5.3271 | lr 1.0e-04 | norm 85.1754 | dt 0.193
type train | step 19110 | loss 2.6760 1.1469 3.6525 5.3835 | lr 1.0e-04 | norm 84.6460 | dt 0.215
type train | step 19120 | loss 2.6698 1.1213 3.6336 5.3275 | lr 1.0e-04 | norm 84.6295 | dt 0.178
type train | step 19130 | loss 2.5835 1.1069 3.6480 5.2404 | lr 1.0e-04 | norm 92.7169 | dt 0.174
type train | step 19140 | loss 2.5732 1.0958 3.5677 5.1380 | lr 1.0e-04 | norm 94.0627 | dt 0.214
type train | step 19150 | loss 2.6298 1.1295 3.6226 5.2951 | lr 1.0e-04 | norm 79.9977 | dt 0.187
type train | step 19160 | loss 2.6234 1.1016 3.6454 5.2090 | lr 1.0e-04 | norm 81.1553 | dt 0.165
type train | step 19170 | loss 2.5652 1.1142 3.6045 5.2479 | lr 1.0e-04 | norm 81.5423 | dt 0.167
type train | step 19180 | loss 2.6078 1.1162 3.6031 5.2620 | lr 1.0e-04 | norm 86.2354 | dt 0.170
type train | step 19190 | loss 2.6355 1.1288 3.6534 5.3601 | lr 1.0e-04 | norm 96.8556 | dt 0.167
type train | step 19200 | loss 2.6122 1.0935 3.6270 5.1765 | lr 1.0e-04 | norm 99.4112 | dt 0.185
type train | step 19210 | loss 2.5652 1.1253 3.6707 5.2720 | lr 1.0e-04 | norm 85.7113 | dt 0.209
type train | step 19220 | loss 2.6365 1.1143 3.6760 5.3097 | lr 1.0e-04 | norm 81.3861 | dt 0.201
type train | step 19230 | loss 2.6093 1.1147 3.6399 5.3059 | lr 1.0e-04 | norm 77.3167 | dt 0.170
type train | step 19240 | loss 2.6165 1.1209 3.6408 5.3115 | lr 1.0e-04 | norm 81.8458 | dt 0.169
type train | step 19250 | loss 2.5598 1.1010 3.6209 5.2098 | lr 1.0e-04 | norm 79.5254 | dt 0.185
type train | step 19260 | loss 2.5883 1.1116 3.6044 5.2294 | lr 1.0e-04 | norm 94.3488 | dt 0.217
type train | step 19270 | loss 2.5946 1.0951 3.6142 5.1846 | lr 1.0e-04 | norm 105.2283 | dt 0.203
type train | step 19280 | loss 2.5559 1.1060 3.6760 5.3184 | lr 1.0e-04 | norm 83.6907 | dt 0.199
type train | step 19290 | loss 2.6274 1.1433 3.7272 5.4274 | lr 1.0e-04 | norm 78.0499 | dt 0.197
type train | step 19300 | loss 2.5665 1.1127 3.5896 5.2669 | lr 1.0e-04 | norm 87.6720 | dt 0.181
type train | step 19310 | loss 2.5767 1.0910 3.6508 5.2087 | lr 1.0e-04 | norm 89.4220 | dt 0.201
type train | step 19320 | loss 2.5705 1.1182 3.6709 5.2729 | lr 1.0e-04 | norm 93.3747 | dt 0.200
type train | step 19330 | loss 2.6081 1.1104 3.6594 5.2470 | lr 1.0e-04 | norm 87.7189 | dt 0.223
type train | step 19340 | loss 2.5534 1.1156 3.6629 5.3339 | lr 1.0e-04 | norm 95.2428 | dt 0.215
type train | step 19350 | loss 2.5526 1.0979 3.6123 5.2794 | lr 1.0e-04 | norm 88.8464 | dt 0.176
type train | step 19360 | loss 2.5537 1.1205 3.6584 5.3135 | lr 1.0e-04 | norm 95.6991 | dt 0.193
type train | step 19370 | loss 2.5707 1.0814 3.6154 5.1680 | lr 1.0e-04 | norm 88.0856 | dt 0.182
type train | step 19380 | loss 2.5822 1.1277 3.6826 5.3433 | lr 1.0e-04 | norm 94.0351 | dt 0.178
type train | step 19390 | loss 2.5682 1.1061 3.7208 5.2908 | lr 1.0e-04 | norm 80.9480 | dt 0.175
type train | step 19400 | loss 2.5405 1.0992 3.6575 5.2910 | lr 1.0e-04 | norm 88.6850 | dt 0.217
type train | step 19410 | loss 2.5486 1.1231 3.7175 5.3070 | lr 1.0e-04 | norm 88.1386 | dt 0.194
type train | step 19420 | loss 2.5262 1.1116 3.6226 5.3308 | lr 1.0e-04 | norm 83.1493 | dt 0.204
type train | step 19430 | loss 2.5044 1.1018 3.6381 5.2983 | lr 1.0e-04 | norm 85.5218 | dt 0.200
type train | step 19440 | loss 2.5745 1.1343 3.6886 5.4257 | lr 1.0e-04 | norm 91.2795 | dt 0.215
type train | step 19450 | loss 2.5147 1.0965 3.6400 5.2485 | lr 1.0e-04 | norm 88.7886 | dt 0.166
type train | step 19460 | loss 2.4850 1.1086 3.7088 5.3381 | lr 1.0e-04 | norm 89.8467 | dt 0.166
type train | step 19470 | loss 2.5951 1.1372 3.6991 5.3332 | lr 1.0e-04 | norm 83.0975 | dt 0.187
type train | step 19480 | loss 2.5374 1.1260 3.6642 5.3436 | lr 1.0e-04 | norm 81.8073 | dt 0.189
type train | step 19490 | loss 2.5425 1.0886 3.6603 5.2469 | lr 1.0e-04 | norm 94.0773 | dt 0.205
type train | step 19500 | loss 2.5654 1.1258 3.6389 5.3850 | lr 1.0e-04 | norm 101.0583 | dt 0.196
type train | step 19510 | loss 2.4339 1.0900 3.6380 5.2513 | lr 1.0e-04 | norm 88.1697 | dt 0.211
type train | step 19520 | loss 2.5187 1.1069 3.6694 5.2835 | lr 1.0e-04 | norm 94.0885 | dt 0.207
type train | step 19530 | loss 2.5003 1.1117 3.6543 5.2897 | lr 1.0e-04 | norm 81.4795 | dt 0.200
type train | step 19540 | loss 2.5119 1.1163 3.6134 5.3376 | lr 1.0e-04 | norm 104.7064 | dt 0.205
type train | step 19550 | loss 2.4752 1.0860 3.6615 5.2112 | lr 1.0e-04 | norm 84.9460 | dt 0.179
type train | step 19560 | loss 2.5638 1.1550 3.7057 5.4536 | lr 1.0e-04 | norm 104.6978 | dt 0.211
type train | step 19570 | loss 2.4415 1.0898 3.6162 5.1714 | lr 1.0e-04 | norm 85.5223 | dt 0.213
type train | step 19580 | loss 2.4484 1.0823 3.6546 5.2451 | lr 1.0e-04 | norm 93.4003 | dt 0.196
type train | step 19590 | loss 2.4726 1.0931 3.6604 5.2572 | lr 1.0e-04 | norm 79.0561 | dt 0.190
type train | step 19600 | loss 2.5348 1.1297 3.6628 5.4377 | lr 1.0e-04 | norm 84.8586 | dt 0.219
type train | step 19610 | loss 2.5128 1.0922 3.6317 5.2292 | lr 1.0e-04 | norm 92.3844 | dt 0.172
type train | step 19620 | loss 2.4246 1.0812 3.5849 5.2288 | lr 1.0e-04 | norm 88.3810 | dt 0.209
type train | step 19630 | loss 2.4735 1.0933 3.5935 5.1780 | lr 1.0e-04 | norm 85.6705 | dt 0.198
type train | step 19640 | loss 2.5441 1.1209 3.6962 5.3054 | lr 1.0e-04 | norm 92.1093 | dt 0.198
type train | step 19650 | loss 2.4984 1.0937 3.7024 5.3233 | lr 1.0e-04 | norm 81.4004 | dt 0.190
type train | step 19660 | loss 2.5967 1.1790 3.7146 5.4389 | lr 1.0e-04 | norm 79.1693 | dt 0.203
type train | step 19670 | loss 2.5106 1.1056 3.6921 5.2720 | lr 1.0e-04 | norm 86.6193 | dt 0.203
type train | step 19680 | loss 2.4587 1.1079 3.6631 5.3132 | lr 1.0e-04 | norm 84.6442 | dt 0.203
type train | step 19690 | loss 2.4272 1.0856 3.6250 5.1508 | lr 1.0e-04 | norm 97.4773 | dt 0.187
type train | step 19700 | loss 2.5067 1.1333 3.6378 5.3021 | lr 1.0e-04 | norm 94.7396 | dt 0.198
type train | step 19710 | loss 2.4533 1.0998 3.6780 5.3519 | lr 1.0e-04 | norm 86.8534 | dt 0.203
type train | step 19720 | loss 2.5413 1.1472 3.6689 5.4186 | lr 1.0e-04 | norm 88.7219 | dt 0.165
type train | step 19730 | loss 2.5061 1.1195 3.6569 5.3579 | lr 1.0e-04 | norm 87.6147 | dt 0.178
type train | step 19740 | loss 2.3986 1.1078 3.6444 5.2748 | lr 1.0e-04 | norm 92.1725 | dt 0.199
type train | step 19750 | loss 2.3755 1.0994 3.5725 5.1853 | lr 1.0e-04 | norm 101.0817 | dt 0.188
type train | step 19760 | loss 2.4816 1.1325 3.7199 5.3213 | lr 1.0e-04 | norm 92.3429 | dt 0.229
type train | step 19770 | loss 2.4746 1.1026 3.6382 5.2481 | lr 1.0e-04 | norm 92.5240 | dt 0.225
type train | step 19780 | loss 2.4254 1.1163 3.6581 5.2821 | lr 1.0e-04 | norm 83.2964 | dt 0.180
type train | step 19790 | loss 2.4504 1.1210 3.6185 5.2922 | lr 1.0e-04 | norm 85.0814 | dt 0.187
type train | step 19800 | loss 2.4949 1.1333 3.7027 5.4057 | lr 1.0e-04 | norm 101.7509 | dt 0.220
type train | step 19810 | loss 2.4447 1.0970 3.6396 5.2265 | lr 1.0e-04 | norm 92.1567 | dt 0.215
type train | step 19820 | loss 2.4355 1.1314 3.6958 5.2992 | lr 1.0e-04 | norm 90.7680 | dt 0.175
type train | step 19830 | loss 2.4897 1.1215 3.6502 5.3254 | lr 1.0e-04 | norm 85.5885 | dt 0.189
type train | step 19840 | loss 2.5123 1.1168 3.6873 5.3347 | lr 1.0e-04 | norm 89.8062 | dt 0.212
type train | step 19850 | loss 2.4513 1.1193 3.6251 5.3285 | lr 1.0e-04 | norm 80.6860 | dt 0.187
type train | step 19860 | loss 2.4453 1.1025 3.6689 5.2258 | lr 1.0e-04 | norm 90.8822 | dt 0.217
type train | step 19870 | loss 2.4464 1.1130 3.6297 5.2550 | lr 1.0e-04 | norm 95.1290 | dt 0.186
type train | step 19880 | loss 2.4242 1.0972 3.6095 5.1942 | lr 1.0e-04 | norm 103.4292 | dt 0.173
type train | step 19890 | loss 2.3975 1.1063 3.7241 5.3306 | lr 1.0e-04 | norm 76.0645 | dt 0.219
type train | step 19900 | loss 2.4852 1.1453 3.7416 5.4555 | lr 1.0e-04 | norm 84.4331 | dt 0.168
type train | step 19910 | loss 2.4309 1.1108 3.6102 5.2989 | lr 1.0e-04 | norm 96.3897 | dt 0.222
type train | step 19920 | loss 2.4270 1.0900 3.6627 5.2315 | lr 1.0e-04 | norm 86.9409 | dt 0.171
type train | step 19930 | loss 2.4413 1.1181 3.6372 5.3037 | lr 1.0e-04 | norm 106.0337 | dt 0.179
type train | step 19940 | loss 2.4401 1.1106 3.6782 5.2609 | lr 1.0e-04 | norm 88.8922 | dt 0.180
type train | step 19950 | loss 2.4367 1.1158 3.6684 5.3355 | lr 1.0e-04 | norm 97.7198 | dt 0.165
type train | step 19960 | loss 2.4038 1.0993 3.6329 5.2929 | lr 1.0e-04 | norm 93.7220 | dt 0.174
type train | step 19970 | loss 2.4298 1.1185 3.6219 5.3221 | lr 1.0e-04 | norm 95.8212 | dt 0.172
type train | step 19980 | loss 2.4110 1.0827 3.6240 5.1817 | lr 1.0e-04 | norm 82.1255 | dt 0.208
type train | step 19990 | loss 2.4348 1.1304 3.6629 5.3571 | lr 1.0e-04 | norm 93.2413 | dt 0.197
type train | step 20000 | loss 2.4512 1.1114 3.6908 5.2922 | lr 1.0e-04 | norm 90.1768 | dt 0.189