davidquarel's picture
Upload folder using huggingface_hub
82f7fef verified
type train | step 10 | loss 63.5102 270.9496 263.7529 260.9329 1223.9734 | lr 1.3e-05 | norm 894.0597 | dt 0.011
type train | step 20 | loss 62.5998 266.8769 260.0431 256.5609 1220.3625 | lr 2.7e-05 | norm 901.6014 | dt 0.011
type train | step 30 | loss 61.1028 258.9111 252.1662 254.3046 1196.8158 | lr 4.0e-05 | norm 872.7061 | dt 0.011
type train | step 40 | loss 60.4122 248.3276 247.0797 249.1780 1115.9929 | lr 5.3e-05 | norm 798.0441 | dt 0.011
type train | step 50 | loss 57.0094 236.9544 234.9551 239.4774 1097.6688 | lr 6.7e-05 | norm 791.4699 | dt 0.011
type train | step 60 | loss 54.3804 223.5708 222.6631 230.6768 1022.8490 | lr 8.0e-05 | norm 735.0872 | dt 0.011
type train | step 70 | loss 51.5982 206.2325 210.4289 219.8820 965.2846 | lr 9.3e-05 | norm 691.8126 | dt 0.011
type train | step 80 | loss 49.6193 191.1598 200.1663 209.1512 874.0125 | lr 1.1e-04 | norm 609.4534 | dt 0.011
type train | step 90 | loss 45.0382 172.9246 182.9581 198.4814 826.4994 | lr 1.2e-04 | norm 570.7281 | dt 0.011
type train | step 100 | loss 41.4162 154.5285 168.3405 182.0743 737.3175 | lr 1.3e-04 | norm 515.4979 | dt 0.011
type train | step 110 | loss 38.2235 137.8807 154.0395 171.8582 671.5147 | lr 1.5e-04 | norm 449.1419 | dt 0.011
type train | step 120 | loss 35.6131 121.8813 141.8355 162.4164 596.1166 | lr 1.6e-04 | norm 383.8590 | dt 0.012
type train | step 130 | loss 31.3293 105.0302 126.8149 148.2206 534.1478 | lr 1.7e-04 | norm 340.6333 | dt 0.011
type train | step 140 | loss 29.4783 90.8469 116.0273 136.5338 466.8782 | lr 1.9e-04 | norm 291.2955 | dt 0.013
type train | step 150 | loss 24.3491 75.8176 98.9363 121.4876 419.9500 | lr 2.0e-04 | norm 258.4358 | dt 0.012
type train | step 160 | loss 20.4789 63.0229 85.9170 107.0702 357.2363 | lr 2.1e-04 | norm 224.3962 | dt 0.011
type train | step 170 | loss 16.6341 51.6078 73.0551 93.0587 307.3560 | lr 2.3e-04 | norm 195.0874 | dt 0.011
type train | step 180 | loss 15.2343 42.8999 64.0034 85.4126 274.7539 | lr 2.4e-04 | norm 172.9758 | dt 0.011
type train | step 190 | loss 11.8356 33.6185 53.4331 73.9640 241.1518 | lr 2.5e-04 | norm 155.1092 | dt 0.011
type train | step 200 | loss 10.4651 27.2174 45.2221 63.4058 208.2998 | lr 2.7e-04 | norm 141.4246 | dt 0.011
type train | step 210 | loss 8.2093 20.9627 36.3946 53.2664 179.2861 | lr 2.8e-04 | norm 128.6444 | dt 0.011
type train | step 220 | loss 5.7176 15.5778 28.8478 43.0472 147.3833 | lr 2.9e-04 | norm 113.5961 | dt 0.011
type train | step 230 | loss 4.1188 11.7261 22.1309 33.4951 120.6738 | lr 3.1e-04 | norm 100.2483 | dt 0.011
type train | step 240 | loss 3.5682 9.1812 18.4734 29.8134 111.9994 | lr 3.2e-04 | norm 98.5674 | dt 0.011
type train | step 250 | loss 2.6923 6.7976 13.9061 22.7181 94.7616 | lr 3.3e-04 | norm 87.0879 | dt 0.011
type train | step 260 | loss 1.9395 5.0808 10.6290 17.4365 75.8101 | lr 3.5e-04 | norm 75.4095 | dt 0.011
type train | step 270 | loss 1.5615 3.9311 8.1141 13.6783 64.9152 | lr 3.6e-04 | norm 67.7015 | dt 0.011
type train | step 280 | loss 1.2488 3.0580 6.3051 11.5300 57.8505 | lr 3.7e-04 | norm 62.1389 | dt 0.011
type train | step 290 | loss 0.7909 2.3172 4.5484 7.6174 42.5184 | lr 3.9e-04 | norm 47.1231 | dt 0.011
type train | step 300 | loss 0.6328 1.9626 3.7429 6.3017 36.6746 | lr 4.0e-04 | norm 41.4358 | dt 0.011
type train | step 310 | loss 0.5356 1.6650 3.2184 5.0016 32.7811 | lr 4.1e-04 | norm 36.3969 | dt 0.011
type train | step 320 | loss 0.4051 1.4218 2.8359 4.2375 26.4489 | lr 4.3e-04 | norm 29.0919 | dt 0.011
type train | step 330 | loss 0.3162 1.2242 2.6099 3.5708 19.8622 | lr 4.4e-04 | norm 20.9098 | dt 0.011
type train | step 340 | loss 0.2461 1.0490 2.3771 3.2187 16.2830 | lr 4.5e-04 | norm 14.6075 | dt 0.011
type train | step 350 | loss 0.2168 0.9375 2.2308 2.9537 14.0179 | lr 4.7e-04 | norm 11.2377 | dt 0.011
type train | step 360 | loss 0.1844 0.8283 2.0870 2.7650 12.4154 | lr 4.8e-04 | norm 8.7360 | dt 0.011
type train | step 370 | loss 0.1660 0.7371 1.9682 2.6458 10.8933 | lr 4.9e-04 | norm 6.2626 | dt 0.011
type train | step 380 | loss 0.1566 0.6906 1.8789 2.5806 10.6700 | lr 5.1e-04 | norm 6.7331 | dt 0.011
type train | step 390 | loss 0.1347 0.6069 1.7446 2.3199 8.5097 | lr 5.2e-04 | norm 2.8190 | dt 0.011
type train | step 400 | loss 0.1239 0.5574 1.6319 2.2333 7.9957 | lr 5.3e-04 | norm 1.9440 | dt 0.011
type train | step 410 | loss 0.1160 0.5152 1.5502 2.1265 7.4229 | lr 5.5e-04 | norm 1.4316 | dt 0.011
type train | step 420 | loss 0.1065 0.4790 1.4679 2.0372 7.1597 | lr 5.6e-04 | norm 1.2874 | dt 0.011
type train | step 430 | loss 0.0982 0.4404 1.4006 1.9461 6.6951 | lr 5.7e-04 | norm 0.9816 | dt 0.011
type train | step 440 | loss 0.0920 0.4127 1.3279 1.8571 6.3711 | lr 5.9e-04 | norm 0.8386 | dt 0.011
type train | step 450 | loss 0.0869 0.3905 1.2803 1.7997 6.0905 | lr 6.0e-04 | norm 0.7456 | dt 0.011
type train | step 460 | loss 0.0815 0.3671 1.2090 1.7239 5.8775 | lr 6.1e-04 | norm 0.7072 | dt 0.011
type train | step 470 | loss 0.0793 0.3535 1.1852 1.6951 5.6428 | lr 6.3e-04 | norm 0.6828 | dt 0.011
type train | step 480 | loss 0.0738 0.3343 1.1387 1.6386 5.5389 | lr 6.4e-04 | norm 0.6004 | dt 0.011
type train | step 490 | loss 0.0700 0.3184 1.0952 1.5986 5.3440 | lr 6.5e-04 | norm 0.5551 | dt 0.011
type train | step 500 | loss 0.0674 0.3056 1.0696 1.5502 5.2019 | lr 6.7e-04 | norm 0.5303 | dt 0.011
type train | step 510 | loss 0.0637 0.2911 1.0301 1.5129 5.0435 | lr 6.8e-04 | norm 0.4941 | dt 0.011
type train | step 520 | loss 0.0608 0.2789 0.9945 1.4799 4.9031 | lr 6.9e-04 | norm 0.4975 | dt 0.011
type train | step 530 | loss 0.0602 0.2731 0.9786 1.4528 4.8141 | lr 7.1e-04 | norm 0.4101 | dt 0.011
type train | step 540 | loss 0.0556 0.2556 0.9347 1.4022 4.6820 | lr 7.2e-04 | norm 0.4239 | dt 0.011
type train | step 550 | loss 0.0536 0.2465 0.9178 1.3910 4.5974 | lr 7.3e-04 | norm 0.4469 | dt 0.011
type train | step 560 | loss 0.0533 0.2425 0.8979 1.3572 4.5023 | lr 7.5e-04 | norm 0.4011 | dt 0.011
type train | step 570 | loss 0.0504 0.2310 0.8728 1.3341 4.4049 | lr 7.6e-04 | norm 0.3824 | dt 0.011
type train | step 580 | loss 0.0483 0.2232 0.8398 1.2940 4.3380 | lr 7.7e-04 | norm 0.4450 | dt 0.011
type train | step 590 | loss 0.0476 0.2173 0.8300 1.2824 4.2061 | lr 7.9e-04 | norm 0.3375 | dt 0.011
type train | step 600 | loss 0.0448 0.2060 0.8006 1.2405 4.1472 | lr 8.0e-04 | norm 0.3197 | dt 0.011
type train | step 610 | loss 0.0439 0.2020 0.7857 1.2327 4.0992 | lr 8.1e-04 | norm 0.4089 | dt 0.013
type train | step 620 | loss 0.0427 0.1956 0.7719 1.2057 4.0114 | lr 8.3e-04 | norm 0.2870 | dt 0.011
type train | step 630 | loss 0.0415 0.1901 0.7581 1.1975 3.9876 | lr 8.4e-04 | norm 0.4565 | dt 0.011
type train | step 640 | loss 0.0403 0.1846 0.7317 1.1659 3.9088 | lr 8.5e-04 | norm 0.2635 | dt 0.011
type train | step 650 | loss 0.0410 0.1852 0.7371 1.1838 3.9111 | lr 8.7e-04 | norm 0.2578 | dt 0.011
type train | step 660 | loss 0.0384 0.1754 0.7125 1.1252 3.8122 | lr 8.8e-04 | norm 0.2859 | dt 0.011
type train | step 670 | loss 0.0372 0.1700 0.6946 1.1142 3.7513 | lr 8.9e-04 | norm 0.2978 | dt 0.011
type train | step 680 | loss 0.0368 0.1665 0.6856 1.1037 3.7152 | lr 9.1e-04 | norm 0.3073 | dt 0.011
type train | step 690 | loss 0.0370 0.1662 0.6879 1.1227 3.7337 | lr 9.2e-04 | norm 0.3098 | dt 0.011
type train | step 700 | loss 0.0357 0.1614 0.6671 1.0815 3.6644 | lr 9.3e-04 | norm 0.3584 | dt 0.011
type train | step 710 | loss 0.0351 0.1572 0.6537 1.0603 3.5926 | lr 9.5e-04 | norm 0.2898 | dt 0.011
type train | step 720 | loss 0.0348 0.1560 0.6515 1.0574 3.5803 | lr 9.6e-04 | norm 0.2061 | dt 0.011
type train | step 730 | loss 0.0347 0.1548 0.6516 1.0546 3.5583 | lr 9.7e-04 | norm 0.2066 | dt 0.011
type train | step 740 | loss 0.0339 0.1504 0.6370 1.0444 3.5279 | lr 9.9e-04 | norm 0.2252 | dt 0.011
type train | step 750 | loss 0.0347 0.1535 0.6508 1.0615 3.5446 | lr 1.0e-03 | norm 0.3150 | dt 0.012
type train | step 760 | loss 0.0332 0.1478 0.6279 1.0258 3.5159 | lr 1.0e-03 | norm 0.3219 | dt 0.012
type train | step 770 | loss 0.0328 0.1444 0.6226 1.0186 3.4670 | lr 1.0e-03 | norm 0.4403 | dt 0.011
type train | step 780 | loss 0.0320 0.1420 0.6156 0.9953 3.4232 | lr 1.0e-03 | norm 0.2616 | dt 0.011
type train | step 790 | loss 0.0325 0.1420 0.6183 1.0081 3.4249 | lr 1.0e-03 | norm 0.2218 | dt 0.011
type train | step 800 | loss 0.0318 0.1395 0.6075 0.9966 3.4227 | lr 1.0e-03 | norm 0.2437 | dt 0.011
type train | step 810 | loss 0.0322 0.1405 0.6117 1.0110 3.4365 | lr 1.0e-03 | norm 0.1896 | dt 0.011
type train | step 820 | loss 0.0319 0.1383 0.6004 0.9895 3.3915 | lr 1.0e-03 | norm 0.4025 | dt 0.011
type train | step 830 | loss 0.0312 0.1354 0.5964 0.9791 3.3483 | lr 1.0e-03 | norm 0.2995 | dt 0.011
type train | step 840 | loss 0.0307 0.1331 0.5923 0.9601 3.3168 | lr 1.0e-03 | norm 0.2489 | dt 0.011
type train | step 850 | loss 0.0311 0.1345 0.6018 0.9743 3.3458 | lr 1.0e-03 | norm 0.1838 | dt 0.011
type train | step 860 | loss 0.0307 0.1325 0.5852 0.9553 3.3263 | lr 1.0e-03 | norm 0.2401 | dt 0.011
type train | step 870 | loss 0.0307 0.1311 0.5859 0.9526 3.3004 | lr 1.0e-03 | norm 0.2150 | dt 0.011
type train | step 880 | loss 0.0306 0.1316 0.5816 0.9524 3.2832 | lr 1.0e-03 | norm 0.1452 | dt 0.011
type train | step 890 | loss 0.0308 0.1312 0.5829 0.9635 3.3363 | lr 1.0e-03 | norm 0.2053 | dt 0.011
type train | step 900 | loss 0.0301 0.1290 0.5748 0.9346 3.2657 | lr 1.0e-03 | norm 0.2266 | dt 0.011
type train | step 910 | loss 0.0301 0.1285 0.5853 0.9454 3.2788 | lr 1.0e-03 | norm 0.2928 | dt 0.011
type train | step 920 | loss 0.0302 0.1288 0.5745 0.9439 3.2944 | lr 1.0e-03 | norm 0.1896 | dt 0.011
type train | step 930 | loss 0.0300 0.1272 0.5701 0.9356 3.2758 | lr 1.0e-03 | norm 0.1693 | dt 0.011
type train | step 940 | loss 0.0298 0.1265 0.5735 0.9400 3.2447 | lr 1.0e-03 | norm 0.1643 | dt 0.011
type train | step 950 | loss 0.0295 0.1249 0.5672 0.9289 3.2417 | lr 1.0e-03 | norm 0.1731 | dt 0.011
type train | step 960 | loss 0.0295 0.1253 0.5688 0.9223 3.2424 | lr 1.0e-03 | norm 0.2706 | dt 0.011
type train | step 970 | loss 0.0294 0.1245 0.5619 0.9112 3.2080 | lr 1.0e-03 | norm 0.3230 | dt 0.011
type train | step 980 | loss 0.0294 0.1238 0.5667 0.9241 3.2335 | lr 1.0e-03 | norm 0.1689 | dt 0.011
type train | step 990 | loss 0.0297 0.1250 0.5672 0.9407 3.2654 | lr 1.0e-03 | norm 0.2067 | dt 0.011
type train | step 1000 | loss 0.0291 0.1224 0.5628 0.9164 3.2011 | lr 1.0e-03 | norm 0.1614 | dt 0.011
type train | step 1010 | loss 0.0291 0.1231 0.5557 0.9072 3.1842 | lr 1.0e-03 | norm 0.1749 | dt 0.011
type train | step 1020 | loss 0.0291 0.1225 0.5611 0.9149 3.1959 | lr 1.0e-03 | norm 0.2509 | dt 0.011
type train | step 1030 | loss 0.0290 0.1224 0.5568 0.9127 3.2177 | lr 1.0e-03 | norm 0.1926 | dt 0.011
type train | step 1040 | loss 0.0288 0.1214 0.5612 0.9149 3.2112 | lr 1.0e-03 | norm 0.2698 | dt 0.011
type train | step 1050 | loss 0.0289 0.1208 0.5517 0.9030 3.1796 | lr 1.0e-03 | norm 0.2286 | dt 0.011
type train | step 1060 | loss 0.0288 0.1211 0.5569 0.9080 3.1782 | lr 1.0e-03 | norm 0.2333 | dt 0.011
type train | step 1070 | loss 0.0287 0.1202 0.5452 0.8904 3.1453 | lr 1.0e-03 | norm 0.1796 | dt 0.011
type train | step 1080 | loss 0.0289 0.1208 0.5535 0.9090 3.1687 | lr 9.9e-04 | norm 0.3500 | dt 0.011
type train | step 1090 | loss 0.0286 0.1202 0.5525 0.9007 3.1938 | lr 9.9e-04 | norm 0.2448 | dt 0.011
type train | step 1100 | loss 0.0285 0.1198 0.5506 0.9044 3.1829 | lr 9.9e-04 | norm 0.1373 | dt 0.011
type train | step 1110 | loss 0.0286 0.1196 0.5508 0.9028 3.1684 | lr 9.9e-04 | norm 0.3194 | dt 0.011
type train | step 1120 | loss 0.0284 0.1191 0.5515 0.9032 3.1710 | lr 9.9e-04 | norm 0.2378 | dt 0.011
type train | step 1130 | loss 0.0284 0.1186 0.5477 0.9050 3.1631 | lr 9.9e-04 | norm 0.2402 | dt 0.011
type train | step 1140 | loss 0.0288 0.1201 0.5513 0.9067 3.1733 | lr 9.9e-04 | norm 0.3584 | dt 0.011
type train | step 1150 | loss 0.0281 0.1175 0.5439 0.8830 3.1333 | lr 9.9e-04 | norm 0.3099 | dt 0.011
type train | step 1160 | loss 0.0281 0.1177 0.5495 0.8980 3.1571 | lr 9.9e-04 | norm 0.2073 | dt 0.011
type train | step 1170 | loss 0.0285 0.1190 0.5465 0.8960 3.1561 | lr 9.9e-04 | norm 0.2721 | dt 0.011
type train | step 1180 | loss 0.0281 0.1177 0.5477 0.9018 3.1673 | lr 9.9e-04 | norm 0.1556 | dt 0.011
type train | step 1190 | loss 0.0280 0.1182 0.5404 0.8850 3.1527 | lr 9.9e-04 | norm 0.1825 | dt 0.011
type train | step 1200 | loss 0.0282 0.1177 0.5445 0.8934 3.1305 | lr 9.9e-04 | norm 0.3302 | dt 0.011
type train | step 1210 | loss 0.0278 0.1159 0.5406 0.8759 3.1100 | lr 9.9e-04 | norm 0.1724 | dt 0.011
type train | step 1220 | loss 0.0278 0.1168 0.5418 0.8899 3.1451 | lr 9.9e-04 | norm 0.1750 | dt 0.013
type train | step 1230 | loss 0.0278 0.1164 0.5418 0.8794 3.1171 | lr 9.9e-04 | norm 0.2216 | dt 0.011
type train | step 1240 | loss 0.0278 0.1162 0.5443 0.8884 3.1510 | lr 9.9e-04 | norm 0.2322 | dt 0.011
type train | step 1250 | loss 0.0277 0.1162 0.5341 0.8753 3.1166 | lr 9.9e-04 | norm 0.2312 | dt 0.011
type train | step 1260 | loss 0.0282 0.1182 0.5441 0.9034 3.1645 | lr 9.9e-04 | norm 0.2257 | dt 0.011
type train | step 1270 | loss 0.0276 0.1158 0.5388 0.8658 3.1103 | lr 9.9e-04 | norm 0.4197 | dt 0.011
type train | step 1280 | loss 0.0274 0.1153 0.5341 0.8692 3.1037 | lr 9.9e-04 | norm 0.2623 | dt 0.011
type train | step 1290 | loss 0.0275 0.1150 0.5354 0.8699 3.1021 | lr 9.9e-04 | norm 0.2051 | dt 0.011
type train | step 1300 | loss 0.0278 0.1167 0.5433 0.8980 3.1678 | lr 9.9e-04 | norm 0.1738 | dt 0.011
type train | step 1310 | loss 0.0275 0.1158 0.5332 0.8749 3.1182 | lr 9.8e-04 | norm 0.1460 | dt 0.011
type train | step 1320 | loss 0.0274 0.1147 0.5296 0.8631 3.0837 | lr 9.8e-04 | norm 0.3039 | dt 0.011
type train | step 1330 | loss 0.0276 0.1158 0.5344 0.8682 3.1053 | lr 9.8e-04 | norm 0.2040 | dt 0.011
type train | step 1340 | loss 0.0277 0.1165 0.5371 0.8708 3.0954 | lr 9.8e-04 | norm 0.1876 | dt 0.011
type train | step 1350 | loss 0.0275 0.1151 0.5322 0.8714 3.1012 | lr 9.8e-04 | norm 0.3317 | dt 0.011
type train | step 1360 | loss 0.0281 0.1181 0.5457 0.8922 3.1250 | lr 9.8e-04 | norm 0.2161 | dt 0.011
type train | step 1370 | loss 0.0274 0.1159 0.5335 0.8736 3.1332 | lr 9.8e-04 | norm 0.2360 | dt 0.011
type train | step 1380 | loss 0.0274 0.1149 0.5337 0.8715 3.1081 | lr 9.8e-04 | norm 0.2015 | dt 0.011
type train | step 1390 | loss 0.0270 0.1142 0.5330 0.8590 3.0868 | lr 9.8e-04 | norm 0.2162 | dt 0.011
type train | step 1400 | loss 0.0274 0.1152 0.5355 0.8708 3.0884 | lr 9.8e-04 | norm 0.1841 | dt 0.011
type train | step 1410 | loss 0.0272 0.1147 0.5314 0.8693 3.1070 | lr 9.8e-04 | norm 0.1714 | dt 0.011
type train | step 1420 | loss 0.0275 0.1161 0.5380 0.8868 3.1372 | lr 9.8e-04 | norm 0.1567 | dt 0.011
type train | step 1430 | loss 0.0274 0.1154 0.5304 0.8711 3.1007 | lr 9.8e-04 | norm 0.1770 | dt 0.011
type train | step 1440 | loss 0.0271 0.1139 0.5304 0.8685 3.0839 | lr 9.8e-04 | norm 0.3191 | dt 0.011
type train | step 1450 | loss 0.0269 0.1132 0.5298 0.8560 3.0629 | lr 9.8e-04 | norm 0.1754 | dt 0.011
type train | step 1460 | loss 0.0272 0.1148 0.5387 0.8736 3.1001 | lr 9.8e-04 | norm 0.2883 | dt 0.011
type train | step 1470 | loss 0.0271 0.1140 0.5273 0.8581 3.0904 | lr 9.7e-04 | norm 0.2291 | dt 0.011
type train | step 1480 | loss 0.0271 0.1134 0.5296 0.8588 3.0749 | lr 9.7e-04 | norm 0.2789 | dt 0.011
type train | step 1490 | loss 0.0271 0.1145 0.5273 0.8609 3.0616 | lr 9.7e-04 | norm 0.2994 | dt 0.011
type train | step 1500 | loss 0.0273 0.1148 0.5298 0.8741 3.1308 | lr 9.7e-04 | norm 0.3073 | dt 0.011
type train | step 1510 | loss 0.0269 0.1137 0.5256 0.8508 3.0628 | lr 9.7e-04 | norm 0.2961 | dt 0.011
type train | step 1520 | loss 0.0270 0.1138 0.5361 0.8653 3.0813 | lr 9.7e-04 | norm 0.2469 | dt 0.011
type train | step 1530 | loss 0.0272 0.1143 0.5276 0.8662 3.1078 | lr 9.7e-04 | norm 0.2035 | dt 0.011
type train | step 1540 | loss 0.0270 0.1138 0.5254 0.8605 3.0908 | lr 9.7e-04 | norm 0.2008 | dt 0.011
type train | step 1550 | loss 0.0269 0.1135 0.5297 0.8659 3.0738 | lr 9.7e-04 | norm 0.1830 | dt 0.011
type train | step 1560 | loss 0.0267 0.1126 0.5248 0.8595 3.0748 | lr 9.7e-04 | norm 0.1704 | dt 0.011
type train | step 1570 | loss 0.0268 0.1132 0.5276 0.8546 3.0787 | lr 9.7e-04 | norm 0.3315 | dt 0.011
type train | step 1580 | loss 0.0268 0.1130 0.5222 0.8447 3.0512 | lr 9.7e-04 | norm 0.1907 | dt 0.011
type train | step 1590 | loss 0.0268 0.1129 0.5277 0.8597 3.0808 | lr 9.7e-04 | norm 0.2836 | dt 0.011
type train | step 1600 | loss 0.0271 0.1141 0.5288 0.8765 3.1165 | lr 9.7e-04 | norm 0.2375 | dt 0.011
type train | step 1610 | loss 0.0267 0.1122 0.5272 0.8572 3.0627 | lr 9.6e-04 | norm 0.1828 | dt 0.011
type train | step 1620 | loss 0.0267 0.1133 0.5209 0.8505 3.0437 | lr 9.6e-04 | norm 0.1653 | dt 0.011
type train | step 1630 | loss 0.0268 0.1128 0.5264 0.8590 3.0638 | lr 9.6e-04 | norm 0.3200 | dt 0.012
type train | step 1640 | loss 0.0268 0.1133 0.5238 0.8574 3.0883 | lr 9.6e-04 | norm 0.2039 | dt 0.011
type train | step 1650 | loss 0.0266 0.1127 0.5298 0.8643 3.0883 | lr 9.6e-04 | norm 0.2688 | dt 0.011
type train | step 1660 | loss 0.0267 0.1124 0.5211 0.8497 3.0537 | lr 9.6e-04 | norm 0.2415 | dt 0.011
type train | step 1670 | loss 0.0267 0.1128 0.5266 0.8585 3.0616 | lr 9.6e-04 | norm 0.2156 | dt 0.011
type train | step 1680 | loss 0.0266 0.1122 0.5159 0.8431 3.0255 | lr 9.6e-04 | norm 0.1836 | dt 0.011
type train | step 1690 | loss 0.0268 0.1128 0.5243 0.8607 3.0565 | lr 9.6e-04 | norm 0.4443 | dt 0.011
type train | step 1700 | loss 0.0266 0.1126 0.5247 0.8545 3.0826 | lr 9.6e-04 | norm 0.4206 | dt 0.011
type train | step 1710 | loss 0.0265 0.1124 0.5244 0.8606 3.0802 | lr 9.6e-04 | norm 0.2182 | dt 0.011
type train | step 1720 | loss 0.0266 0.1124 0.5233 0.8577 3.0593 | lr 9.5e-04 | norm 0.2746 | dt 0.011
type train | step 1730 | loss 0.0265 0.1122 0.5259 0.8608 3.0721 | lr 9.5e-04 | norm 0.1678 | dt 0.011
type train | step 1740 | loss 0.0265 0.1118 0.5230 0.8639 3.0642 | lr 9.5e-04 | norm 0.1616 | dt 0.011
type train | step 1750 | loss 0.0269 0.1127 0.5265 0.8655 3.0784 | lr 9.5e-04 | norm 0.2526 | dt 0.011
type train | step 1760 | loss 0.0263 0.1112 0.5204 0.8442 3.0379 | lr 9.5e-04 | norm 0.4865 | dt 0.011
type train | step 1770 | loss 0.0264 0.1114 0.5264 0.8596 3.0602 | lr 9.5e-04 | norm 0.2421 | dt 0.011
type train | step 1780 | loss 0.0267 0.1126 0.5235 0.8583 3.0628 | lr 9.5e-04 | norm 0.2157 | dt 0.011
type train | step 1790 | loss 0.0264 0.1117 0.5252 0.8646 3.0844 | lr 9.5e-04 | norm 0.1391 | dt 0.011
type train | step 1800 | loss 0.0263 0.1124 0.5185 0.8496 3.0664 | lr 9.5e-04 | norm 0.2135 | dt 0.011
type train | step 1810 | loss 0.0266 0.1119 0.5230 0.8577 3.0457 | lr 9.5e-04 | norm 0.3350 | dt 0.011
type train | step 1820 | loss 0.0262 0.1105 0.5200 0.8422 3.0278 | lr 9.5e-04 | norm 0.2122 | dt 0.012
type train | step 1830 | loss 0.0263 0.1114 0.5218 0.8575 3.0674 | lr 9.4e-04 | norm 0.2582 | dt 0.015
type train | step 1840 | loss 0.0263 0.1111 0.5218 0.8461 3.0386 | lr 9.4e-04 | norm 0.2733 | dt 0.012
type train | step 1850 | loss 0.0262 0.1110 0.5248 0.8553 3.0723 | lr 9.4e-04 | norm 0.2423 | dt 0.011
type train | step 1860 | loss 0.0262 0.1111 0.5152 0.8432 3.0392 | lr 9.4e-04 | norm 0.2345 | dt 0.011
type train | step 1870 | loss 0.0267 0.1130 0.5247 0.8708 3.0889 | lr 9.4e-04 | norm 0.1851 | dt 0.011
type train | step 1880 | loss 0.0261 0.1110 0.5206 0.8376 3.0359 | lr 9.4e-04 | norm 0.3309 | dt 0.011
type train | step 1890 | loss 0.0260 0.1106 0.5170 0.8404 3.0337 | lr 9.4e-04 | norm 0.2065 | dt 0.011
type train | step 1900 | loss 0.0261 0.1103 0.5183 0.8413 3.0330 | lr 9.4e-04 | norm 0.2429 | dt 0.011
type train | step 1910 | loss 0.0264 0.1118 0.5254 0.8682 3.0986 | lr 9.4e-04 | norm 0.2509 | dt 0.011
type train | step 1920 | loss 0.0261 0.1112 0.5164 0.8472 3.0509 | lr 9.3e-04 | norm 0.1563 | dt 0.011
type train | step 1930 | loss 0.0261 0.1103 0.5136 0.8351 3.0190 | lr 9.3e-04 | norm 0.3355 | dt 0.011
type train | step 1940 | loss 0.0262 0.1112 0.5182 0.8421 3.0425 | lr 9.3e-04 | norm 0.2053 | dt 0.013
type train | step 1950 | loss 0.0264 0.1120 0.5212 0.8435 3.0315 | lr 9.3e-04 | norm 0.2916 | dt 0.014
type train | step 1960 | loss 0.0262 0.1108 0.5169 0.8437 3.0401 | lr 9.3e-04 | norm 0.4079 | dt 0.011
type train | step 1970 | loss 0.0267 0.1134 0.5279 0.8649 3.0546 | lr 9.3e-04 | norm 0.2343 | dt 0.011
type train | step 1980 | loss 0.0262 0.1116 0.5189 0.8488 3.0730 | lr 9.3e-04 | norm 0.3291 | dt 0.011
type train | step 1990 | loss 0.0262 0.1107 0.5190 0.8465 3.0512 | lr 9.3e-04 | norm 0.2365 | dt 0.011
type train | step 2000 | loss 0.0259 0.1103 0.5186 0.8362 3.0304 | lr 9.3e-04 | norm 0.3085 | dt 0.011
type train | step 2010 | loss 0.0263 0.1111 0.5217 0.8460 3.0307 | lr 9.2e-04 | norm 0.2115 | dt 0.011
type train | step 2020 | loss 0.0261 0.1107 0.5179 0.8448 3.0503 | lr 9.2e-04 | norm 0.2008 | dt 0.011
type train | step 2030 | loss 0.0264 0.1120 0.5235 0.8622 3.0798 | lr 9.2e-04 | norm 0.2290 | dt 0.011
type train | step 2040 | loss 0.0263 0.1114 0.5171 0.8466 3.0475 | lr 9.2e-04 | norm 0.3715 | dt 0.012
type train | step 2050 | loss 0.0260 0.1101 0.5174 0.8459 3.0331 | lr 9.2e-04 | norm 0.2952 | dt 0.011
type train | step 2060 | loss 0.0258 0.1095 0.5171 0.8333 3.0102 | lr 9.2e-04 | norm 0.2700 | dt 0.011
type train | step 2070 | loss 0.0261 0.1112 0.5261 0.8516 3.0489 | lr 9.2e-04 | norm 0.2224 | dt 0.011
type train | step 2080 | loss 0.0261 0.1103 0.5150 0.8359 3.0389 | lr 9.2e-04 | norm 0.3255 | dt 0.011
type train | step 2090 | loss 0.0261 0.1099 0.5176 0.8376 3.0261 | lr 9.2e-04 | norm 0.1641 | dt 0.011
type train | step 2100 | loss 0.0261 0.1109 0.5153 0.8394 3.0111 | lr 9.1e-04 | norm 0.2723 | dt 0.011
type train | step 2110 | loss 0.0263 0.1112 0.5175 0.8523 3.0797 | lr 9.1e-04 | norm 0.1932 | dt 0.011
type train | step 2120 | loss 0.0259 0.1103 0.5140 0.8297 3.0151 | lr 9.1e-04 | norm 0.2941 | dt 0.012
type train | step 2130 | loss 0.0260 0.1105 0.5249 0.8441 3.0354 | lr 9.1e-04 | norm 0.2048 | dt 0.012
type train | step 2140 | loss 0.0263 0.1108 0.5163 0.8451 3.0575 | lr 9.1e-04 | norm 0.2608 | dt 0.011
type train | step 2150 | loss 0.0261 0.1105 0.5146 0.8399 3.0442 | lr 9.1e-04 | norm 0.2462 | dt 0.011
type train | step 2160 | loss 0.0260 0.1102 0.5190 0.8461 3.0309 | lr 9.1e-04 | norm 0.1707 | dt 0.011
type train | step 2170 | loss 0.0258 0.1094 0.5144 0.8408 3.0327 | lr 9.1e-04 | norm 0.2083 | dt 0.011
type train | step 2180 | loss 0.0259 0.1100 0.5169 0.8355 3.0357 | lr 9.0e-04 | norm 0.3698 | dt 0.011
type train | step 2190 | loss 0.0259 0.1098 0.5120 0.8268 3.0081 | lr 9.0e-04 | norm 0.2543 | dt 0.011
type train | step 2200 | loss 0.0259 0.1098 0.5174 0.8414 3.0355 | lr 9.0e-04 | norm 0.2439 | dt 0.011
type train | step 2210 | loss 0.0262 0.1109 0.5182 0.8583 3.0733 | lr 9.0e-04 | norm 0.1983 | dt 0.011
type train | step 2220 | loss 0.0258 0.1093 0.5174 0.8394 3.0224 | lr 9.0e-04 | norm 0.1743 | dt 0.011
type train | step 2230 | loss 0.0258 0.1103 0.5118 0.8335 3.0031 | lr 9.0e-04 | norm 0.1562 | dt 0.011
type train | step 2240 | loss 0.0259 0.1099 0.5165 0.8410 3.0244 | lr 9.0e-04 | norm 0.1885 | dt 0.011
type train | step 2250 | loss 0.0259 0.1104 0.5145 0.8403 3.0495 | lr 8.9e-04 | norm 0.2079 | dt 0.011
type train | step 2260 | loss 0.0258 0.1097 0.5208 0.8473 3.0499 | lr 8.9e-04 | norm 0.1958 | dt 0.011
type train | step 2270 | loss 0.0259 0.1096 0.5122 0.8322 3.0147 | lr 8.9e-04 | norm 0.1646 | dt 0.011
type train | step 2280 | loss 0.0258 0.1100 0.5180 0.8427 3.0248 | lr 8.9e-04 | norm 0.1745 | dt 0.011
type train | step 2290 | loss 0.0257 0.1095 0.5073 0.8270 2.9897 | lr 8.9e-04 | norm 0.2552 | dt 0.011
type train | step 2300 | loss 0.0260 0.1099 0.5155 0.8449 3.0201 | lr 8.9e-04 | norm 0.5377 | dt 0.011
type train | step 2310 | loss 0.0258 0.1099 0.5163 0.8391 3.0443 | lr 8.9e-04 | norm 0.1857 | dt 0.011
type train | step 2320 | loss 0.0258 0.1097 0.5159 0.8443 3.0444 | lr 8.9e-04 | norm 0.3528 | dt 0.012
type train | step 2330 | loss 0.0258 0.1097 0.5150 0.8418 3.0239 | lr 8.8e-04 | norm 0.2686 | dt 0.011
type train | step 2340 | loss 0.0257 0.1095 0.5179 0.8457 3.0381 | lr 8.8e-04 | norm 0.3107 | dt 0.011
type train | step 2350 | loss 0.0257 0.1093 0.5155 0.8488 3.0303 | lr 8.8e-04 | norm 0.1715 | dt 0.011
type train | step 2360 | loss 0.0261 0.1100 0.5181 0.8504 3.0439 | lr 8.8e-04 | norm 0.4372 | dt 0.012
type train | step 2370 | loss 0.0256 0.1087 0.5128 0.8295 3.0035 | lr 8.8e-04 | norm 0.1595 | dt 0.011
type train | step 2380 | loss 0.0256 0.1090 0.5187 0.8450 3.0247 | lr 8.8e-04 | norm 0.2557 | dt 0.011
type train | step 2390 | loss 0.0260 0.1100 0.5158 0.8441 3.0288 | lr 8.8e-04 | norm 0.2221 | dt 0.011
type train | step 2400 | loss 0.0257 0.1092 0.5181 0.8507 3.0529 | lr 8.7e-04 | norm 0.1504 | dt 0.011
type train | step 2410 | loss 0.0256 0.1100 0.5113 0.8363 3.0359 | lr 8.7e-04 | norm 0.1941 | dt 0.011
type train | step 2420 | loss 0.0258 0.1095 0.5155 0.8439 3.0109 | lr 8.7e-04 | norm 0.3675 | dt 0.011
type train | step 2430 | loss 0.0254 0.1082 0.5131 0.8289 2.9976 | lr 8.7e-04 | norm 0.1519 | dt 0.011
type train | step 2440 | loss 0.0256 0.1091 0.5147 0.8448 3.0390 | lr 8.7e-04 | norm 0.2089 | dt 0.013
type train | step 2450 | loss 0.0256 0.1088 0.5151 0.8334 3.0106 | lr 8.7e-04 | norm 0.5013 | dt 0.011
type train | step 2460 | loss 0.0256 0.1087 0.5179 0.8419 3.0407 | lr 8.6e-04 | norm 0.2261 | dt 0.011
type train | step 2470 | loss 0.0255 0.1089 0.5086 0.8313 3.0096 | lr 8.6e-04 | norm 0.2620 | dt 0.011
type train | step 2480 | loss 0.0260 0.1105 0.5178 0.8579 3.0569 | lr 8.6e-04 | norm 0.2181 | dt 0.011
type train | step 2490 | loss 0.0254 0.1089 0.5141 0.8258 3.0053 | lr 8.6e-04 | norm 0.2834 | dt 0.011
type train | step 2500 | loss 0.0253 0.1084 0.5110 0.8292 3.0071 | lr 8.6e-04 | norm 0.2628 | dt 0.011
type train | step 2510 | loss 0.0255 0.1082 0.5121 0.8296 3.0048 | lr 8.6e-04 | norm 0.1911 | dt 0.011
type train | step 2520 | loss 0.0258 0.1096 0.5189 0.8561 3.0686 | lr 8.6e-04 | norm 0.1986 | dt 0.011
type train | step 2530 | loss 0.0255 0.1090 0.5104 0.8361 3.0239 | lr 8.5e-04 | norm 0.1849 | dt 0.011
type train | step 2540 | loss 0.0254 0.1083 0.5076 0.8242 2.9920 | lr 8.5e-04 | norm 0.3722 | dt 0.011
type train | step 2550 | loss 0.0255 0.1092 0.5120 0.8314 3.0154 | lr 8.5e-04 | norm 0.3046 | dt 0.011
type train | step 2560 | loss 0.0258 0.1098 0.5149 0.8322 3.0049 | lr 8.5e-04 | norm 0.1516 | dt 0.011
type train | step 2570 | loss 0.0256 0.1088 0.5111 0.8325 3.0118 | lr 8.5e-04 | norm 0.4286 | dt 0.011
type train | step 2580 | loss 0.0260 0.1112 0.5215 0.8524 3.0237 | lr 8.5e-04 | norm 0.3259 | dt 0.011
type train | step 2590 | loss 0.0256 0.1096 0.5132 0.8392 3.0466 | lr 8.4e-04 | norm 0.1984 | dt 0.011
type train | step 2600 | loss 0.0255 0.1086 0.5133 0.8360 3.0264 | lr 8.4e-04 | norm 0.1956 | dt 0.011
type train | step 2610 | loss 0.0252 0.1084 0.5129 0.8261 3.0058 | lr 8.4e-04 | norm 0.2693 | dt 0.011
type train | step 2620 | loss 0.0257 0.1091 0.5161 0.8354 3.0056 | lr 8.4e-04 | norm 0.1952 | dt 0.011
type train | step 2630 | loss 0.0254 0.1088 0.5125 0.8353 3.0257 | lr 8.4e-04 | norm 0.1834 | dt 0.011
type train | step 2640 | loss 0.0258 0.1100 0.5180 0.8513 3.0524 | lr 8.4e-04 | norm 0.1965 | dt 0.012
type train | step 2650 | loss 0.0256 0.1094 0.5121 0.8371 3.0233 | lr 8.4e-04 | norm 0.3811 | dt 0.011
type train | step 2660 | loss 0.0254 0.1083 0.5122 0.8369 3.0089 | lr 8.3e-04 | norm 0.2852 | dt 0.012
type train | step 2670 | loss 0.0252 0.1077 0.5122 0.8237 2.9881 | lr 8.3e-04 | norm 0.2970 | dt 0.012
type train | step 2680 | loss 0.0255 0.1093 0.5208 0.8420 3.0244 | lr 8.3e-04 | norm 0.1516 | dt 0.012
type train | step 2690 | loss 0.0254 0.1085 0.5100 0.8266 3.0126 | lr 8.3e-04 | norm 0.2767 | dt 0.012
type train | step 2700 | loss 0.0255 0.1082 0.5129 0.8286 3.0013 | lr 8.3e-04 | norm 0.2052 | dt 0.012
type train | step 2710 | loss 0.0255 0.1091 0.5108 0.8302 2.9873 | lr 8.3e-04 | norm 0.3410 | dt 0.013
type train | step 2720 | loss 0.0257 0.1093 0.5125 0.8435 3.0551 | lr 8.2e-04 | norm 0.2596 | dt 0.012
type train | step 2730 | loss 0.0254 0.1085 0.5090 0.8208 2.9934 | lr 8.2e-04 | norm 0.2936 | dt 0.012
type train | step 2740 | loss 0.0254 0.1087 0.5203 0.8347 3.0129 | lr 8.2e-04 | norm 0.2308 | dt 0.012
type train | step 2750 | loss 0.0257 0.1091 0.5116 0.8360 3.0305 | lr 8.2e-04 | norm 0.1888 | dt 0.012
type train | step 2760 | loss 0.0255 0.1087 0.5101 0.8313 3.0221 | lr 8.2e-04 | norm 0.3493 | dt 0.012
type train | step 2770 | loss 0.0254 0.1085 0.5145 0.8376 3.0098 | lr 8.2e-04 | norm 0.1752 | dt 0.013
type train | step 2780 | loss 0.0252 0.1078 0.5101 0.8328 3.0118 | lr 8.1e-04 | norm 0.2028 | dt 0.012
type train | step 2790 | loss 0.0253 0.1083 0.5125 0.8271 3.0145 | lr 8.1e-04 | norm 0.2469 | dt 0.011
type train | step 2800 | loss 0.0254 0.1082 0.5075 0.8194 2.9867 | lr 8.1e-04 | norm 0.1963 | dt 0.011
type train | step 2810 | loss 0.0253 0.1082 0.5134 0.8333 3.0117 | lr 8.1e-04 | norm 0.2366 | dt 0.011
type train | step 2820 | loss 0.0257 0.1093 0.5139 0.8504 3.0499 | lr 8.1e-04 | norm 0.1583 | dt 0.012
type train | step 2830 | loss 0.0253 0.1077 0.5135 0.8319 3.0032 | lr 8.1e-04 | norm 0.2706 | dt 0.012
type train | step 2840 | loss 0.0253 0.1088 0.5080 0.8261 2.9829 | lr 8.0e-04 | norm 0.1668 | dt 0.012
type train | step 2850 | loss 0.0254 0.1084 0.5123 0.8334 3.0043 | lr 8.0e-04 | norm 0.1967 | dt 0.012
type train | step 2860 | loss 0.0254 0.1089 0.5106 0.8332 3.0299 | lr 8.0e-04 | norm 0.2219 | dt 0.012
type train | step 2870 | loss 0.0253 0.1082 0.5168 0.8395 3.0284 | lr 8.0e-04 | norm 0.1899 | dt 0.012
type train | step 2880 | loss 0.0254 0.1081 0.5083 0.8245 2.9934 | lr 8.0e-04 | norm 0.2990 | dt 0.012
type train | step 2890 | loss 0.0253 0.1085 0.5146 0.8356 3.0048 | lr 7.9e-04 | norm 0.3059 | dt 0.014
type train | step 2900 | loss 0.0252 0.1080 0.5038 0.8201 2.9717 | lr 7.9e-04 | norm 0.1731 | dt 0.011
type train | step 2910 | loss 0.0255 0.1084 0.5116 0.8376 3.0002 | lr 7.9e-04 | norm 0.3850 | dt 0.011
type train | step 2920 | loss 0.0254 0.1083 0.5127 0.8320 3.0251 | lr 7.9e-04 | norm 0.1648 | dt 0.011
type train | step 2930 | loss 0.0253 0.1083 0.5122 0.8373 3.0254 | lr 7.9e-04 | norm 0.2344 | dt 0.012
type train | step 2940 | loss 0.0254 0.1083 0.5115 0.8343 3.0051 | lr 7.9e-04 | norm 0.1595 | dt 0.012
type train | step 2950 | loss 0.0252 0.1081 0.5144 0.8387 3.0198 | lr 7.8e-04 | norm 0.1837 | dt 0.012
type train | step 2960 | loss 0.0252 0.1079 0.5121 0.8421 3.0126 | lr 7.8e-04 | norm 0.2009 | dt 0.012
type train | step 2970 | loss 0.0256 0.1085 0.5144 0.8436 3.0261 | lr 7.8e-04 | norm 0.3097 | dt 0.012
type train | step 2980 | loss 0.0251 0.1073 0.5093 0.8224 2.9864 | lr 7.8e-04 | norm 0.2134 | dt 0.012
type train | step 2990 | loss 0.0251 0.1077 0.5151 0.8374 3.0060 | lr 7.8e-04 | norm 0.2210 | dt 0.011
type train | step 3000 | loss 0.0256 0.1086 0.5123 0.8371 3.0100 | lr 7.8e-04 | norm 0.2109 | dt 0.011
type train | step 3010 | loss 0.0253 0.1079 0.5149 0.8441 3.0365 | lr 7.7e-04 | norm 0.2182 | dt 0.011
type train | step 3020 | loss 0.0251 0.1087 0.5082 0.8298 3.0191 | lr 7.7e-04 | norm 0.1967 | dt 0.011
type train | step 3030 | loss 0.0254 0.1081 0.5122 0.8374 2.9936 | lr 7.7e-04 | norm 0.3770 | dt 0.011
type train | step 3040 | loss 0.0250 0.1069 0.5101 0.8223 2.9812 | lr 7.7e-04 | norm 0.2585 | dt 0.011
type train | step 3050 | loss 0.0252 0.1077 0.5114 0.8383 3.0236 | lr 7.7e-04 | norm 0.1544 | dt 0.013
type train | step 3060 | loss 0.0252 0.1075 0.5121 0.8272 2.9944 | lr 7.6e-04 | norm 0.3206 | dt 0.011
type train | step 3070 | loss 0.0251 0.1074 0.5149 0.8353 3.0240 | lr 7.6e-04 | norm 0.1693 | dt 0.011
type train | step 3080 | loss 0.0251 0.1076 0.5058 0.8257 2.9936 | lr 7.6e-04 | norm 0.2277 | dt 0.011
type train | step 3090 | loss 0.0256 0.1092 0.5145 0.8515 3.0390 | lr 7.6e-04 | norm 0.2285 | dt 0.011
type train | step 3100 | loss 0.0250 0.1075 0.5112 0.8197 2.9884 | lr 7.6e-04 | norm 0.2516 | dt 0.011
type train | step 3110 | loss 0.0249 0.1072 0.5082 0.8238 2.9923 | lr 7.5e-04 | norm 0.1787 | dt 0.011
type train | step 3120 | loss 0.0251 0.1071 0.5093 0.8238 2.9891 | lr 7.5e-04 | norm 0.2665 | dt 0.011
type train | step 3130 | loss 0.0254 0.1084 0.5161 0.8500 3.0526 | lr 7.5e-04 | norm 0.1695 | dt 0.011
type train | step 3140 | loss 0.0250 0.1078 0.5076 0.8303 3.0083 | lr 7.5e-04 | norm 0.1617 | dt 0.011
type train | step 3150 | loss 0.0250 0.1070 0.5047 0.8187 2.9764 | lr 7.5e-04 | norm 0.3192 | dt 0.011
type train | step 3160 | loss 0.0251 0.1079 0.5090 0.8258 3.0005 | lr 7.5e-04 | norm 0.2954 | dt 0.011
type train | step 3170 | loss 0.0254 0.1086 0.5122 0.8261 2.9905 | lr 7.4e-04 | norm 0.1512 | dt 0.011
type train | step 3180 | loss 0.0252 0.1076 0.5084 0.8261 2.9952 | lr 7.4e-04 | norm 0.4337 | dt 0.011
type train | step 3190 | loss 0.0256 0.1100 0.5186 0.8458 3.0064 | lr 7.4e-04 | norm 0.2913 | dt 0.011
type train | step 3200 | loss 0.0252 0.1084 0.5106 0.8341 3.0326 | lr 7.4e-04 | norm 0.1940 | dt 0.011
type train | step 3210 | loss 0.0252 0.1075 0.5106 0.8307 3.0125 | lr 7.4e-04 | norm 0.2451 | dt 0.011
type train | step 3220 | loss 0.0249 0.1071 0.5102 0.8207 2.9915 | lr 7.3e-04 | norm 0.2495 | dt 0.012
type train | step 3230 | loss 0.0253 0.1080 0.5136 0.8295 2.9911 | lr 7.3e-04 | norm 0.1766 | dt 0.011
type train | step 3240 | loss 0.0250 0.1076 0.5102 0.8297 3.0107 | lr 7.3e-04 | norm 0.1960 | dt 0.011
type train | step 3250 | loss 0.0254 0.1089 0.5152 0.8460 3.0366 | lr 7.3e-04 | norm 0.1634 | dt 0.011
type train | step 3260 | loss 0.0253 0.1083 0.5098 0.8323 3.0095 | lr 7.3e-04 | norm 0.2742 | dt 0.011
type train | step 3270 | loss 0.0250 0.1072 0.5098 0.8326 2.9957 | lr 7.2e-04 | norm 0.2296 | dt 0.011
type train | step 3280 | loss 0.0249 0.1067 0.5098 0.8185 2.9751 | lr 7.2e-04 | norm 0.2468 | dt 0.011
type train | step 3290 | loss 0.0252 0.1082 0.5183 0.8364 3.0112 | lr 7.2e-04 | norm 0.1806 | dt 0.011
type train | step 3300 | loss 0.0251 0.1074 0.5075 0.8214 2.9985 | lr 7.2e-04 | norm 0.3552 | dt 0.011
type train | step 3310 | loss 0.0251 0.1071 0.5104 0.8238 2.9879 | lr 7.2e-04 | norm 0.2196 | dt 0.011
type train | step 3320 | loss 0.0252 0.1081 0.5086 0.8256 2.9736 | lr 7.1e-04 | norm 0.2185 | dt 0.011
type train | step 3330 | loss 0.0253 0.1082 0.5100 0.8390 3.0419 | lr 7.1e-04 | norm 0.4020 | dt 0.011
type train | step 3340 | loss 0.0250 0.1074 0.5066 0.8156 2.9804 | lr 7.1e-04 | norm 0.3261 | dt 0.011
type train | step 3350 | loss 0.0251 0.1076 0.5179 0.8292 3.0001 | lr 7.1e-04 | norm 0.2618 | dt 0.011
type train | step 3360 | loss 0.0254 0.1080 0.5092 0.8307 3.0143 | lr 7.1e-04 | norm 0.1850 | dt 0.011
type train | step 3370 | loss 0.0252 0.1077 0.5079 0.8264 3.0090 | lr 7.0e-04 | norm 0.2487 | dt 0.011
type train | step 3380 | loss 0.0251 0.1075 0.5124 0.8331 2.9985 | lr 7.0e-04 | norm 0.2958 | dt 0.011
type train | step 3390 | loss 0.0249 0.1068 0.5081 0.8289 3.0006 | lr 7.0e-04 | norm 0.3908 | dt 0.011
type train | step 3400 | loss 0.0250 0.1073 0.5103 0.8221 3.0015 | lr 7.0e-04 | norm 0.3476 | dt 0.011
type train | step 3410 | loss 0.0251 0.1072 0.5052 0.8152 2.9749 | lr 7.0e-04 | norm 0.1988 | dt 0.011
type train | step 3420 | loss 0.0250 0.1072 0.5112 0.8284 2.9982 | lr 7.0e-04 | norm 0.3078 | dt 0.011
type train | step 3430 | loss 0.0254 0.1083 0.5117 0.8454 3.0374 | lr 6.9e-04 | norm 0.3214 | dt 0.011
type train | step 3440 | loss 0.0250 0.1068 0.5116 0.8278 2.9918 | lr 6.9e-04 | norm 0.1834 | dt 0.011
type train | step 3450 | loss 0.0250 0.1079 0.5060 0.8224 2.9705 | lr 6.9e-04 | norm 0.2222 | dt 0.011
type train | step 3460 | loss 0.0251 0.1073 0.5101 0.8290 2.9924 | lr 6.9e-04 | norm 0.1871 | dt 0.011
type train | step 3470 | loss 0.0251 0.1079 0.5085 0.8290 3.0188 | lr 6.9e-04 | norm 0.3141 | dt 0.011
type train | step 3480 | loss 0.0250 0.1073 0.5148 0.8350 3.0166 | lr 6.8e-04 | norm 0.2577 | dt 0.012
type train | step 3490 | loss 0.0251 0.1072 0.5062 0.8200 2.9811 | lr 6.8e-04 | norm 0.1593 | dt 0.011
type train | step 3500 | loss 0.0250 0.1075 0.5127 0.8312 2.9937 | lr 6.8e-04 | norm 0.1643 | dt 0.011
type train | step 3510 | loss 0.0250 0.1072 0.5018 0.8165 2.9617 | lr 6.8e-04 | norm 0.3160 | dt 0.011
type train | step 3520 | loss 0.0252 0.1074 0.5095 0.8333 2.9895 | lr 6.8e-04 | norm 0.1452 | dt 0.011
type train | step 3530 | loss 0.0251 0.1074 0.5106 0.8279 3.0140 | lr 6.7e-04 | norm 0.2911 | dt 0.011
type train | step 3540 | loss 0.0250 0.1074 0.5102 0.8335 3.0156 | lr 6.7e-04 | norm 0.2613 | dt 0.012
type train | step 3550 | loss 0.0251 0.1074 0.5095 0.8300 2.9945 | lr 6.7e-04 | norm 0.1562 | dt 0.012
type train | step 3560 | loss 0.0250 0.1072 0.5124 0.8346 3.0092 | lr 6.7e-04 | norm 0.2177 | dt 0.011
type train | step 3570 | loss 0.0249 0.1071 0.5103 0.8384 3.0021 | lr 6.6e-04 | norm 0.1920 | dt 0.011
type train | step 3580 | loss 0.0253 0.1075 0.5124 0.8396 3.0168 | lr 6.6e-04 | norm 0.2786 | dt 0.012
type train | step 3590 | loss 0.0249 0.1064 0.5073 0.8183 2.9763 | lr 6.6e-04 | norm 0.2535 | dt 0.011
type train | step 3600 | loss 0.0249 0.1068 0.5132 0.8331 2.9952 | lr 6.6e-04 | norm 0.2582 | dt 0.012
type train | step 3610 | loss 0.0253 0.1077 0.5103 0.8328 2.9999 | lr 6.6e-04 | norm 0.3112 | dt 0.011
type train | step 3620 | loss 0.0250 0.1070 0.5131 0.8401 3.0267 | lr 6.5e-04 | norm 0.1991 | dt 0.011
type train | step 3630 | loss 0.0249 0.1078 0.5065 0.8261 3.0092 | lr 6.5e-04 | norm 0.2762 | dt 0.012
type train | step 3640 | loss 0.0251 0.1072 0.5103 0.8333 2.9836 | lr 6.5e-04 | norm 0.1635 | dt 0.011
type train | step 3650 | loss 0.0248 0.1060 0.5083 0.8184 2.9716 | lr 6.5e-04 | norm 0.1635 | dt 0.011
type train | step 3660 | loss 0.0249 0.1070 0.5098 0.8342 3.0146 | lr 6.5e-04 | norm 0.1516 | dt 0.013
type train | step 3670 | loss 0.0249 0.1067 0.5103 0.8234 2.9857 | lr 6.4e-04 | norm 0.3102 | dt 0.012
type train | step 3680 | loss 0.0249 0.1066 0.5132 0.8315 3.0148 | lr 6.4e-04 | norm 0.1962 | dt 0.011
type train | step 3690 | loss 0.0248 0.1069 0.5041 0.8223 2.9837 | lr 6.4e-04 | norm 0.1794 | dt 0.011
type train | step 3700 | loss 0.0253 0.1082 0.5126 0.8474 3.0297 | lr 6.4e-04 | norm 0.3344 | dt 0.011
type train | step 3710 | loss 0.0248 0.1067 0.5095 0.8162 2.9781 | lr 6.4e-04 | norm 0.2262 | dt 0.011
type train | step 3720 | loss 0.0247 0.1065 0.5065 0.8205 2.9845 | lr 6.3e-04 | norm 0.1748 | dt 0.011
type train | step 3730 | loss 0.0248 0.1063 0.5077 0.8205 2.9801 | lr 6.3e-04 | norm 0.3053 | dt 0.011
type train | step 3740 | loss 0.0251 0.1075 0.5145 0.8468 3.0439 | lr 6.3e-04 | norm 0.2374 | dt 0.011
type train | step 3750 | loss 0.0248 0.1071 0.5058 0.8269 2.9988 | lr 6.3e-04 | norm 0.2051 | dt 0.011
type train | step 3760 | loss 0.0248 0.1062 0.5031 0.8153 2.9674 | lr 6.3e-04 | norm 0.1935 | dt 0.011
type train | step 3770 | loss 0.0249 0.1071 0.5072 0.8224 2.9906 | lr 6.2e-04 | norm 0.1580 | dt 0.011
type train | step 3780 | loss 0.0252 0.1078 0.5105 0.8223 2.9815 | lr 6.2e-04 | norm 0.2179 | dt 0.011
type train | step 3790 | loss 0.0250 0.1069 0.5070 0.8226 2.9850 | lr 6.2e-04 | norm 0.3565 | dt 0.011
type train | step 3800 | loss 0.0254 0.1092 0.5165 0.8420 2.9962 | lr 6.2e-04 | norm 0.2023 | dt 0.011
type train | step 3810 | loss 0.0250 0.1077 0.5089 0.8311 3.0234 | lr 6.2e-04 | norm 0.1941 | dt 0.011
type train | step 3820 | loss 0.0249 0.1067 0.5090 0.8277 3.0039 | lr 6.1e-04 | norm 0.2892 | dt 0.011
type train | step 3830 | loss 0.0246 0.1063 0.5085 0.8175 2.9821 | lr 6.1e-04 | norm 0.1836 | dt 0.011
type train | step 3840 | loss 0.0251 0.1073 0.5119 0.8263 2.9818 | lr 6.1e-04 | norm 0.1729 | dt 0.011
type train | step 3850 | loss 0.0248 0.1069 0.5088 0.8266 3.0018 | lr 6.1e-04 | norm 0.2343 | dt 0.011
type train | step 3860 | loss 0.0252 0.1082 0.5134 0.8427 3.0274 | lr 6.1e-04 | norm 0.3102 | dt 0.011
type train | step 3870 | loss 0.0251 0.1075 0.5083 0.8292 3.0005 | lr 6.0e-04 | norm 0.1734 | dt 0.011
type train | step 3880 | loss 0.0248 0.1065 0.5082 0.8298 2.9870 | lr 6.0e-04 | norm 0.1430 | dt 0.011
type train | step 3890 | loss 0.0247 0.1059 0.5082 0.8152 2.9665 | lr 6.0e-04 | norm 0.2152 | dt 0.012
type train | step 3900 | loss 0.0250 0.1074 0.5168 0.8329 3.0028 | lr 6.0e-04 | norm 0.1407 | dt 0.012
type train | step 3910 | loss 0.0249 0.1068 0.5060 0.8183 2.9899 | lr 5.9e-04 | norm 0.3471 | dt 0.012
type train | step 3920 | loss 0.0249 0.1064 0.5091 0.8206 2.9801 | lr 5.9e-04 | norm 0.1982 | dt 0.012
type train | step 3930 | loss 0.0250 0.1074 0.5073 0.8227 2.9656 | lr 5.9e-04 | norm 0.1673 | dt 0.012
type train | step 3940 | loss 0.0251 0.1074 0.5086 0.8358 3.0334 | lr 5.9e-04 | norm 0.3616 | dt 0.012
type train | step 3950 | loss 0.0248 0.1065 0.5052 0.8120 2.9707 | lr 5.9e-04 | norm 0.2383 | dt 0.012
type train | step 3960 | loss 0.0249 0.1070 0.5165 0.8260 2.9913 | lr 5.8e-04 | norm 0.1914 | dt 0.012
type train | step 3970 | loss 0.0252 0.1074 0.5079 0.8275 3.0047 | lr 5.8e-04 | norm 0.2039 | dt 0.012
type train | step 3980 | loss 0.0250 0.1070 0.5064 0.8232 3.0003 | lr 5.8e-04 | norm 0.1645 | dt 0.012
type train | step 3990 | loss 0.0249 0.1068 0.5110 0.8304 2.9904 | lr 5.8e-04 | norm 0.1297 | dt 0.012
type train | step 4000 | loss 0.0247 0.1061 0.5069 0.8267 2.9928 | lr 5.8e-04 | norm 0.1776 | dt 0.012
type train | step 4010 | loss 0.0248 0.1066 0.5089 0.8186 2.9922 | lr 5.7e-04 | norm 0.1766 | dt 0.012
type train | step 4020 | loss 0.0249 0.1066 0.5037 0.8124 2.9669 | lr 5.7e-04 | norm 0.1475 | dt 0.012
type train | step 4030 | loss 0.0248 0.1066 0.5100 0.8251 2.9899 | lr 5.7e-04 | norm 0.3048 | dt 0.012
type train | step 4040 | loss 0.0252 0.1075 0.5102 0.8419 3.0290 | lr 5.7e-04 | norm 0.3322 | dt 0.012
type train | step 4050 | loss 0.0248 0.1062 0.5104 0.8250 2.9847 | lr 5.7e-04 | norm 0.1675 | dt 0.012
type train | step 4060 | loss 0.0248 0.1073 0.5048 0.8199 2.9622 | lr 5.6e-04 | norm 0.2286 | dt 0.012
type train | step 4070 | loss 0.0249 0.1066 0.5088 0.8259 2.9857 | lr 5.6e-04 | norm 0.1587 | dt 0.012
type train | step 4080 | loss 0.0249 0.1072 0.5071 0.8262 3.0106 | lr 5.6e-04 | norm 0.1953 | dt 0.012
type train | step 4090 | loss 0.0248 0.1066 0.5135 0.8322 3.0087 | lr 5.6e-04 | norm 0.2036 | dt 0.012
type train | step 4100 | loss 0.0249 0.1066 0.5049 0.8171 2.9732 | lr 5.6e-04 | norm 0.2003 | dt 0.012
type train | step 4110 | loss 0.0249 0.1069 0.5115 0.8283 2.9866 | lr 5.5e-04 | norm 0.3282 | dt 0.011
type train | step 4120 | loss 0.0248 0.1066 0.5007 0.8141 2.9544 | lr 5.5e-04 | norm 0.3721 | dt 0.011
type train | step 4130 | loss 0.0250 0.1068 0.5084 0.8305 2.9830 | lr 5.5e-04 | norm 0.1578 | dt 0.011
type train | step 4140 | loss 0.0249 0.1068 0.5093 0.8251 3.0064 | lr 5.5e-04 | norm 0.2427 | dt 0.011
type train | step 4150 | loss 0.0248 0.1068 0.5091 0.8310 3.0088 | lr 5.4e-04 | norm 0.2955 | dt 0.011
type train | step 4160 | loss 0.0249 0.1069 0.5083 0.8270 2.9880 | lr 5.4e-04 | norm 0.3468 | dt 0.011
type train | step 4170 | loss 0.0248 0.1066 0.5113 0.8320 3.0024 | lr 5.4e-04 | norm 0.1449 | dt 0.012
type train | step 4180 | loss 0.0248 0.1065 0.5092 0.8360 2.9951 | lr 5.4e-04 | norm 0.2150 | dt 0.012
type train | step 4190 | loss 0.0252 0.1069 0.5112 0.8369 3.0101 | lr 5.4e-04 | norm 0.2987 | dt 0.011
type train | step 4200 | loss 0.0247 0.1058 0.5061 0.8156 2.9690 | lr 5.3e-04 | norm 0.2521 | dt 0.011
type train | step 4210 | loss 0.0247 0.1063 0.5120 0.8301 2.9880 | lr 5.3e-04 | norm 0.2493 | dt 0.011
type train | step 4220 | loss 0.0251 0.1070 0.5090 0.8296 2.9927 | lr 5.3e-04 | norm 0.1676 | dt 0.011
type train | step 4230 | loss 0.0248 0.1065 0.5120 0.8375 3.0196 | lr 5.3e-04 | norm 0.1663 | dt 0.011
type train | step 4240 | loss 0.0247 0.1072 0.5054 0.8235 3.0023 | lr 5.3e-04 | norm 0.2534 | dt 0.011
type train | step 4250 | loss 0.0249 0.1065 0.5091 0.8306 2.9770 | lr 5.2e-04 | norm 0.1687 | dt 0.011
type train | step 4260 | loss 0.0246 0.1054 0.5072 0.8158 2.9643 | lr 5.2e-04 | norm 0.1466 | dt 0.011
type train | step 4270 | loss 0.0247 0.1064 0.5087 0.8319 3.0088 | lr 5.2e-04 | norm 0.1603 | dt 0.012
type train | step 4280 | loss 0.0248 0.1060 0.5093 0.8209 2.9791 | lr 5.2e-04 | norm 0.2105 | dt 0.011
type train | step 4290 | loss 0.0247 0.1061 0.5121 0.8291 3.0086 | lr 5.2e-04 | norm 0.1927 | dt 0.011
type train | step 4300 | loss 0.0247 0.1063 0.5030 0.8198 2.9770 | lr 5.1e-04 | norm 0.2715 | dt 0.011
type train | step 4310 | loss 0.0251 0.1076 0.5114 0.8444 3.0225 | lr 5.1e-04 | norm 0.1829 | dt 0.011
type train | step 4320 | loss 0.0246 0.1062 0.5083 0.8137 2.9714 | lr 5.1e-04 | norm 0.1835 | dt 0.011
type train | step 4330 | loss 0.0245 0.1060 0.5055 0.8182 2.9791 | lr 5.1e-04 | norm 0.2272 | dt 0.011
type train | step 4340 | loss 0.0247 0.1060 0.5067 0.8183 2.9744 | lr 5.1e-04 | norm 0.3725 | dt 0.011
type train | step 4350 | loss 0.0250 0.1070 0.5133 0.8446 3.0383 | lr 5.0e-04 | norm 0.1515 | dt 0.011
type train | step 4360 | loss 0.0246 0.1066 0.5048 0.8244 2.9921 | lr 5.0e-04 | norm 0.1483 | dt 0.011
type train | step 4370 | loss 0.0246 0.1057 0.5020 0.8130 2.9610 | lr 5.0e-04 | norm 0.1941 | dt 0.011
type train | step 4380 | loss 0.0247 0.1065 0.5061 0.8201 2.9843 | lr 5.0e-04 | norm 0.1597 | dt 0.011
type train | step 4390 | loss 0.0250 0.1073 0.5095 0.8199 2.9750 | lr 4.9e-04 | norm 0.2018 | dt 0.011
type train | step 4400 | loss 0.0248 0.1064 0.5061 0.8205 2.9779 | lr 4.9e-04 | norm 0.1403 | dt 0.011
type train | step 4410 | loss 0.0252 0.1086 0.5152 0.8392 2.9899 | lr 4.9e-04 | norm 0.2301 | dt 0.011
type train | step 4420 | loss 0.0248 0.1071 0.5078 0.8290 3.0167 | lr 4.9e-04 | norm 0.1836 | dt 0.011
type train | step 4430 | loss 0.0248 0.1061 0.5079 0.8257 2.9977 | lr 4.9e-04 | norm 0.2213 | dt 0.011
type train | step 4440 | loss 0.0245 0.1058 0.5074 0.8153 2.9758 | lr 4.8e-04 | norm 0.1734 | dt 0.011
type train | step 4450 | loss 0.0249 0.1068 0.5109 0.8242 2.9759 | lr 4.8e-04 | norm 0.1648 | dt 0.011
type train | step 4460 | loss 0.0247 0.1064 0.5080 0.8245 2.9969 | lr 4.8e-04 | norm 0.3713 | dt 0.011
type train | step 4470 | loss 0.0250 0.1077 0.5124 0.8404 3.0209 | lr 4.8e-04 | norm 0.1423 | dt 0.011
type train | step 4480 | loss 0.0250 0.1070 0.5074 0.8271 2.9948 | lr 4.8e-04 | norm 0.1998 | dt 0.012
type train | step 4490 | loss 0.0247 0.1059 0.5073 0.8281 2.9818 | lr 4.7e-04 | norm 0.2039 | dt 0.011
type train | step 4500 | loss 0.0245 0.1053 0.5072 0.8131 2.9607 | lr 4.7e-04 | norm 0.1765 | dt 0.011
type train | step 4510 | loss 0.0248 0.1069 0.5159 0.8306 2.9970 | lr 4.7e-04 | norm 0.1864 | dt 0.011
type train | step 4520 | loss 0.0248 0.1065 0.5051 0.8165 2.9841 | lr 4.7e-04 | norm 0.3225 | dt 0.011
type train | step 4530 | loss 0.0248 0.1060 0.5081 0.8186 2.9749 | lr 4.7e-04 | norm 0.2072 | dt 0.011
type train | step 4540 | loss 0.0248 0.1069 0.5065 0.8208 2.9594 | lr 4.6e-04 | norm 0.1354 | dt 0.011
type train | step 4550 | loss 0.0250 0.1069 0.5076 0.8337 3.0276 | lr 4.6e-04 | norm 0.3145 | dt 0.011
type train | step 4560 | loss 0.0247 0.1060 0.5042 0.8096 2.9636 | lr 4.6e-04 | norm 0.1965 | dt 0.011
type train | step 4570 | loss 0.0248 0.1066 0.5155 0.8237 2.9852 | lr 4.6e-04 | norm 0.2578 | dt 0.011
type train | step 4580 | loss 0.0250 0.1069 0.5069 0.8253 2.9983 | lr 4.6e-04 | norm 0.2144 | dt 0.011
type train | step 4590 | loss 0.0249 0.1066 0.5055 0.8212 2.9943 | lr 4.5e-04 | norm 0.1873 | dt 0.011
type train | step 4600 | loss 0.0248 0.1064 0.5102 0.8287 2.9850 | lr 4.5e-04 | norm 0.2403 | dt 0.011
type train | step 4610 | loss 0.0246 0.1057 0.5061 0.8250 2.9878 | lr 4.5e-04 | norm 0.1650 | dt 0.011
type train | step 4620 | loss 0.0247 0.1061 0.5080 0.8161 2.9857 | lr 4.5e-04 | norm 0.1754 | dt 0.011
type train | step 4630 | loss 0.0247 0.1061 0.5028 0.8105 2.9612 | lr 4.5e-04 | norm 0.1373 | dt 0.011
type train | step 4640 | loss 0.0247 0.1061 0.5090 0.8228 2.9840 | lr 4.4e-04 | norm 0.3391 | dt 0.011
type train | step 4650 | loss 0.0250 0.1071 0.5093 0.8395 3.0225 | lr 4.4e-04 | norm 0.1665 | dt 0.011
type train | step 4660 | loss 0.0247 0.1057 0.5095 0.8233 2.9798 | lr 4.4e-04 | norm 0.1875 | dt 0.011
type train | step 4670 | loss 0.0247 0.1068 0.5040 0.8182 2.9569 | lr 4.4e-04 | norm 0.2719 | dt 0.011
type train | step 4680 | loss 0.0247 0.1061 0.5080 0.8238 2.9808 | lr 4.4e-04 | norm 0.1522 | dt 0.011
type train | step 4690 | loss 0.0248 0.1067 0.5063 0.8243 3.0050 | lr 4.3e-04 | norm 0.2144 | dt 0.011
type train | step 4700 | loss 0.0247 0.1061 0.5127 0.8304 3.0030 | lr 4.3e-04 | norm 0.1606 | dt 0.011
type train | step 4710 | loss 0.0248 0.1062 0.5040 0.8153 2.9672 | lr 4.3e-04 | norm 0.2809 | dt 0.011
type train | step 4720 | loss 0.0248 0.1064 0.5106 0.8264 2.9809 | lr 4.3e-04 | norm 0.1986 | dt 0.011
type train | step 4730 | loss 0.0247 0.1062 0.4999 0.8126 2.9489 | lr 4.2e-04 | norm 0.3744 | dt 0.011
type train | step 4740 | loss 0.0249 0.1063 0.5076 0.8290 2.9785 | lr 4.2e-04 | norm 0.2157 | dt 0.011
type train | step 4750 | loss 0.0248 0.1064 0.5084 0.8233 3.0006 | lr 4.2e-04 | norm 0.2432 | dt 0.011
type train | step 4760 | loss 0.0247 0.1064 0.5083 0.8293 3.0037 | lr 4.2e-04 | norm 0.3357 | dt 0.011
type train | step 4770 | loss 0.0248 0.1063 0.5074 0.8251 2.9824 | lr 4.2e-04 | norm 0.1406 | dt 0.011
type train | step 4780 | loss 0.0247 0.1062 0.5107 0.8304 2.9968 | lr 4.1e-04 | norm 0.1516 | dt 0.011
type train | step 4790 | loss 0.0247 0.1061 0.5084 0.8344 2.9897 | lr 4.1e-04 | norm 0.1928 | dt 0.011
type train | step 4800 | loss 0.0250 0.1064 0.5103 0.8353 3.0054 | lr 4.1e-04 | norm 0.1954 | dt 0.011
type train | step 4810 | loss 0.0246 0.1055 0.5052 0.8139 2.9636 | lr 4.1e-04 | norm 0.2258 | dt 0.011
type train | step 4820 | loss 0.0246 0.1058 0.5111 0.8280 2.9826 | lr 4.1e-04 | norm 0.1966 | dt 0.011
type train | step 4830 | loss 0.0250 0.1066 0.5082 0.8277 2.9872 | lr 4.0e-04 | norm 0.1681 | dt 0.011
type train | step 4840 | loss 0.0247 0.1060 0.5112 0.8357 3.0142 | lr 4.0e-04 | norm 0.1780 | dt 0.011
type train | step 4850 | loss 0.0246 0.1068 0.5046 0.8215 2.9969 | lr 4.0e-04 | norm 0.3011 | dt 0.011
type train | step 4860 | loss 0.0248 0.1061 0.5083 0.8288 2.9725 | lr 4.0e-04 | norm 0.1826 | dt 0.011
type train | step 4870 | loss 0.0245 0.1050 0.5065 0.8140 2.9590 | lr 4.0e-04 | norm 0.1735 | dt 0.011
type train | step 4880 | loss 0.0246 0.1060 0.5080 0.8305 3.0047 | lr 4.0e-04 | norm 0.2620 | dt 0.013
type train | step 4890 | loss 0.0247 0.1057 0.5086 0.8194 2.9739 | lr 3.9e-04 | norm 0.1341 | dt 0.011
type train | step 4900 | loss 0.0246 0.1057 0.5113 0.8275 3.0037 | lr 3.9e-04 | norm 0.1620 | dt 0.011
type train | step 4910 | loss 0.0246 0.1060 0.5022 0.8181 2.9714 | lr 3.9e-04 | norm 0.1579 | dt 0.011
type train | step 4920 | loss 0.0250 0.1071 0.5106 0.8425 3.0177 | lr 3.9e-04 | norm 0.1651 | dt 0.011
type train | step 4930 | loss 0.0245 0.1058 0.5075 0.8120 2.9658 | lr 3.9e-04 | norm 0.1908 | dt 0.011
type train | step 4940 | loss 0.0244 0.1056 0.5048 0.8168 2.9746 | lr 3.8e-04 | norm 0.3107 | dt 0.011
type train | step 4950 | loss 0.0246 0.1055 0.5060 0.8171 2.9692 | lr 3.8e-04 | norm 0.1417 | dt 0.011
type train | step 4960 | loss 0.0249 0.1067 0.5125 0.8430 3.0335 | lr 3.8e-04 | norm 0.1759 | dt 0.011
type train | step 4970 | loss 0.0245 0.1062 0.5039 0.8226 2.9868 | lr 3.8e-04 | norm 0.2791 | dt 0.011
type train | step 4980 | loss 0.0245 0.1053 0.5012 0.8116 2.9567 | lr 3.8e-04 | norm 0.1646 | dt 0.011
type train | step 4990 | loss 0.0247 0.1061 0.5054 0.8184 2.9791 | lr 3.7e-04 | norm 0.1483 | dt 0.011
type train | step 5000 | loss 0.0249 0.1069 0.5088 0.8184 2.9705 | lr 3.7e-04 | norm 0.2298 | dt 0.011
type train | step 5010 | loss 0.0247 0.1060 0.5054 0.8191 2.9736 | lr 3.7e-04 | norm 0.1579 | dt 0.011
type train | step 5020 | loss 0.0251 0.1082 0.5142 0.8372 2.9848 | lr 3.7e-04 | norm 0.1897 | dt 0.011
type train | step 5030 | loss 0.0247 0.1067 0.5069 0.8274 3.0120 | lr 3.7e-04 | norm 0.1833 | dt 0.012
type train | step 5040 | loss 0.0247 0.1057 0.5071 0.8244 2.9934 | lr 3.6e-04 | norm 0.1705 | dt 0.011
type train | step 5050 | loss 0.0244 0.1054 0.5067 0.8136 2.9709 | lr 3.6e-04 | norm 0.2062 | dt 0.011
type train | step 5060 | loss 0.0248 0.1063 0.5103 0.8228 2.9711 | lr 3.6e-04 | norm 0.1880 | dt 0.011
type train | step 5070 | loss 0.0246 0.1060 0.5074 0.8230 2.9925 | lr 3.6e-04 | norm 0.2716 | dt 0.011
type train | step 5080 | loss 0.0249 0.1073 0.5116 0.8387 3.0160 | lr 3.6e-04 | norm 0.1426 | dt 0.011
type train | step 5090 | loss 0.0248 0.1066 0.5068 0.8257 2.9902 | lr 3.5e-04 | norm 0.1333 | dt 0.011
type train | step 5100 | loss 0.0246 0.1056 0.5066 0.8270 2.9775 | lr 3.5e-04 | norm 0.2237 | dt 0.011
type train | step 5110 | loss 0.0245 0.1050 0.5065 0.8116 2.9562 | lr 3.5e-04 | norm 0.2128 | dt 0.011
type train | step 5120 | loss 0.0247 0.1066 0.5152 0.8292 2.9925 | lr 3.5e-04 | norm 0.1995 | dt 0.011
type train | step 5130 | loss 0.0247 0.1061 0.5045 0.8151 2.9797 | lr 3.5e-04 | norm 0.1347 | dt 0.011
type train | step 5140 | loss 0.0247 0.1056 0.5074 0.8172 2.9708 | lr 3.5e-04 | norm 0.2214 | dt 0.011
type train | step 5150 | loss 0.0248 0.1065 0.5058 0.8193 2.9554 | lr 3.4e-04 | norm 0.1314 | dt 0.011
type train | step 5160 | loss 0.0249 0.1065 0.5070 0.8323 3.0233 | lr 3.4e-04 | norm 0.3075 | dt 0.011
type train | step 5170 | loss 0.0246 0.1057 0.5036 0.8080 2.9586 | lr 3.4e-04 | norm 0.1781 | dt 0.012
type train | step 5180 | loss 0.0247 0.1063 0.5149 0.8222 2.9803 | lr 3.4e-04 | norm 0.1755 | dt 0.011
type train | step 5190 | loss 0.0249 0.1065 0.5063 0.8238 2.9934 | lr 3.4e-04 | norm 0.1696 | dt 0.011
type train | step 5200 | loss 0.0248 0.1063 0.5048 0.8199 2.9896 | lr 3.3e-04 | norm 0.1706 | dt 0.011
type train | step 5210 | loss 0.0247 0.1060 0.5095 0.8276 2.9804 | lr 3.3e-04 | norm 0.1571 | dt 0.011
type train | step 5220 | loss 0.0245 0.1054 0.5055 0.8240 2.9840 | lr 3.3e-04 | norm 0.1342 | dt 0.011
type train | step 5230 | loss 0.0246 0.1057 0.5074 0.8145 2.9812 | lr 3.3e-04 | norm 0.1945 | dt 0.011
type train | step 5240 | loss 0.0247 0.1058 0.5022 0.8092 2.9576 | lr 3.3e-04 | norm 0.2162 | dt 0.011
type train | step 5250 | loss 0.0246 0.1058 0.5083 0.8213 2.9795 | lr 3.3e-04 | norm 0.1631 | dt 0.011
type train | step 5260 | loss 0.0249 0.1068 0.5086 0.8379 3.0179 | lr 3.2e-04 | norm 0.1847 | dt 0.013
type train | step 5270 | loss 0.0246 0.1055 0.5089 0.8222 2.9761 | lr 3.2e-04 | norm 0.1446 | dt 0.011
type train | step 5280 | loss 0.0246 0.1065 0.5034 0.8172 2.9530 | lr 3.2e-04 | norm 0.2625 | dt 0.011
type train | step 5290 | loss 0.0246 0.1058 0.5074 0.8225 2.9770 | lr 3.2e-04 | norm 0.1573 | dt 0.011
type train | step 5300 | loss 0.0247 0.1064 0.5056 0.8229 3.0008 | lr 3.2e-04 | norm 0.2635 | dt 0.011
type train | step 5310 | loss 0.0246 0.1058 0.5120 0.8291 3.0002 | lr 3.1e-04 | norm 0.2561 | dt 0.011
type train | step 5320 | loss 0.0247 0.1058 0.5034 0.8142 2.9631 | lr 3.1e-04 | norm 0.1522 | dt 0.011
type train | step 5330 | loss 0.0247 0.1062 0.5100 0.8251 2.9775 | lr 3.1e-04 | norm 0.1746 | dt 0.011
type train | step 5340 | loss 0.0246 0.1059 0.4993 0.8114 2.9446 | lr 3.1e-04 | norm 0.2827 | dt 0.011
type train | step 5350 | loss 0.0248 0.1060 0.5070 0.8281 2.9755 | lr 3.1e-04 | norm 0.1549 | dt 0.011
type train | step 5360 | loss 0.0247 0.1061 0.5078 0.8221 2.9967 | lr 3.1e-04 | norm 0.1633 | dt 0.011
type train | step 5370 | loss 0.0246 0.1060 0.5076 0.8282 3.0002 | lr 3.0e-04 | norm 0.1720 | dt 0.012
type train | step 5380 | loss 0.0247 0.1061 0.5068 0.8238 2.9790 | lr 3.0e-04 | norm 0.1825 | dt 0.011
type train | step 5390 | loss 0.0246 0.1059 0.5101 0.8292 2.9936 | lr 3.0e-04 | norm 0.1522 | dt 0.011
type train | step 5400 | loss 0.0246 0.1058 0.5078 0.8331 2.9861 | lr 3.0e-04 | norm 0.1926 | dt 0.011
type train | step 5410 | loss 0.0250 0.1060 0.5097 0.8343 3.0027 | lr 3.0e-04 | norm 0.2649 | dt 0.011
type train | step 5420 | loss 0.0245 0.1051 0.5047 0.8126 2.9603 | lr 2.9e-04 | norm 0.3267 | dt 0.011
type train | step 5430 | loss 0.0245 0.1055 0.5105 0.8265 2.9796 | lr 2.9e-04 | norm 0.3205 | dt 0.011
type train | step 5440 | loss 0.0249 0.1064 0.5076 0.8263 2.9837 | lr 2.9e-04 | norm 0.1619 | dt 0.011
type train | step 5450 | loss 0.0246 0.1058 0.5107 0.8346 3.0104 | lr 2.9e-04 | norm 0.1711 | dt 0.011
type train | step 5460 | loss 0.0245 0.1065 0.5039 0.8202 2.9932 | lr 2.9e-04 | norm 0.2406 | dt 0.011
type train | step 5470 | loss 0.0247 0.1058 0.5077 0.8278 2.9695 | lr 2.9e-04 | norm 0.1842 | dt 0.011
type train | step 5480 | loss 0.0244 0.1048 0.5059 0.8127 2.9557 | lr 2.8e-04 | norm 0.1928 | dt 0.011
type train | step 5490 | loss 0.0246 0.1057 0.5074 0.8296 3.0024 | lr 2.8e-04 | norm 0.2847 | dt 0.014
type train | step 5500 | loss 0.0246 0.1054 0.5081 0.8184 2.9709 | lr 2.8e-04 | norm 0.1426 | dt 0.011
type train | step 5510 | loss 0.0246 0.1054 0.5108 0.8263 3.0005 | lr 2.8e-04 | norm 0.1475 | dt 0.011
type train | step 5520 | loss 0.0245 0.1057 0.5017 0.8169 2.9674 | lr 2.8e-04 | norm 0.1453 | dt 0.011
type train | step 5530 | loss 0.0250 0.1068 0.5101 0.8413 3.0142 | lr 2.8e-04 | norm 0.1516 | dt 0.011
type train | step 5540 | loss 0.0245 0.1056 0.5069 0.8108 2.9625 | lr 2.7e-04 | norm 0.2086 | dt 0.011
type train | step 5550 | loss 0.0243 0.1053 0.5042 0.8159 2.9712 | lr 2.7e-04 | norm 0.2091 | dt 0.011
type train | step 5560 | loss 0.0245 0.1052 0.5056 0.8162 2.9668 | lr 2.7e-04 | norm 0.2219 | dt 0.011
type train | step 5570 | loss 0.0248 0.1064 0.5119 0.8421 3.0301 | lr 2.7e-04 | norm 0.1816 | dt 0.011
type train | step 5580 | loss 0.0245 0.1059 0.5033 0.8214 2.9830 | lr 2.7e-04 | norm 0.1533 | dt 0.011
type train | step 5590 | loss 0.0244 0.1051 0.5008 0.8106 2.9537 | lr 2.7e-04 | norm 0.1731 | dt 0.011
type train | step 5600 | loss 0.0246 0.1059 0.5049 0.8172 2.9762 | lr 2.6e-04 | norm 0.2432 | dt 0.011
type train | step 5610 | loss 0.0248 0.1066 0.5083 0.8174 2.9671 | lr 2.6e-04 | norm 0.1865 | dt 0.011
type train | step 5620 | loss 0.0247 0.1057 0.5050 0.8183 2.9705 | lr 2.6e-04 | norm 0.1241 | dt 0.011
type train | step 5630 | loss 0.0251 0.1080 0.5135 0.8358 2.9816 | lr 2.6e-04 | norm 0.2582 | dt 0.011
type train | step 5640 | loss 0.0246 0.1064 0.5063 0.8262 3.0088 | lr 2.6e-04 | norm 0.1591 | dt 0.011
type train | step 5650 | loss 0.0246 0.1055 0.5067 0.8235 2.9904 | lr 2.6e-04 | norm 0.2048 | dt 0.011
type train | step 5660 | loss 0.0243 0.1052 0.5062 0.8124 2.9672 | lr 2.6e-04 | norm 0.1590 | dt 0.011
type train | step 5670 | loss 0.0247 0.1061 0.5097 0.8218 2.9682 | lr 2.5e-04 | norm 0.1554 | dt 0.011
type train | step 5680 | loss 0.0245 0.1058 0.5070 0.8220 2.9902 | lr 2.5e-04 | norm 0.2975 | dt 0.011
type train | step 5690 | loss 0.0249 0.1071 0.5111 0.8376 3.0134 | lr 2.5e-04 | norm 0.1611 | dt 0.011
type train | step 5700 | loss 0.0248 0.1064 0.5065 0.8248 2.9873 | lr 2.5e-04 | norm 0.1399 | dt 0.011
type train | step 5710 | loss 0.0245 0.1053 0.5062 0.8262 2.9749 | lr 2.5e-04 | norm 0.1427 | dt 0.011
type train | step 5720 | loss 0.0244 0.1047 0.5060 0.8105 2.9531 | lr 2.5e-04 | norm 0.1477 | dt 0.012
type train | step 5730 | loss 0.0247 0.1064 0.5146 0.8281 2.9898 | lr 2.4e-04 | norm 0.1819 | dt 0.011
type train | step 5740 | loss 0.0246 0.1058 0.5041 0.8142 2.9774 | lr 2.4e-04 | norm 0.2394 | dt 0.011
type train | step 5750 | loss 0.0246 0.1054 0.5070 0.8163 2.9685 | lr 2.4e-04 | norm 0.1691 | dt 0.011
type train | step 5760 | loss 0.0247 0.1063 0.5054 0.8183 2.9527 | lr 2.4e-04 | norm 0.1518 | dt 0.011
type train | step 5770 | loss 0.0248 0.1062 0.5065 0.8313 3.0201 | lr 2.4e-04 | norm 0.2036 | dt 0.011
type train | step 5780 | loss 0.0245 0.1054 0.5031 0.8069 2.9558 | lr 2.4e-04 | norm 0.3412 | dt 0.011
type train | step 5790 | loss 0.0246 0.1060 0.5144 0.8211 2.9775 | lr 2.4e-04 | norm 0.2017 | dt 0.011
type train | step 5800 | loss 0.0249 0.1062 0.5058 0.8227 2.9909 | lr 2.3e-04 | norm 0.1573 | dt 0.011
type train | step 5810 | loss 0.0247 0.1061 0.5043 0.8191 2.9872 | lr 2.3e-04 | norm 0.1841 | dt 0.011
type train | step 5820 | loss 0.0246 0.1057 0.5092 0.8268 2.9779 | lr 2.3e-04 | norm 0.1343 | dt 0.011
type train | step 5830 | loss 0.0245 0.1052 0.5051 0.8231 2.9816 | lr 2.3e-04 | norm 0.1296 | dt 0.011
type train | step 5840 | loss 0.0245 0.1055 0.5069 0.8133 2.9780 | lr 2.3e-04 | norm 0.2217 | dt 0.011
type train | step 5850 | loss 0.0246 0.1056 0.5017 0.8082 2.9548 | lr 2.3e-04 | norm 0.2024 | dt 0.011
type train | step 5860 | loss 0.0246 0.1056 0.5078 0.8203 2.9772 | lr 2.2e-04 | norm 0.1580 | dt 0.011
type train | step 5870 | loss 0.0249 0.1065 0.5081 0.8368 3.0151 | lr 2.2e-04 | norm 0.2073 | dt 0.011
type train | step 5880 | loss 0.0245 0.1053 0.5086 0.8213 2.9739 | lr 2.2e-04 | norm 0.1505 | dt 0.011
type train | step 5890 | loss 0.0245 0.1063 0.5029 0.8163 2.9505 | lr 2.2e-04 | norm 0.3003 | dt 0.011
type train | step 5900 | loss 0.0246 0.1056 0.5070 0.8214 2.9744 | lr 2.2e-04 | norm 0.1247 | dt 0.011
type train | step 5910 | loss 0.0246 0.1062 0.5052 0.8219 2.9977 | lr 2.2e-04 | norm 0.2431 | dt 0.011
type train | step 5920 | loss 0.0245 0.1056 0.5116 0.8282 2.9973 | lr 2.2e-04 | norm 0.1520 | dt 0.011
type train | step 5930 | loss 0.0246 0.1056 0.5030 0.8132 2.9602 | lr 2.1e-04 | norm 0.1843 | dt 0.011
type train | step 5940 | loss 0.0246 0.1059 0.5097 0.8242 2.9745 | lr 2.1e-04 | norm 0.1311 | dt 0.011
type train | step 5950 | loss 0.0245 0.1056 0.4988 0.8104 2.9418 | lr 2.1e-04 | norm 0.2681 | dt 0.011
type train | step 5960 | loss 0.0248 0.1058 0.5067 0.8275 2.9732 | lr 2.1e-04 | norm 0.1504 | dt 0.011
type train | step 5970 | loss 0.0246 0.1059 0.5074 0.8212 2.9939 | lr 2.1e-04 | norm 0.1790 | dt 0.011
type train | step 5980 | loss 0.0246 0.1058 0.5072 0.8274 2.9976 | lr 2.1e-04 | norm 0.1706 | dt 0.011
type train | step 5990 | loss 0.0247 0.1059 0.5063 0.8228 2.9765 | lr 2.1e-04 | norm 0.1558 | dt 0.011
type train | step 6000 | loss 0.0246 0.1057 0.5098 0.8283 2.9911 | lr 2.1e-04 | norm 0.1989 | dt 0.011
type train | step 6010 | loss 0.0245 0.1056 0.5074 0.8323 2.9834 | lr 2.0e-04 | norm 0.2068 | dt 0.011
type train | step 6020 | loss 0.0249 0.1058 0.5093 0.8334 3.0008 | lr 2.0e-04 | norm 0.3458 | dt 0.011
type train | step 6030 | loss 0.0244 0.1050 0.5043 0.8118 2.9569 | lr 2.0e-04 | norm 0.2874 | dt 0.011
type train | step 6040 | loss 0.0245 0.1052 0.5100 0.8255 2.9763 | lr 2.0e-04 | norm 0.1482 | dt 0.011
type train | step 6050 | loss 0.0249 0.1062 0.5071 0.8254 2.9809 | lr 2.0e-04 | norm 0.2629 | dt 0.011
type train | step 6060 | loss 0.0246 0.1056 0.5103 0.8338 3.0074 | lr 2.0e-04 | norm 0.1856 | dt 0.011
type train | step 6070 | loss 0.0245 0.1062 0.5033 0.8192 2.9901 | lr 2.0e-04 | norm 0.2415 | dt 0.011
type train | step 6080 | loss 0.0247 0.1055 0.5073 0.8268 2.9672 | lr 1.9e-04 | norm 0.1332 | dt 0.011
type train | step 6090 | loss 0.0243 0.1046 0.5056 0.8119 2.9531 | lr 1.9e-04 | norm 0.2383 | dt 0.011
type train | step 6100 | loss 0.0245 0.1055 0.5070 0.8290 3.0003 | lr 1.9e-04 | norm 0.1826 | dt 0.013
type train | step 6110 | loss 0.0245 0.1052 0.5077 0.8177 2.9685 | lr 1.9e-04 | norm 0.1460 | dt 0.011
type train | step 6120 | loss 0.0245 0.1053 0.5104 0.8255 2.9984 | lr 1.9e-04 | norm 0.1854 | dt 0.011
type train | step 6130 | loss 0.0244 0.1055 0.5012 0.8160 2.9649 | lr 1.9e-04 | norm 0.1643 | dt 0.011
type train | step 6140 | loss 0.0249 0.1066 0.5097 0.8404 3.0119 | lr 1.9e-04 | norm 0.2159 | dt 0.011
type train | step 6150 | loss 0.0244 0.1054 0.5065 0.8100 2.9598 | lr 1.9e-04 | norm 0.2875 | dt 0.011
type train | step 6160 | loss 0.0243 0.1051 0.5038 0.8152 2.9689 | lr 1.8e-04 | norm 0.2127 | dt 0.011
type train | step 6170 | loss 0.0244 0.1050 0.5053 0.8155 2.9647 | lr 1.8e-04 | norm 0.1612 | dt 0.011
type train | step 6180 | loss 0.0247 0.1062 0.5116 0.8413 3.0275 | lr 1.8e-04 | norm 0.1465 | dt 0.011
type train | step 6190 | loss 0.0244 0.1057 0.5028 0.8204 2.9805 | lr 1.8e-04 | norm 0.1379 | dt 0.011
type train | step 6200 | loss 0.0244 0.1049 0.5005 0.8099 2.9517 | lr 1.8e-04 | norm 0.2126 | dt 0.011
type train | step 6210 | loss 0.0245 0.1057 0.5045 0.8164 2.9734 | lr 1.8e-04 | norm 0.1337 | dt 0.011
type train | step 6220 | loss 0.0248 0.1065 0.5080 0.8166 2.9642 | lr 1.8e-04 | norm 0.1714 | dt 0.011
type train | step 6230 | loss 0.0246 0.1056 0.5047 0.8177 2.9683 | lr 1.8e-04 | norm 0.1249 | dt 0.011
type train | step 6240 | loss 0.0250 0.1078 0.5131 0.8349 2.9788 | lr 1.8e-04 | norm 0.1946 | dt 0.011
type train | step 6250 | loss 0.0246 0.1062 0.5058 0.8254 3.0063 | lr 1.7e-04 | norm 0.1550 | dt 0.012
type train | step 6260 | loss 0.0246 0.1053 0.5064 0.8228 2.9885 | lr 1.7e-04 | norm 0.2260 | dt 0.012
type train | step 6270 | loss 0.0243 0.1050 0.5058 0.8116 2.9646 | lr 1.7e-04 | norm 0.1984 | dt 0.012
type train | step 6280 | loss 0.0247 0.1059 0.5094 0.8211 2.9659 | lr 1.7e-04 | norm 0.1518 | dt 0.012
type train | step 6290 | loss 0.0245 0.1056 0.5066 0.8213 2.9881 | lr 1.7e-04 | norm 0.2617 | dt 0.012
type train | step 6300 | loss 0.0248 0.1069 0.5107 0.8367 3.0109 | lr 1.7e-04 | norm 0.1472 | dt 0.012
type train | step 6310 | loss 0.0247 0.1062 0.5062 0.8242 2.9851 | lr 1.7e-04 | norm 0.1529 | dt 0.012
type train | step 6320 | loss 0.0245 0.1052 0.5059 0.8256 2.9728 | lr 1.7e-04 | norm 0.1190 | dt 0.012
type train | step 6330 | loss 0.0243 0.1046 0.5057 0.8098 2.9511 | lr 1.7e-04 | norm 0.2720 | dt 0.012
type train | step 6340 | loss 0.0246 0.1062 0.5143 0.8274 2.9877 | lr 1.6e-04 | norm 0.1447 | dt 0.012
type train | step 6350 | loss 0.0246 0.1056 0.5038 0.8136 2.9756 | lr 1.6e-04 | norm 0.2126 | dt 0.012
type train | step 6360 | loss 0.0246 0.1052 0.5066 0.8157 2.9664 | lr 1.6e-04 | norm 0.1653 | dt 0.012
type train | step 6370 | loss 0.0246 0.1061 0.5052 0.8176 2.9505 | lr 1.6e-04 | norm 0.1158 | dt 0.012
type train | step 6380 | loss 0.0248 0.1061 0.5063 0.8306 3.0177 | lr 1.6e-04 | norm 0.1990 | dt 0.012
type train | step 6390 | loss 0.0245 0.1053 0.5028 0.8060 2.9530 | lr 1.6e-04 | norm 0.2463 | dt 0.012
type train | step 6400 | loss 0.0246 0.1058 0.5141 0.8203 2.9749 | lr 1.6e-04 | norm 0.1507 | dt 0.013
type train | step 6410 | loss 0.0248 0.1061 0.5055 0.8220 2.9887 | lr 1.6e-04 | norm 0.1471 | dt 0.012
type train | step 6420 | loss 0.0247 0.1059 0.5040 0.8185 2.9854 | lr 1.6e-04 | norm 0.2621 | dt 0.012
type train | step 6430 | loss 0.0246 0.1056 0.5090 0.8263 2.9760 | lr 1.5e-04 | norm 0.1702 | dt 0.012
type train | step 6440 | loss 0.0244 0.1050 0.5049 0.8225 2.9797 | lr 1.5e-04 | norm 0.2168 | dt 0.012
type train | step 6450 | loss 0.0245 0.1054 0.5066 0.8125 2.9751 | lr 1.5e-04 | norm 0.1990 | dt 0.012
type train | step 6460 | loss 0.0245 0.1054 0.5014 0.8075 2.9528 | lr 1.5e-04 | norm 0.1730 | dt 0.011
type train | step 6470 | loss 0.0245 0.1055 0.5075 0.8196 2.9749 | lr 1.5e-04 | norm 0.1402 | dt 0.011
type train | step 6480 | loss 0.0248 0.1064 0.5078 0.8360 3.0129 | lr 1.5e-04 | norm 0.1684 | dt 0.011
type train | step 6490 | loss 0.0245 0.1051 0.5083 0.8208 2.9723 | lr 1.5e-04 | norm 0.2067 | dt 0.011
type train | step 6500 | loss 0.0245 0.1061 0.5026 0.8158 2.9481 | lr 1.5e-04 | norm 0.1953 | dt 0.011
type train | step 6510 | loss 0.0245 0.1054 0.5067 0.8208 2.9723 | lr 1.5e-04 | norm 0.2077 | dt 0.011
type train | step 6520 | loss 0.0246 0.1061 0.5049 0.8212 2.9952 | lr 1.5e-04 | norm 0.1941 | dt 0.011
type train | step 6530 | loss 0.0245 0.1055 0.5114 0.8277 2.9954 | lr 1.5e-04 | norm 0.1197 | dt 0.011
type train | step 6540 | loss 0.0246 0.1055 0.5026 0.8127 2.9584 | lr 1.4e-04 | norm 0.1450 | dt 0.012
type train | step 6550 | loss 0.0246 0.1058 0.5094 0.8236 2.9729 | lr 1.4e-04 | norm 0.1414 | dt 0.011
type train | step 6560 | loss 0.0245 0.1054 0.4984 0.8098 2.9399 | lr 1.4e-04 | norm 0.1619 | dt 0.011
type train | step 6570 | loss 0.0247 0.1057 0.5065 0.8271 2.9719 | lr 1.4e-04 | norm 0.1684 | dt 0.011
type train | step 6580 | loss 0.0246 0.1057 0.5070 0.8206 2.9917 | lr 1.4e-04 | norm 0.2003 | dt 0.011
type train | step 6590 | loss 0.0245 0.1057 0.5070 0.8268 2.9957 | lr 1.4e-04 | norm 0.1563 | dt 0.011
type train | step 6600 | loss 0.0246 0.1057 0.5060 0.8222 2.9746 | lr 1.4e-04 | norm 0.1890 | dt 0.011
type train | step 6610 | loss 0.0245 0.1056 0.5096 0.8278 2.9897 | lr 1.4e-04 | norm 0.1432 | dt 0.011
type train | step 6620 | loss 0.0245 0.1054 0.5070 0.8317 2.9820 | lr 1.4e-04 | norm 0.2604 | dt 0.011
type train | step 6630 | loss 0.0249 0.1056 0.5091 0.8330 2.9995 | lr 1.4e-04 | norm 0.1913 | dt 0.011
type train | step 6640 | loss 0.0244 0.1048 0.5040 0.8112 2.9545 | lr 1.4e-04 | norm 0.1702 | dt 0.011
type train | step 6650 | loss 0.0244 0.1050 0.5098 0.8249 2.9743 | lr 1.3e-04 | norm 0.2393 | dt 0.011
type train | step 6660 | loss 0.0248 0.1060 0.5068 0.8247 2.9795 | lr 1.3e-04 | norm 0.2797 | dt 0.011
type train | step 6670 | loss 0.0245 0.1055 0.5100 0.8332 3.0052 | lr 1.3e-04 | norm 0.1420 | dt 0.011
type train | step 6680 | loss 0.0244 0.1060 0.5029 0.8185 2.9882 | lr 1.3e-04 | norm 0.2090 | dt 0.011
type train | step 6690 | loss 0.0247 0.1054 0.5071 0.8264 2.9663 | lr 1.3e-04 | norm 0.2273 | dt 0.011
type train | step 6700 | loss 0.0243 0.1045 0.5053 0.8114 2.9511 | lr 1.3e-04 | norm 0.1416 | dt 0.011
type train | step 6710 | loss 0.0245 0.1054 0.5068 0.8285 2.9987 | lr 1.3e-04 | norm 0.1663 | dt 0.014
type train | step 6720 | loss 0.0245 0.1050 0.5074 0.8172 2.9673 | lr 1.3e-04 | norm 0.1330 | dt 0.011
type train | step 6730 | loss 0.0245 0.1051 0.5101 0.8250 2.9968 | lr 1.3e-04 | norm 0.2052 | dt 0.011
type train | step 6740 | loss 0.0244 0.1053 0.5009 0.8155 2.9629 | lr 1.3e-04 | norm 0.1423 | dt 0.011
type train | step 6750 | loss 0.0249 0.1064 0.5095 0.8399 3.0109 | lr 1.3e-04 | norm 0.2557 | dt 0.011
type train | step 6760 | loss 0.0244 0.1053 0.5063 0.8094 2.9576 | lr 1.3e-04 | norm 0.1427 | dt 0.011
type train | step 6770 | loss 0.0242 0.1050 0.5035 0.8148 2.9674 | lr 1.3e-04 | norm 0.1554 | dt 0.011
type train | step 6780 | loss 0.0244 0.1049 0.5051 0.8151 2.9636 | lr 1.3e-04 | norm 0.1126 | dt 0.011
type train | step 6790 | loss 0.0247 0.1061 0.5113 0.8409 3.0259 | lr 1.2e-04 | norm 0.1360 | dt 0.011
type train | step 6800 | loss 0.0244 0.1055 0.5026 0.8199 2.9787 | lr 1.2e-04 | norm 0.1703 | dt 0.011
type train | step 6810 | loss 0.0243 0.1047 0.5004 0.8095 2.9501 | lr 1.2e-04 | norm 0.1890 | dt 0.011
type train | step 6820 | loss 0.0245 0.1056 0.5043 0.8159 2.9716 | lr 1.2e-04 | norm 0.1462 | dt 0.011
type train | step 6830 | loss 0.0248 0.1063 0.5078 0.8161 2.9627 | lr 1.2e-04 | norm 0.1655 | dt 0.011
type train | step 6840 | loss 0.0246 0.1055 0.5045 0.8174 2.9671 | lr 1.2e-04 | norm 0.1538 | dt 0.011
type train | step 6850 | loss 0.0250 0.1077 0.5128 0.8342 2.9770 | lr 1.2e-04 | norm 0.1902 | dt 0.011
type train | step 6860 | loss 0.0246 0.1060 0.5055 0.8249 3.0045 | lr 1.2e-04 | norm 0.1629 | dt 0.011
type train | step 6870 | loss 0.0245 0.1052 0.5063 0.8225 2.9868 | lr 1.2e-04 | norm 0.1318 | dt 0.011
type train | step 6880 | loss 0.0242 0.1049 0.5056 0.8111 2.9627 | lr 1.2e-04 | norm 0.1560 | dt 0.011
type train | step 6890 | loss 0.0247 0.1058 0.5092 0.8206 2.9643 | lr 1.2e-04 | norm 0.1430 | dt 0.011
type train | step 6900 | loss 0.0244 0.1054 0.5065 0.8208 2.9867 | lr 1.2e-04 | norm 0.3096 | dt 0.011
type train | step 6910 | loss 0.0248 0.1068 0.5104 0.8362 3.0097 | lr 1.2e-04 | norm 0.2330 | dt 0.011
type train | step 6920 | loss 0.0247 0.1061 0.5061 0.8238 2.9835 | lr 1.2e-04 | norm 0.1492 | dt 0.011
type train | step 6930 | loss 0.0244 0.1051 0.5058 0.8254 2.9715 | lr 1.2e-04 | norm 0.1397 | dt 0.011
type train | step 6940 | loss 0.0243 0.1045 0.5055 0.8094 2.9492 | lr 1.2e-04 | norm 0.1803 | dt 0.012
type train | step 6950 | loss 0.0246 0.1060 0.5141 0.8269 2.9859 | lr 1.1e-04 | norm 0.1339 | dt 0.011
type train | step 6960 | loss 0.0245 0.1055 0.5036 0.8133 2.9739 | lr 1.1e-04 | norm 0.1842 | dt 0.011
type train | step 6970 | loss 0.0245 0.1051 0.5064 0.8153 2.9648 | lr 1.1e-04 | norm 0.1601 | dt 0.011
type train | step 6980 | loss 0.0246 0.1060 0.5050 0.8172 2.9488 | lr 1.1e-04 | norm 0.1409 | dt 0.011
type train | step 6990 | loss 0.0247 0.1060 0.5061 0.8303 3.0162 | lr 1.1e-04 | norm 0.1688 | dt 0.011
type train | step 7000 | loss 0.0245 0.1052 0.5024 0.8056 2.9506 | lr 1.1e-04 | norm 0.1630 | dt 0.011
type train | step 7010 | loss 0.0245 0.1057 0.5139 0.8199 2.9734 | lr 1.1e-04 | norm 0.1617 | dt 0.011
type train | step 7020 | loss 0.0248 0.1060 0.5053 0.8215 2.9869 | lr 1.1e-04 | norm 0.1341 | dt 0.012
type train | step 7030 | loss 0.0247 0.1058 0.5038 0.8181 2.9834 | lr 1.1e-04 | norm 0.1906 | dt 0.011
type train | step 7040 | loss 0.0246 0.1055 0.5087 0.8260 2.9743 | lr 1.1e-04 | norm 0.1412 | dt 0.011
type train | step 7050 | loss 0.0244 0.1049 0.5047 0.8222 2.9784 | lr 1.1e-04 | norm 0.1811 | dt 0.011
type train | step 7060 | loss 0.0245 0.1052 0.5064 0.8120 2.9736 | lr 1.1e-04 | norm 0.1954 | dt 0.011
type train | step 7070 | loss 0.0245 0.1053 0.5012 0.8073 2.9513 | lr 1.1e-04 | norm 0.1529 | dt 0.011
type train | step 7080 | loss 0.0245 0.1054 0.5074 0.8192 2.9735 | lr 1.1e-04 | norm 0.1626 | dt 0.011
type train | step 7090 | loss 0.0248 0.1062 0.5075 0.8356 3.0111 | lr 1.1e-04 | norm 0.1442 | dt 0.011
type train | step 7100 | loss 0.0244 0.1050 0.5081 0.8205 2.9705 | lr 1.1e-04 | norm 0.1393 | dt 0.011
type train | step 7110 | loss 0.0245 0.1060 0.5024 0.8154 2.9468 | lr 1.1e-04 | norm 0.1935 | dt 0.011
type train | step 7120 | loss 0.0245 0.1053 0.5065 0.8205 2.9711 | lr 1.1e-04 | norm 0.1969 | dt 0.011
type train | step 7130 | loss 0.0246 0.1060 0.5047 0.8209 2.9936 | lr 1.1e-04 | norm 0.1767 | dt 0.011
type train | step 7140 | loss 0.0245 0.1054 0.5112 0.8274 2.9941 | lr 1.1e-04 | norm 0.1459 | dt 0.011
type train | step 7150 | loss 0.0245 0.1054 0.5024 0.8124 2.9569 | lr 1.1e-04 | norm 0.1658 | dt 0.011
type train | step 7160 | loss 0.0245 0.1057 0.5092 0.8233 2.9711 | lr 1.1e-04 | norm 0.1712 | dt 0.011
type train | step 7170 | loss 0.0244 0.1053 0.4983 0.8094 2.9381 | lr 1.1e-04 | norm 0.1707 | dt 0.011
type train | step 7180 | loss 0.0247 0.1056 0.5063 0.8270 2.9707 | lr 1.0e-04 | norm 0.1505 | dt 0.011
type train | step 7190 | loss 0.0246 0.1056 0.5069 0.8203 2.9901 | lr 1.0e-04 | norm 0.1454 | dt 0.011
type train | step 7200 | loss 0.0245 0.1056 0.5069 0.8264 2.9945 | lr 1.0e-04 | norm 0.1474 | dt 0.011
type train | step 7210 | loss 0.0246 0.1056 0.5058 0.8218 2.9732 | lr 1.0e-04 | norm 0.1849 | dt 0.011
type train | step 7220 | loss 0.0245 0.1055 0.5094 0.8274 2.9881 | lr 1.0e-04 | norm 0.1351 | dt 0.011
type train | step 7230 | loss 0.0244 0.1053 0.5069 0.8315 2.9802 | lr 1.0e-04 | norm 0.1984 | dt 0.011
type train | step 7240 | loss 0.0248 0.1055 0.5089 0.8328 2.9986 | lr 1.0e-04 | norm 0.1904 | dt 0.011
type train | step 7250 | loss 0.0244 0.1047 0.5038 0.8108 2.9532 | lr 1.0e-04 | norm 0.1876 | dt 0.011
type train | step 7260 | loss 0.0244 0.1050 0.5096 0.8245 2.9733 | lr 1.0e-04 | norm 0.2011 | dt 0.012
type train | step 7270 | loss 0.0248 0.1059 0.5065 0.8244 2.9780 | lr 1.0e-04 | norm 0.2652 | dt 0.011
type train | step 7280 | loss 0.0245 0.1054 0.5098 0.8329 3.0037 | lr 1.0e-04 | norm 0.1294 | dt 0.012
type train | step 7290 | loss 0.0244 0.1059 0.5028 0.8182 2.9867 | lr 1.0e-04 | norm 0.1739 | dt 0.011
type train | step 7300 | loss 0.0246 0.1053 0.5069 0.8262 2.9651 | lr 1.0e-04 | norm 0.1557 | dt 0.011
type train | step 7310 | loss 0.0243 0.1044 0.5051 0.8111 2.9499 | lr 1.0e-04 | norm 0.1382 | dt 0.013
type train | step 7320 | loss 0.0244 0.1053 0.5068 0.8283 2.9977 | lr 1.0e-04 | norm 0.1594 | dt 0.016
type train | step 7330 | loss 0.0245 0.1049 0.5073 0.8170 2.9661 | lr 1.0e-04 | norm 0.1167 | dt 0.012
type train | step 7340 | loss 0.0244 0.1050 0.5100 0.8246 2.9954 | lr 1.0e-04 | norm 0.1722 | dt 0.012
type train | step 7350 | loss 0.0244 0.1052 0.5008 0.8152 2.9619 | lr 1.0e-04 | norm 0.1412 | dt 0.011
type train | step 7360 | loss 0.0248 0.1063 0.5094 0.8397 3.0098 | lr 1.0e-04 | norm 0.2651 | dt 0.011
type train | step 7370 | loss 0.0243 0.1052 0.5062 0.8092 2.9564 | lr 1.0e-04 | norm 0.1447 | dt 0.013
type train | step 7380 | loss 0.0242 0.1049 0.5034 0.8145 2.9661 | lr 1.0e-04 | norm 0.1516 | dt 0.011
type train | step 7390 | loss 0.0244 0.1048 0.5050 0.8149 2.9628 | lr 1.0e-04 | norm 0.1549 | dt 0.012
type train | step 7400 | loss 0.0247 0.1060 0.5112 0.8406 3.0248 | lr 1.0e-04 | norm 0.1588 | dt 0.011
type train | step 7410 | loss 0.0244 0.1054 0.5025 0.8196 2.9776 | lr 1.0e-04 | norm 0.1467 | dt 0.011
type train | step 7420 | loss 0.0243 0.1047 0.5003 0.8093 2.9492 | lr 1.0e-04 | norm 0.1937 | dt 0.011
type train | step 7430 | loss 0.0245 0.1055 0.5043 0.8156 2.9705 | lr 1.0e-04 | norm 0.1316 | dt 0.011
type train | step 7440 | loss 0.0247 0.1063 0.5077 0.8159 2.9613 | lr 1.0e-04 | norm 0.1572 | dt 0.011
type train | step 7450 | loss 0.0245 0.1054 0.5045 0.8172 2.9659 | lr 1.0e-04 | norm 0.1731 | dt 0.011
type train | step 7460 | loss 0.0249 0.1076 0.5127 0.8338 2.9758 | lr 1.0e-04 | norm 0.1914 | dt 0.011
type train | step 7470 | loss 0.0245 0.1059 0.5054 0.8247 3.0033 | lr 1.0e-04 | norm 0.1804 | dt 0.011
type train | step 7480 | loss 0.0245 0.1051 0.5062 0.8224 2.9860 | lr 1.0e-04 | norm 0.1387 | dt 0.011
type train | step 7490 | loss 0.0242 0.1048 0.5056 0.8109 2.9616 | lr 1.0e-04 | norm 0.1421 | dt 0.011
type train | step 7500 | loss 0.0246 0.1057 0.5092 0.8203 2.9634 | lr 1.0e-04 | norm 0.1411 | dt 0.011