davidquarel's picture
Upload folder using huggingface_hub
5eaa98a verified
type train | step 10 | loss 69.4636 226.5789 250.8517 256.6940 1153.1455 | lr 1.3e-05 | norm 886.2820 | dt 0.018
type train | step 20 | loss 68.1997 222.8179 247.9711 253.0499 1149.1156 | lr 2.7e-05 | norm 894.3831 | dt 0.019
type train | step 30 | loss 66.6820 216.2963 242.8411 251.0308 1125.3365 | lr 4.0e-05 | norm 868.0057 | dt 0.018
type train | step 40 | loss 66.3617 208.2336 235.0398 244.6176 1050.3451 | lr 5.3e-05 | norm 796.7020 | dt 0.018
type train | step 50 | loss 62.1938 197.4808 224.5412 236.7062 1034.7133 | lr 6.7e-05 | norm 794.4216 | dt 0.018
type train | step 60 | loss 59.2433 185.0247 211.9988 224.8304 956.9526 | lr 8.0e-05 | norm 733.3275 | dt 0.018
type train | step 70 | loss 56.1140 171.2373 199.1852 215.3797 903.0059 | lr 9.3e-05 | norm 696.8870 | dt 0.018
type train | step 80 | loss 53.7543 158.4786 188.2167 203.7323 817.3013 | lr 1.1e-04 | norm 617.6252 | dt 0.018
type train | step 90 | loss 49.1172 142.7348 172.4434 193.9439 769.2914 | lr 1.2e-04 | norm 581.0894 | dt 0.018
type train | step 100 | loss 44.8693 128.0235 157.2219 177.9275 680.1750 | lr 1.3e-04 | norm 524.4868 | dt 0.018
type train | step 110 | loss 41.6606 113.5900 142.0942 167.6646 614.4323 | lr 1.5e-04 | norm 459.5229 | dt 0.018
type train | step 120 | loss 39.1014 99.7290 128.9482 156.4952 538.9907 | lr 1.6e-04 | norm 391.7111 | dt 0.018
type train | step 130 | loss 34.2333 85.3391 114.0419 142.0932 476.9624 | lr 1.7e-04 | norm 347.3186 | dt 0.018
type train | step 140 | loss 32.3965 74.5033 102.4195 132.5119 411.1675 | lr 1.9e-04 | norm 297.0230 | dt 0.018
type train | step 150 | loss 26.6403 61.2950 87.1429 117.0333 363.1526 | lr 2.0e-04 | norm 261.2342 | dt 0.018
type train | step 160 | loss 22.6446 51.0130 75.2246 102.4038 301.4481 | lr 2.1e-04 | norm 223.7403 | dt 0.018
type train | step 170 | loss 18.4608 40.8693 63.0367 88.4909 253.7374 | lr 2.3e-04 | norm 190.5051 | dt 0.018
type train | step 180 | loss 16.9705 33.6819 53.5488 80.8746 221.5518 | lr 2.4e-04 | norm 162.3525 | dt 0.018
type train | step 190 | loss 13.2541 26.1239 44.0511 69.7721 190.0946 | lr 2.5e-04 | norm 142.4153 | dt 0.018
type train | step 200 | loss 11.7273 20.8617 36.2357 58.9314 160.5240 | lr 2.7e-04 | norm 126.1539 | dt 0.018
type train | step 210 | loss 9.1706 15.9394 28.7063 49.1267 135.3334 | lr 2.8e-04 | norm 112.9733 | dt 0.018
type train | step 220 | loss 6.3784 11.5876 21.9930 39.4931 108.4306 | lr 2.9e-04 | norm 98.0527 | dt 0.018
type train | step 230 | loss 4.5263 8.4531 16.3871 30.2598 86.2431 | lr 3.1e-04 | norm 84.8507 | dt 0.018
type train | step 240 | loss 3.8490 6.5626 13.0430 27.0773 79.8424 | lr 3.2e-04 | norm 82.5597 | dt 0.018
type train | step 250 | loss 2.9099 4.7822 9.4725 19.9634 66.4382 | lr 3.3e-04 | norm 71.3396 | dt 0.018
type train | step 260 | loss 2.0588 3.5635 7.0585 15.0222 51.2331 | lr 3.5e-04 | norm 60.0523 | dt 0.018
type train | step 270 | loss 1.5776 2.7857 5.3007 11.4343 43.0835 | lr 3.6e-04 | norm 52.4717 | dt 0.018
type train | step 280 | loss 1.2177 2.2190 4.0215 9.5068 37.3215 | lr 3.7e-04 | norm 46.9859 | dt 0.018
type train | step 290 | loss 0.7455 1.6687 2.9226 6.0741 25.5075 | lr 3.9e-04 | norm 33.5766 | dt 0.018
type train | step 300 | loss 0.5948 1.4152 2.4800 4.9125 22.3764 | lr 4.0e-04 | norm 29.6173 | dt 0.018
type train | step 310 | loss 0.4870 1.2102 2.1670 3.7058 19.2448 | lr 4.1e-04 | norm 24.6486 | dt 0.018
type train | step 320 | loss 0.3746 1.0141 1.9373 3.1380 14.3225 | lr 4.3e-04 | norm 18.3827 | dt 0.018
type train | step 330 | loss 0.2919 0.8577 1.7706 2.5948 10.5112 | lr 4.4e-04 | norm 12.4146 | dt 0.018
type train | step 340 | loss 0.2257 0.7246 1.5766 2.3074 8.2138 | lr 4.5e-04 | norm 8.1415 | dt 0.018
type train | step 350 | loss 0.1994 0.6405 1.4454 2.0842 6.8487 | lr 4.7e-04 | norm 6.2130 | dt 0.018
type train | step 360 | loss 0.1723 0.5604 1.3191 1.8661 5.9103 | lr 4.8e-04 | norm 4.8704 | dt 0.018
type train | step 370 | loss 0.1559 0.4981 1.2278 1.7410 5.0767 | lr 4.9e-04 | norm 3.4502 | dt 0.018
type train | step 380 | loss 0.1488 0.4646 1.1538 1.6798 4.8698 | lr 5.1e-04 | norm 3.8159 | dt 0.023
type train | step 390 | loss 0.1277 0.4035 1.0497 1.4549 3.6398 | lr 5.2e-04 | norm 1.4677 | dt 0.019
type train | step 400 | loss 0.1177 0.3710 0.9695 1.3665 3.3093 | lr 5.3e-04 | norm 1.0679 | dt 0.018
type train | step 410 | loss 0.1104 0.3417 0.9191 1.2880 2.9529 | lr 5.5e-04 | norm 0.7334 | dt 0.018
type train | step 420 | loss 0.1025 0.3170 0.8595 1.2216 2.8003 | lr 5.6e-04 | norm 0.8519 | dt 0.018
type train | step 430 | loss 0.0953 0.2916 0.8185 1.1490 2.5262 | lr 5.7e-04 | norm 0.5294 | dt 0.018
type train | step 440 | loss 0.0904 0.2748 0.7711 1.0751 2.3714 | lr 5.9e-04 | norm 0.4445 | dt 0.019
type train | step 450 | loss 0.0859 0.2614 0.7437 1.0359 2.2252 | lr 6.0e-04 | norm 0.3785 | dt 0.018
type train | step 460 | loss 0.0811 0.2460 0.6982 0.9798 2.0998 | lr 6.1e-04 | norm 0.3374 | dt 0.019
type train | step 470 | loss 0.0790 0.2378 0.6846 0.9601 1.9977 | lr 6.3e-04 | norm 0.3118 | dt 0.018
type train | step 480 | loss 0.0740 0.2241 0.6524 0.9228 1.9176 | lr 6.4e-04 | norm 0.3128 | dt 0.018
type train | step 490 | loss 0.0701 0.2136 0.6277 0.8918 1.8196 | lr 6.5e-04 | norm 0.3199 | dt 0.018
type train | step 500 | loss 0.0676 0.2055 0.6086 0.8596 1.7490 | lr 6.7e-04 | norm 0.2709 | dt 0.018
type train | step 510 | loss 0.0640 0.1964 0.5873 0.8365 1.6768 | lr 6.8e-04 | norm 0.3042 | dt 0.018
type train | step 520 | loss 0.0611 0.1875 0.5639 0.8139 1.6041 | lr 6.9e-04 | norm 0.2596 | dt 0.018
type train | step 530 | loss 0.0606 0.1842 0.5559 0.7943 1.5613 | lr 7.1e-04 | norm 0.2156 | dt 0.018
type train | step 540 | loss 0.0559 0.1723 0.5272 0.7619 1.4930 | lr 7.2e-04 | norm 0.1883 | dt 0.018
type train | step 550 | loss 0.0538 0.1665 0.5176 0.7531 1.4499 | lr 7.3e-04 | norm 0.2875 | dt 0.018
type train | step 560 | loss 0.0534 0.1641 0.5058 0.7313 1.4123 | lr 7.5e-04 | norm 0.2460 | dt 0.018
type train | step 570 | loss 0.0505 0.1567 0.4917 0.7192 1.3694 | lr 7.6e-04 | norm 0.2039 | dt 0.018
type train | step 580 | loss 0.0482 0.1516 0.4714 0.6964 1.3353 | lr 7.7e-04 | norm 0.1957 | dt 0.018
type train | step 590 | loss 0.0476 0.1479 0.4661 0.6865 1.2945 | lr 7.9e-04 | norm 0.1735 | dt 0.018
type train | step 600 | loss 0.0448 0.1401 0.4472 0.6638 1.2605 | lr 8.0e-04 | norm 0.1817 | dt 0.018
type train | step 610 | loss 0.0438 0.1376 0.4396 0.6592 1.2431 | lr 8.1e-04 | norm 0.2034 | dt 0.019
type train | step 620 | loss 0.0427 0.1333 0.4312 0.6423 1.2101 | lr 8.3e-04 | norm 0.2978 | dt 0.018
type train | step 630 | loss 0.0414 0.1295 0.4235 0.6374 1.1932 | lr 8.4e-04 | norm 0.2064 | dt 0.018
type train | step 640 | loss 0.0403 0.1263 0.4073 0.6205 1.1666 | lr 8.5e-04 | norm 0.2201 | dt 0.018
type train | step 650 | loss 0.0410 0.1273 0.4121 0.6271 1.1645 | lr 8.7e-04 | norm 0.1735 | dt 0.018
type train | step 660 | loss 0.0385 0.1200 0.3961 0.5986 1.1275 | lr 8.8e-04 | norm 0.1498 | dt 0.018
type train | step 670 | loss 0.0371 0.1164 0.3851 0.5909 1.1092 | lr 8.9e-04 | norm 0.1515 | dt 0.018
type train | step 680 | loss 0.0367 0.1138 0.3803 0.5826 1.0908 | lr 9.1e-04 | norm 0.2868 | dt 0.018
type train | step 690 | loss 0.0369 0.1140 0.3835 0.5924 1.0937 | lr 9.2e-04 | norm 0.1878 | dt 0.018
type train | step 700 | loss 0.0357 0.1106 0.3694 0.5723 1.0684 | lr 9.3e-04 | norm 0.3153 | dt 0.018
type train | step 710 | loss 0.0350 0.1072 0.3616 0.5592 1.0431 | lr 9.5e-04 | norm 0.1431 | dt 0.018
type train | step 720 | loss 0.0347 0.1068 0.3607 0.5571 1.0391 | lr 9.6e-04 | norm 0.1545 | dt 0.018
type train | step 730 | loss 0.0346 0.1057 0.3598 0.5542 1.0308 | lr 9.7e-04 | norm 0.1393 | dt 0.018
type train | step 740 | loss 0.0338 0.1026 0.3517 0.5476 1.0176 | lr 9.9e-04 | norm 0.2018 | dt 0.018
type train | step 750 | loss 0.0346 0.1048 0.3608 0.5531 1.0136 | lr 1.0e-03 | norm 0.3024 | dt 0.018
type train | step 760 | loss 0.0332 0.1007 0.3462 0.5398 1.0070 | lr 1.0e-03 | norm 0.1674 | dt 0.018
type train | step 770 | loss 0.0327 0.0982 0.3425 0.5328 0.9883 | lr 1.0e-03 | norm 0.1503 | dt 0.018
type train | step 780 | loss 0.0320 0.0964 0.3376 0.5223 0.9753 | lr 1.0e-03 | norm 0.2171 | dt 0.018
type train | step 790 | loss 0.0323 0.0964 0.3391 0.5261 0.9738 | lr 1.0e-03 | norm 0.1257 | dt 0.018
type train | step 800 | loss 0.0317 0.0949 0.3327 0.5201 0.9673 | lr 1.0e-03 | norm 0.1716 | dt 0.018
type train | step 810 | loss 0.0321 0.0955 0.3367 0.5272 0.9673 | lr 1.0e-03 | norm 0.1156 | dt 0.018
type train | step 820 | loss 0.0317 0.0939 0.3289 0.5142 0.9521 | lr 1.0e-03 | norm 0.1073 | dt 0.018
type train | step 830 | loss 0.0311 0.0916 0.3256 0.5094 0.9415 | lr 1.0e-03 | norm 0.1762 | dt 0.018
type train | step 840 | loss 0.0306 0.0900 0.3225 0.5011 0.9338 | lr 1.0e-03 | norm 0.2765 | dt 0.018
type train | step 850 | loss 0.0309 0.0911 0.3284 0.5082 0.9395 | lr 1.0e-03 | norm 0.1067 | dt 0.018
type train | step 860 | loss 0.0306 0.0897 0.3183 0.4968 0.9312 | lr 1.0e-03 | norm 0.2106 | dt 0.018
type train | step 870 | loss 0.0306 0.0886 0.3186 0.4964 0.9252 | lr 1.0e-03 | norm 0.1065 | dt 0.018
type train | step 880 | loss 0.0305 0.0892 0.3164 0.4939 0.9167 | lr 1.0e-03 | norm 0.1697 | dt 0.018
type train | step 890 | loss 0.0306 0.0890 0.3174 0.4981 0.9259 | lr 1.0e-03 | norm 0.1404 | dt 0.018
type train | step 900 | loss 0.0299 0.0872 0.3114 0.4870 0.9140 | lr 1.0e-03 | norm 0.2454 | dt 0.018
type train | step 910 | loss 0.0300 0.0868 0.3174 0.4915 0.9141 | lr 1.0e-03 | norm 0.1200 | dt 0.018
type train | step 920 | loss 0.0302 0.0871 0.3113 0.4892 0.9152 | lr 1.0e-03 | norm 0.2026 | dt 0.018
type train | step 930 | loss 0.0298 0.0860 0.3090 0.4855 0.9092 | lr 1.0e-03 | norm 0.1073 | dt 0.018
type train | step 940 | loss 0.0297 0.0854 0.3101 0.4864 0.9008 | lr 1.0e-03 | norm 0.2066 | dt 0.018
type train | step 950 | loss 0.0294 0.0844 0.3066 0.4812 0.8966 | lr 1.0e-03 | norm 0.1644 | dt 0.018
type train | step 960 | loss 0.0293 0.0846 0.3070 0.4804 0.9002 | lr 1.0e-03 | norm 0.1300 | dt 0.018
type train | step 970 | loss 0.0293 0.0838 0.3024 0.4727 0.8890 | lr 1.0e-03 | norm 0.0967 | dt 0.018
type train | step 980 | loss 0.0292 0.0836 0.3055 0.4788 0.8960 | lr 1.0e-03 | norm 0.1529 | dt 0.018
type train | step 990 | loss 0.0296 0.0843 0.3067 0.4863 0.9027 | lr 1.0e-03 | norm 0.1585 | dt 0.018
type train | step 1000 | loss 0.0290 0.0824 0.3028 0.4741 0.8827 | lr 1.0e-03 | norm 0.1293 | dt 0.018
type train | step 1010 | loss 0.0289 0.0831 0.2987 0.4699 0.8815 | lr 1.0e-03 | norm 0.1887 | dt 0.018
type train | step 1020 | loss 0.0290 0.0826 0.3015 0.4733 0.8835 | lr 1.0e-03 | norm 0.1759 | dt 0.018
type train | step 1030 | loss 0.0289 0.0823 0.2997 0.4718 0.8867 | lr 1.0e-03 | norm 0.1432 | dt 0.018
type train | step 1040 | loss 0.0287 0.0816 0.3016 0.4747 0.8849 | lr 1.0e-03 | norm 0.1084 | dt 0.018
type train | step 1050 | loss 0.0287 0.0812 0.2963 0.4667 0.8794 | lr 1.0e-03 | norm 0.1330 | dt 0.018
type train | step 1060 | loss 0.0287 0.0814 0.2989 0.4708 0.8769 | lr 1.0e-03 | norm 0.1368 | dt 0.018
type train | step 1070 | loss 0.0285 0.0806 0.2923 0.4602 0.8679 | lr 1.0e-03 | norm 0.0953 | dt 0.018
type train | step 1080 | loss 0.0288 0.0810 0.2972 0.4678 0.8692 | lr 9.9e-04 | norm 0.1065 | dt 0.018
type train | step 1090 | loss 0.0285 0.0805 0.2962 0.4662 0.8766 | lr 9.9e-04 | norm 0.2667 | dt 0.018
type train | step 1100 | loss 0.0283 0.0801 0.2956 0.4674 0.8741 | lr 9.9e-04 | norm 0.1118 | dt 0.018
type train | step 1110 | loss 0.0285 0.0801 0.2956 0.4663 0.8721 | lr 9.9e-04 | norm 0.1035 | dt 0.018
type train | step 1120 | loss 0.0283 0.0795 0.2957 0.4664 0.8680 | lr 9.9e-04 | norm 0.1478 | dt 0.018
type train | step 1130 | loss 0.0282 0.0794 0.2933 0.4662 0.8652 | lr 9.9e-04 | norm 0.2210 | dt 0.018
type train | step 1140 | loss 0.0287 0.0800 0.2951 0.4666 0.8696 | lr 9.9e-04 | norm 0.1183 | dt 0.018
type train | step 1150 | loss 0.0280 0.0782 0.2904 0.4572 0.8621 | lr 9.9e-04 | norm 0.0950 | dt 0.018
type train | step 1160 | loss 0.0280 0.0786 0.2945 0.4642 0.8652 | lr 9.9e-04 | norm 0.1461 | dt 0.018
type train | step 1170 | loss 0.0284 0.0793 0.2930 0.4634 0.8696 | lr 9.9e-04 | norm 0.1252 | dt 0.018
type train | step 1180 | loss 0.0281 0.0783 0.2930 0.4662 0.8656 | lr 9.9e-04 | norm 0.1411 | dt 0.018
type train | step 1190 | loss 0.0278 0.0786 0.2888 0.4564 0.8612 | lr 9.9e-04 | norm 0.1097 | dt 0.018
type train | step 1200 | loss 0.0281 0.0782 0.2909 0.4605 0.8579 | lr 9.9e-04 | norm 0.1376 | dt 0.018
type train | step 1210 | loss 0.0277 0.0769 0.2881 0.4524 0.8536 | lr 9.9e-04 | norm 0.1107 | dt 0.018
type train | step 1220 | loss 0.0277 0.0774 0.2889 0.4593 0.8585 | lr 9.9e-04 | norm 0.1439 | dt 0.022
type train | step 1230 | loss 0.0277 0.0770 0.2891 0.4545 0.8555 | lr 9.9e-04 | norm 0.1200 | dt 0.018
type train | step 1240 | loss 0.0277 0.0770 0.2901 0.4583 0.8586 | lr 9.9e-04 | norm 0.1661 | dt 0.018
type train | step 1250 | loss 0.0276 0.0769 0.2845 0.4516 0.8516 | lr 9.9e-04 | norm 0.2225 | dt 0.018
type train | step 1260 | loss 0.0281 0.0783 0.2905 0.4659 0.8656 | lr 9.9e-04 | norm 0.1148 | dt 0.018
type train | step 1270 | loss 0.0275 0.0765 0.2867 0.4472 0.8503 | lr 9.9e-04 | norm 0.2198 | dt 0.018
type train | step 1280 | loss 0.0273 0.0762 0.2843 0.4490 0.8484 | lr 9.9e-04 | norm 0.1443 | dt 0.018
type train | step 1290 | loss 0.0274 0.0760 0.2847 0.4482 0.8476 | lr 9.9e-04 | norm 0.1802 | dt 0.018
type train | step 1300 | loss 0.0277 0.0770 0.2894 0.4633 0.8625 | lr 9.9e-04 | norm 0.0969 | dt 0.018
type train | step 1310 | loss 0.0274 0.0763 0.2837 0.4505 0.8515 | lr 9.8e-04 | norm 0.2014 | dt 0.018
type train | step 1320 | loss 0.0273 0.0755 0.2812 0.4442 0.8398 | lr 9.8e-04 | norm 0.1201 | dt 0.018
type train | step 1330 | loss 0.0274 0.0762 0.2842 0.4474 0.8472 | lr 9.8e-04 | norm 0.0912 | dt 0.018
type train | step 1340 | loss 0.0276 0.0768 0.2860 0.4492 0.8521 | lr 9.8e-04 | norm 0.0993 | dt 0.018
type train | step 1350 | loss 0.0274 0.0757 0.2829 0.4484 0.8482 | lr 9.8e-04 | norm 0.0936 | dt 0.018
type train | step 1360 | loss 0.0280 0.0779 0.2909 0.4599 0.8562 | lr 9.8e-04 | norm 0.2049 | dt 0.018
type train | step 1370 | loss 0.0273 0.0761 0.2839 0.4508 0.8537 | lr 9.8e-04 | norm 0.1813 | dt 0.018
type train | step 1380 | loss 0.0273 0.0754 0.2834 0.4482 0.8449 | lr 9.8e-04 | norm 0.0986 | dt 0.018
type train | step 1390 | loss 0.0270 0.0750 0.2828 0.4424 0.8405 | lr 9.8e-04 | norm 0.2460 | dt 0.018
type train | step 1400 | loss 0.0274 0.0756 0.2848 0.4494 0.8477 | lr 9.8e-04 | norm 0.2080 | dt 0.018
type train | step 1410 | loss 0.0271 0.0753 0.2822 0.4464 0.8457 | lr 9.8e-04 | norm 0.1807 | dt 0.019
type train | step 1420 | loss 0.0274 0.0763 0.2863 0.4574 0.8541 | lr 9.8e-04 | norm 0.1084 | dt 0.018
type train | step 1430 | loss 0.0273 0.0756 0.2814 0.4481 0.8454 | lr 9.8e-04 | norm 0.1572 | dt 0.018
type train | step 1440 | loss 0.0271 0.0745 0.2814 0.4460 0.8382 | lr 9.8e-04 | norm 0.0923 | dt 0.018
type train | step 1450 | loss 0.0268 0.0739 0.2809 0.4416 0.8363 | lr 9.8e-04 | norm 0.2386 | dt 0.018
type train | step 1460 | loss 0.0271 0.0752 0.2864 0.4498 0.8461 | lr 9.8e-04 | norm 0.1115 | dt 0.018
type train | step 1470 | loss 0.0270 0.0746 0.2795 0.4415 0.8414 | lr 9.7e-04 | norm 0.1573 | dt 0.018
type train | step 1480 | loss 0.0271 0.0741 0.2808 0.4425 0.8395 | lr 9.7e-04 | norm 0.1882 | dt 0.018
type train | step 1490 | loss 0.0270 0.0748 0.2797 0.4429 0.8364 | lr 9.7e-04 | norm 0.1861 | dt 0.018
type train | step 1500 | loss 0.0273 0.0750 0.2818 0.4491 0.8484 | lr 9.7e-04 | norm 0.1673 | dt 0.018
type train | step 1510 | loss 0.0269 0.0742 0.2785 0.4394 0.8391 | lr 9.7e-04 | norm 0.1711 | dt 0.018
type train | step 1520 | loss 0.0269 0.0742 0.2846 0.4453 0.8425 | lr 9.7e-04 | norm 0.1103 | dt 0.018
type train | step 1530 | loss 0.0272 0.0749 0.2797 0.4447 0.8459 | lr 9.7e-04 | norm 0.2115 | dt 0.018
type train | step 1540 | loss 0.0269 0.0742 0.2784 0.4426 0.8421 | lr 9.7e-04 | norm 0.1256 | dt 0.018
type train | step 1550 | loss 0.0269 0.0741 0.2805 0.4459 0.8381 | lr 9.7e-04 | norm 0.0932 | dt 0.018
type train | step 1560 | loss 0.0267 0.0734 0.2784 0.4426 0.8355 | lr 9.7e-04 | norm 0.1850 | dt 0.018
type train | step 1570 | loss 0.0268 0.0739 0.2795 0.4420 0.8405 | lr 9.7e-04 | norm 0.1793 | dt 0.018
type train | step 1580 | loss 0.0268 0.0736 0.2761 0.4365 0.8324 | lr 9.7e-04 | norm 0.1044 | dt 0.018
type train | step 1590 | loss 0.0267 0.0735 0.2798 0.4417 0.8396 | lr 9.7e-04 | norm 0.1356 | dt 0.018
type train | step 1600 | loss 0.0271 0.0744 0.2807 0.4513 0.8488 | lr 9.7e-04 | norm 0.0916 | dt 0.018
type train | step 1610 | loss 0.0266 0.0730 0.2786 0.4407 0.8323 | lr 9.6e-04 | norm 0.1403 | dt 0.018
type train | step 1620 | loss 0.0266 0.0738 0.2757 0.4385 0.8317 | lr 9.6e-04 | norm 0.1411 | dt 0.018
type train | step 1630 | loss 0.0267 0.0734 0.2787 0.4419 0.8352 | lr 9.6e-04 | norm 0.1060 | dt 0.018
type train | step 1640 | loss 0.0267 0.0737 0.2776 0.4412 0.8401 | lr 9.6e-04 | norm 0.1146 | dt 0.018
type train | step 1650 | loss 0.0266 0.0732 0.2803 0.4445 0.8395 | lr 9.6e-04 | norm 0.1714 | dt 0.018
type train | step 1660 | loss 0.0266 0.0729 0.2754 0.4379 0.8346 | lr 9.6e-04 | norm 0.1015 | dt 0.018
type train | step 1670 | loss 0.0266 0.0734 0.2784 0.4424 0.8337 | lr 9.6e-04 | norm 0.1191 | dt 0.018
type train | step 1680 | loss 0.0265 0.0729 0.2728 0.4335 0.8265 | lr 9.6e-04 | norm 0.1011 | dt 0.018
type train | step 1690 | loss 0.0268 0.0733 0.2775 0.4415 0.8298 | lr 9.6e-04 | norm 0.0986 | dt 0.018
type train | step 1700 | loss 0.0266 0.0731 0.2775 0.4400 0.8374 | lr 9.6e-04 | norm 0.2354 | dt 0.018
type train | step 1710 | loss 0.0264 0.0729 0.2776 0.4423 0.8359 | lr 9.6e-04 | norm 0.1153 | dt 0.018
type train | step 1720 | loss 0.0266 0.0730 0.2772 0.4418 0.8345 | lr 9.5e-04 | norm 0.1104 | dt 0.018
type train | step 1730 | loss 0.0265 0.0727 0.2781 0.4430 0.8325 | lr 9.5e-04 | norm 0.1494 | dt 0.018
type train | step 1740 | loss 0.0264 0.0726 0.2766 0.4437 0.8312 | lr 9.5e-04 | norm 0.1823 | dt 0.018
type train | step 1750 | loss 0.0268 0.0732 0.2781 0.4445 0.8358 | lr 9.5e-04 | norm 0.1073 | dt 0.018
type train | step 1760 | loss 0.0263 0.0720 0.2748 0.4353 0.8287 | lr 9.5e-04 | norm 0.1822 | dt 0.018
type train | step 1770 | loss 0.0263 0.0724 0.2786 0.4425 0.8330 | lr 9.5e-04 | norm 0.1370 | dt 0.018
type train | step 1780 | loss 0.0267 0.0731 0.2770 0.4424 0.8361 | lr 9.5e-04 | norm 0.1296 | dt 0.018
type train | step 1790 | loss 0.0264 0.0724 0.2779 0.4458 0.8355 | lr 9.5e-04 | norm 0.1097 | dt 0.018
type train | step 1800 | loss 0.0262 0.0729 0.2744 0.4368 0.8323 | lr 9.5e-04 | norm 0.2461 | dt 0.018
type train | step 1810 | loss 0.0265 0.0725 0.2764 0.4412 0.8287 | lr 9.5e-04 | norm 0.1623 | dt 0.018
type train | step 1820 | loss 0.0261 0.0714 0.2742 0.4332 0.8248 | lr 9.5e-04 | norm 0.1356 | dt 0.018
type train | step 1830 | loss 0.0261 0.0721 0.2755 0.4411 0.8306 | lr 9.4e-04 | norm 0.1211 | dt 0.020
type train | step 1840 | loss 0.0262 0.0718 0.2759 0.4364 0.8279 | lr 9.4e-04 | norm 0.1491 | dt 0.019
type train | step 1850 | loss 0.0261 0.0718 0.2772 0.4409 0.8324 | lr 9.4e-04 | norm 0.1092 | dt 0.018
type train | step 1860 | loss 0.0261 0.0718 0.2718 0.4344 0.8253 | lr 9.4e-04 | norm 0.1200 | dt 0.018
type train | step 1870 | loss 0.0266 0.0732 0.2775 0.4485 0.8397 | lr 9.4e-04 | norm 0.0993 | dt 0.018
type train | step 1880 | loss 0.0261 0.0717 0.2745 0.4309 0.8246 | lr 9.4e-04 | norm 0.1920 | dt 0.018
type train | step 1890 | loss 0.0259 0.0715 0.2730 0.4332 0.8239 | lr 9.4e-04 | norm 0.1072 | dt 0.019
type train | step 1900 | loss 0.0260 0.0714 0.2732 0.4326 0.8240 | lr 9.4e-04 | norm 0.1930 | dt 0.018
type train | step 1910 | loss 0.0263 0.0724 0.2780 0.4484 0.8387 | lr 9.4e-04 | norm 0.1038 | dt 0.018
type train | step 1920 | loss 0.0260 0.0718 0.2725 0.4359 0.8284 | lr 9.3e-04 | norm 0.1042 | dt 0.019
type train | step 1930 | loss 0.0260 0.0711 0.2704 0.4296 0.8177 | lr 9.3e-04 | norm 0.0915 | dt 0.019
type train | step 1940 | loss 0.0261 0.0719 0.2738 0.4332 0.8253 | lr 9.3e-04 | norm 0.1354 | dt 0.019
type train | step 1950 | loss 0.0263 0.0725 0.2752 0.4348 0.8297 | lr 9.3e-04 | norm 0.1444 | dt 0.018
type train | step 1960 | loss 0.0261 0.0716 0.2730 0.4345 0.8267 | lr 9.3e-04 | norm 0.0957 | dt 0.019
type train | step 1970 | loss 0.0266 0.0736 0.2797 0.4462 0.8348 | lr 9.3e-04 | norm 0.1127 | dt 0.018
type train | step 1980 | loss 0.0261 0.0721 0.2739 0.4379 0.8328 | lr 9.3e-04 | norm 0.1137 | dt 0.019
type train | step 1990 | loss 0.0261 0.0714 0.2735 0.4353 0.8253 | lr 9.3e-04 | norm 0.1640 | dt 0.018
type train | step 2000 | loss 0.0257 0.0711 0.2736 0.4300 0.8209 | lr 9.3e-04 | norm 0.1911 | dt 0.021
type train | step 2010 | loss 0.0261 0.0719 0.2752 0.4366 0.8279 | lr 9.2e-04 | norm 0.1168 | dt 0.018
type train | step 2020 | loss 0.0259 0.0717 0.2731 0.4337 0.8267 | lr 9.2e-04 | norm 0.1561 | dt 0.018
type train | step 2030 | loss 0.0262 0.0725 0.2769 0.4453 0.8354 | lr 9.2e-04 | norm 0.1745 | dt 0.018
type train | step 2040 | loss 0.0261 0.0719 0.2722 0.4365 0.8273 | lr 9.2e-04 | norm 0.2309 | dt 0.018
type train | step 2050 | loss 0.0259 0.0710 0.2727 0.4348 0.8202 | lr 9.2e-04 | norm 0.0933 | dt 0.018
type train | step 2060 | loss 0.0256 0.0704 0.2724 0.4305 0.8182 | lr 9.2e-04 | norm 0.1374 | dt 0.018
type train | step 2070 | loss 0.0260 0.0717 0.2776 0.4388 0.8282 | lr 9.2e-04 | norm 0.0903 | dt 0.018
type train | step 2080 | loss 0.0259 0.0712 0.2713 0.4308 0.8239 | lr 9.2e-04 | norm 0.0952 | dt 0.018
type train | step 2090 | loss 0.0259 0.0708 0.2725 0.4316 0.8219 | lr 9.2e-04 | norm 0.1069 | dt 0.018
type train | step 2100 | loss 0.0259 0.0715 0.2716 0.4326 0.8196 | lr 9.1e-04 | norm 0.1125 | dt 0.018
type train | step 2110 | loss 0.0261 0.0717 0.2733 0.4389 0.8321 | lr 9.1e-04 | norm 0.1456 | dt 0.018
type train | step 2120 | loss 0.0257 0.0710 0.2710 0.4294 0.8225 | lr 9.1e-04 | norm 0.1261 | dt 0.018
type train | step 2130 | loss 0.0258 0.0712 0.2768 0.4353 0.8269 | lr 9.1e-04 | norm 0.1233 | dt 0.018
type train | step 2140 | loss 0.0261 0.0717 0.2722 0.4352 0.8298 | lr 9.1e-04 | norm 0.2011 | dt 0.018
type train | step 2150 | loss 0.0259 0.0712 0.2709 0.4328 0.8267 | lr 9.1e-04 | norm 0.1329 | dt 0.018
type train | step 2160 | loss 0.0258 0.0710 0.2733 0.4365 0.8233 | lr 9.1e-04 | norm 0.1517 | dt 0.019
type train | step 2170 | loss 0.0256 0.0704 0.2711 0.4333 0.8209 | lr 9.1e-04 | norm 0.0977 | dt 0.018
type train | step 2180 | loss 0.0258 0.0709 0.2725 0.4325 0.8250 | lr 9.0e-04 | norm 0.1066 | dt 0.018
type train | step 2190 | loss 0.0258 0.0708 0.2692 0.4273 0.8182 | lr 9.0e-04 | norm 0.1150 | dt 0.018
type train | step 2200 | loss 0.0257 0.0707 0.2728 0.4333 0.8250 | lr 9.0e-04 | norm 0.0986 | dt 0.018
type train | step 2210 | loss 0.0261 0.0715 0.2735 0.4421 0.8348 | lr 9.0e-04 | norm 0.1336 | dt 0.018
type train | step 2220 | loss 0.0257 0.0702 0.2721 0.4322 0.8190 | lr 9.0e-04 | norm 0.1991 | dt 0.018
type train | step 2230 | loss 0.0256 0.0711 0.2694 0.4299 0.8182 | lr 9.0e-04 | norm 0.1596 | dt 0.018
type train | step 2240 | loss 0.0258 0.0706 0.2722 0.4333 0.8215 | lr 9.0e-04 | norm 0.1374 | dt 0.018
type train | step 2250 | loss 0.0257 0.0710 0.2714 0.4325 0.8265 | lr 8.9e-04 | norm 0.1457 | dt 0.018
type train | step 2260 | loss 0.0257 0.0706 0.2740 0.4365 0.8260 | lr 8.9e-04 | norm 0.1279 | dt 0.018
type train | step 2270 | loss 0.0256 0.0704 0.2693 0.4296 0.8209 | lr 8.9e-04 | norm 0.1127 | dt 0.018
type train | step 2280 | loss 0.0257 0.0708 0.2722 0.4344 0.8207 | lr 8.9e-04 | norm 0.0964 | dt 0.018
type train | step 2290 | loss 0.0256 0.0704 0.2667 0.4255 0.8140 | lr 8.9e-04 | norm 0.1023 | dt 0.018
type train | step 2300 | loss 0.0258 0.0706 0.2713 0.4341 0.8180 | lr 8.9e-04 | norm 0.0889 | dt 0.018
type train | step 2310 | loss 0.0257 0.0707 0.2716 0.4324 0.8248 | lr 8.9e-04 | norm 0.1102 | dt 0.018
type train | step 2320 | loss 0.0256 0.0705 0.2718 0.4347 0.8239 | lr 8.9e-04 | norm 0.1677 | dt 0.018
type train | step 2330 | loss 0.0257 0.0706 0.2713 0.4341 0.8219 | lr 8.8e-04 | norm 0.1642 | dt 0.018
type train | step 2340 | loss 0.0256 0.0702 0.2724 0.4355 0.8213 | lr 8.8e-04 | norm 0.1954 | dt 0.019
type train | step 2350 | loss 0.0255 0.0702 0.2710 0.4366 0.8197 | lr 8.8e-04 | norm 0.1230 | dt 0.018
type train | step 2360 | loss 0.0260 0.0707 0.2723 0.4376 0.8244 | lr 8.8e-04 | norm 0.0928 | dt 0.018
type train | step 2370 | loss 0.0254 0.0697 0.2695 0.4282 0.8173 | lr 8.8e-04 | norm 0.1077 | dt 0.018
type train | step 2380 | loss 0.0254 0.0700 0.2731 0.4355 0.8220 | lr 8.8e-04 | norm 0.1314 | dt 0.018
type train | step 2390 | loss 0.0258 0.0708 0.2713 0.4353 0.8243 | lr 8.8e-04 | norm 0.1653 | dt 0.018
type train | step 2400 | loss 0.0256 0.0701 0.2725 0.4392 0.8249 | lr 8.7e-04 | norm 0.0943 | dt 0.018
type train | step 2410 | loss 0.0254 0.0706 0.2693 0.4301 0.8216 | lr 8.7e-04 | norm 0.2082 | dt 0.019
type train | step 2420 | loss 0.0257 0.0702 0.2712 0.4346 0.8185 | lr 8.7e-04 | norm 0.1128 | dt 0.018
type train | step 2430 | loss 0.0253 0.0693 0.2694 0.4266 0.8144 | lr 8.7e-04 | norm 0.1954 | dt 0.018
type train | step 2440 | loss 0.0254 0.0700 0.2708 0.4343 0.8206 | lr 8.7e-04 | norm 0.1513 | dt 0.021
type train | step 2450 | loss 0.0254 0.0697 0.2708 0.4302 0.8177 | lr 8.7e-04 | norm 0.1328 | dt 0.018
type train | step 2460 | loss 0.0254 0.0697 0.2723 0.4347 0.8225 | lr 8.6e-04 | norm 0.1179 | dt 0.018
type train | step 2470 | loss 0.0253 0.0697 0.2669 0.4281 0.8157 | lr 8.6e-04 | norm 0.1201 | dt 0.020
type train | step 2480 | loss 0.0259 0.0711 0.2724 0.4425 0.8299 | lr 8.6e-04 | norm 0.1326 | dt 0.020
type train | step 2490 | loss 0.0253 0.0697 0.2699 0.4250 0.8149 | lr 8.6e-04 | norm 0.1458 | dt 0.018
type train | step 2500 | loss 0.0251 0.0694 0.2685 0.4272 0.8147 | lr 8.6e-04 | norm 0.0996 | dt 0.018
type train | step 2510 | loss 0.0253 0.0694 0.2688 0.4271 0.8143 | lr 8.6e-04 | norm 0.1289 | dt 0.018
type train | step 2520 | loss 0.0255 0.0704 0.2734 0.4424 0.8297 | lr 8.6e-04 | norm 0.1029 | dt 0.018
type train | step 2530 | loss 0.0253 0.0698 0.2680 0.4299 0.8194 | lr 8.5e-04 | norm 0.2135 | dt 0.018
type train | step 2540 | loss 0.0253 0.0691 0.2661 0.4239 0.8094 | lr 8.5e-04 | norm 0.2041 | dt 0.018
type train | step 2550 | loss 0.0254 0.0699 0.2695 0.4276 0.8163 | lr 8.5e-04 | norm 0.1220 | dt 0.018
type train | step 2560 | loss 0.0256 0.0705 0.2708 0.4291 0.8204 | lr 8.5e-04 | norm 0.0921 | dt 0.018
type train | step 2570 | loss 0.0254 0.0697 0.2687 0.4292 0.8179 | lr 8.5e-04 | norm 0.0840 | dt 0.018
type train | step 2580 | loss 0.0259 0.0715 0.2748 0.4401 0.8264 | lr 8.5e-04 | norm 0.1731 | dt 0.018
type train | step 2590 | loss 0.0254 0.0702 0.2698 0.4324 0.8243 | lr 8.4e-04 | norm 0.1832 | dt 0.018
type train | step 2600 | loss 0.0254 0.0696 0.2693 0.4300 0.8172 | lr 8.4e-04 | norm 0.0977 | dt 0.018
type train | step 2610 | loss 0.0251 0.0694 0.2696 0.4247 0.8125 | lr 8.4e-04 | norm 0.1714 | dt 0.025
type train | step 2620 | loss 0.0255 0.0700 0.2712 0.4314 0.8195 | lr 8.4e-04 | norm 0.0990 | dt 0.025
type train | step 2630 | loss 0.0252 0.0697 0.2691 0.4286 0.8192 | lr 8.4e-04 | norm 0.1553 | dt 0.019
type train | step 2640 | loss 0.0256 0.0705 0.2728 0.4396 0.8272 | lr 8.4e-04 | norm 0.0845 | dt 0.018
type train | step 2650 | loss 0.0255 0.0701 0.2681 0.4313 0.8192 | lr 8.4e-04 | norm 0.1207 | dt 0.018
type train | step 2660 | loss 0.0253 0.0692 0.2687 0.4297 0.8127 | lr 8.3e-04 | norm 0.1622 | dt 0.018
type train | step 2670 | loss 0.0250 0.0687 0.2685 0.4257 0.8105 | lr 8.3e-04 | norm 0.1102 | dt 0.018
type train | step 2680 | loss 0.0253 0.0700 0.2736 0.4340 0.8206 | lr 8.3e-04 | norm 0.0987 | dt 0.018
type train | step 2690 | loss 0.0253 0.0695 0.2677 0.4261 0.8162 | lr 8.3e-04 | norm 0.0765 | dt 0.018
type train | step 2700 | loss 0.0253 0.0691 0.2687 0.4267 0.8141 | lr 8.3e-04 | norm 0.1273 | dt 0.018
type train | step 2710 | loss 0.0254 0.0698 0.2678 0.4277 0.8127 | lr 8.3e-04 | norm 0.2020 | dt 0.018
type train | step 2720 | loss 0.0255 0.0700 0.2694 0.4336 0.8249 | lr 8.2e-04 | norm 0.1021 | dt 0.018
type train | step 2730 | loss 0.0252 0.0693 0.2674 0.4244 0.8149 | lr 8.2e-04 | norm 0.1465 | dt 0.018
type train | step 2740 | loss 0.0252 0.0695 0.2732 0.4307 0.8195 | lr 8.2e-04 | norm 0.1198 | dt 0.018
type train | step 2750 | loss 0.0256 0.0700 0.2687 0.4306 0.8225 | lr 8.2e-04 | norm 0.1885 | dt 0.018
type train | step 2760 | loss 0.0253 0.0695 0.2675 0.4282 0.8197 | lr 8.2e-04 | norm 0.1270 | dt 0.018
type train | step 2770 | loss 0.0253 0.0694 0.2699 0.4321 0.8163 | lr 8.2e-04 | norm 0.1093 | dt 0.018
type train | step 2780 | loss 0.0251 0.0689 0.2678 0.4290 0.8147 | lr 8.1e-04 | norm 0.1623 | dt 0.018
type train | step 2790 | loss 0.0252 0.0693 0.2690 0.4278 0.8180 | lr 8.1e-04 | norm 0.1654 | dt 0.018
type train | step 2800 | loss 0.0252 0.0692 0.2658 0.4229 0.8114 | lr 8.1e-04 | norm 0.0865 | dt 0.018
type train | step 2810 | loss 0.0252 0.0692 0.2693 0.4290 0.8180 | lr 8.1e-04 | norm 0.0871 | dt 0.018
type train | step 2820 | loss 0.0255 0.0700 0.2699 0.4373 0.8281 | lr 8.1e-04 | norm 0.1485 | dt 0.018
type train | step 2830 | loss 0.0251 0.0687 0.2688 0.4283 0.8128 | lr 8.1e-04 | norm 0.1920 | dt 0.018
type train | step 2840 | loss 0.0251 0.0696 0.2662 0.4256 0.8118 | lr 8.0e-04 | norm 0.0801 | dt 0.018
type train | step 2850 | loss 0.0253 0.0691 0.2689 0.4290 0.8150 | lr 8.0e-04 | norm 0.1063 | dt 0.019
type train | step 2860 | loss 0.0252 0.0695 0.2683 0.4283 0.8195 | lr 8.0e-04 | norm 0.1148 | dt 0.019
type train | step 2870 | loss 0.0251 0.0692 0.2709 0.4324 0.8198 | lr 8.0e-04 | norm 0.1824 | dt 0.019
type train | step 2880 | loss 0.0251 0.0690 0.2661 0.4251 0.8146 | lr 8.0e-04 | norm 0.1378 | dt 0.018
type train | step 2890 | loss 0.0252 0.0693 0.2692 0.4302 0.8149 | lr 7.9e-04 | norm 0.1870 | dt 0.018
type train | step 2900 | loss 0.0251 0.0690 0.2639 0.4215 0.8079 | lr 7.9e-04 | norm 0.0900 | dt 0.018
type train | step 2910 | loss 0.0254 0.0692 0.2682 0.4302 0.8127 | lr 7.9e-04 | norm 0.1197 | dt 0.018
type train | step 2920 | loss 0.0252 0.0692 0.2685 0.4285 0.8189 | lr 7.9e-04 | norm 0.1070 | dt 0.018
type train | step 2930 | loss 0.0251 0.0691 0.2689 0.4311 0.8179 | lr 7.9e-04 | norm 0.1005 | dt 0.018
type train | step 2940 | loss 0.0252 0.0692 0.2683 0.4299 0.8159 | lr 7.9e-04 | norm 0.1133 | dt 0.018
type train | step 2950 | loss 0.0251 0.0689 0.2695 0.4317 0.8155 | lr 7.8e-04 | norm 0.0951 | dt 0.018
type train | step 2960 | loss 0.0251 0.0689 0.2681 0.4329 0.8143 | lr 7.8e-04 | norm 0.1428 | dt 0.018
type train | step 2970 | loss 0.0255 0.0693 0.2694 0.4338 0.8191 | lr 7.8e-04 | norm 0.0981 | dt 0.018
type train | step 2980 | loss 0.0250 0.0684 0.2667 0.4244 0.8117 | lr 7.8e-04 | norm 0.1399 | dt 0.018
type train | step 2990 | loss 0.0250 0.0687 0.2702 0.4317 0.8164 | lr 7.8e-04 | norm 0.1413 | dt 0.018
type train | step 3000 | loss 0.0254 0.0694 0.2683 0.4312 0.8183 | lr 7.8e-04 | norm 0.1084 | dt 0.018
type train | step 3010 | loss 0.0251 0.0688 0.2697 0.4354 0.8198 | lr 7.7e-04 | norm 0.1497 | dt 0.018
type train | step 3020 | loss 0.0250 0.0694 0.2668 0.4265 0.8164 | lr 7.7e-04 | norm 0.2567 | dt 0.018
type train | step 3030 | loss 0.0253 0.0689 0.2683 0.4309 0.8134 | lr 7.7e-04 | norm 0.1822 | dt 0.018
type train | step 3040 | loss 0.0249 0.0680 0.2669 0.4230 0.8088 | lr 7.7e-04 | norm 0.0975 | dt 0.018
type train | step 3050 | loss 0.0250 0.0687 0.2684 0.4307 0.8152 | lr 7.7e-04 | norm 0.0959 | dt 0.020
type train | step 3060 | loss 0.0251 0.0685 0.2681 0.4268 0.8120 | lr 7.6e-04 | norm 0.0814 | dt 0.018
type train | step 3070 | loss 0.0250 0.0684 0.2696 0.4311 0.8172 | lr 7.6e-04 | norm 0.1092 | dt 0.018
type train | step 3080 | loss 0.0249 0.0685 0.2644 0.4247 0.8105 | lr 7.6e-04 | norm 0.1025 | dt 0.018
type train | step 3090 | loss 0.0255 0.0697 0.2697 0.4387 0.8247 | lr 7.6e-04 | norm 0.1004 | dt 0.019
type train | step 3100 | loss 0.0249 0.0685 0.2675 0.4216 0.8094 | lr 7.6e-04 | norm 0.1086 | dt 0.018
type train | step 3110 | loss 0.0248 0.0683 0.2662 0.4238 0.8098 | lr 7.5e-04 | norm 0.1094 | dt 0.018
type train | step 3120 | loss 0.0249 0.0682 0.2664 0.4241 0.8094 | lr 7.5e-04 | norm 0.1350 | dt 0.018
type train | step 3130 | loss 0.0252 0.0692 0.2709 0.4389 0.8249 | lr 7.5e-04 | norm 0.1336 | dt 0.018
type train | step 3140 | loss 0.0249 0.0687 0.2655 0.4264 0.8141 | lr 7.5e-04 | norm 0.1558 | dt 0.018
type train | step 3150 | loss 0.0249 0.0680 0.2638 0.4205 0.8044 | lr 7.5e-04 | norm 0.0772 | dt 0.018
type train | step 3160 | loss 0.0250 0.0687 0.2671 0.4243 0.8115 | lr 7.5e-04 | norm 0.1289 | dt 0.018
type train | step 3170 | loss 0.0252 0.0693 0.2684 0.4259 0.8152 | lr 7.4e-04 | norm 0.0840 | dt 0.018
type train | step 3180 | loss 0.0251 0.0685 0.2664 0.4261 0.8131 | lr 7.4e-04 | norm 0.1025 | dt 0.018
type train | step 3190 | loss 0.0256 0.0703 0.2722 0.4365 0.8212 | lr 7.4e-04 | norm 0.1584 | dt 0.018
type train | step 3200 | loss 0.0251 0.0690 0.2676 0.4292 0.8194 | lr 7.4e-04 | norm 0.1608 | dt 0.018
type train | step 3210 | loss 0.0251 0.0684 0.2669 0.4267 0.8126 | lr 7.4e-04 | norm 0.0853 | dt 0.018
type train | step 3220 | loss 0.0247 0.0681 0.2674 0.4215 0.8075 | lr 7.3e-04 | norm 0.1150 | dt 0.018
type train | step 3230 | loss 0.0252 0.0689 0.2688 0.4283 0.8148 | lr 7.3e-04 | norm 0.0977 | dt 0.018
type train | step 3240 | loss 0.0249 0.0686 0.2670 0.4258 0.8146 | lr 7.3e-04 | norm 0.1344 | dt 0.018
type train | step 3250 | loss 0.0253 0.0695 0.2706 0.4365 0.8225 | lr 7.3e-04 | norm 0.0960 | dt 0.018
type train | step 3260 | loss 0.0252 0.0690 0.2660 0.4282 0.8148 | lr 7.3e-04 | norm 0.1764 | dt 0.018
type train | step 3270 | loss 0.0249 0.0682 0.2666 0.4266 0.8082 | lr 7.2e-04 | norm 0.1259 | dt 0.018
type train | step 3280 | loss 0.0247 0.0677 0.2664 0.4227 0.8059 | lr 7.2e-04 | norm 0.1430 | dt 0.018
type train | step 3290 | loss 0.0250 0.0689 0.2714 0.4310 0.8160 | lr 7.2e-04 | norm 0.0881 | dt 0.018
type train | step 3300 | loss 0.0250 0.0684 0.2656 0.4233 0.8116 | lr 7.2e-04 | norm 0.0803 | dt 0.018
type train | step 3310 | loss 0.0250 0.0681 0.2666 0.4239 0.8096 | lr 7.2e-04 | norm 0.1290 | dt 0.018
type train | step 3320 | loss 0.0250 0.0688 0.2658 0.4249 0.8081 | lr 7.1e-04 | norm 0.1021 | dt 0.018
type train | step 3330 | loss 0.0252 0.0689 0.2671 0.4307 0.8203 | lr 7.1e-04 | norm 0.1142 | dt 0.018
type train | step 3340 | loss 0.0249 0.0684 0.2652 0.4215 0.8101 | lr 7.1e-04 | norm 0.1542 | dt 0.018
type train | step 3350 | loss 0.0249 0.0686 0.2710 0.4278 0.8150 | lr 7.1e-04 | norm 0.1147 | dt 0.018
type train | step 3360 | loss 0.0253 0.0689 0.2666 0.4278 0.8180 | lr 7.1e-04 | norm 0.1288 | dt 0.018
type train | step 3370 | loss 0.0250 0.0686 0.2655 0.4254 0.8155 | lr 7.0e-04 | norm 0.1106 | dt 0.018
type train | step 3380 | loss 0.0250 0.0684 0.2678 0.4293 0.8125 | lr 7.0e-04 | norm 0.1442 | dt 0.018
type train | step 3390 | loss 0.0248 0.0679 0.2659 0.4265 0.8106 | lr 7.0e-04 | norm 0.1856 | dt 0.018
type train | step 3400 | loss 0.0250 0.0682 0.2670 0.4249 0.8136 | lr 7.0e-04 | norm 0.1398 | dt 0.018
type train | step 3410 | loss 0.0249 0.0682 0.2638 0.4202 0.8072 | lr 7.0e-04 | norm 0.1031 | dt 0.018
type train | step 3420 | loss 0.0249 0.0682 0.2673 0.4263 0.8137 | lr 7.0e-04 | norm 0.0992 | dt 0.019
type train | step 3430 | loss 0.0252 0.0690 0.2678 0.4344 0.8236 | lr 6.9e-04 | norm 0.1117 | dt 0.019
type train | step 3440 | loss 0.0249 0.0678 0.2670 0.4259 0.8088 | lr 6.9e-04 | norm 0.1669 | dt 0.019
type train | step 3450 | loss 0.0248 0.0687 0.2644 0.4230 0.8080 | lr 6.9e-04 | norm 0.0955 | dt 0.020
type train | step 3460 | loss 0.0250 0.0683 0.2670 0.4265 0.8110 | lr 6.9e-04 | norm 0.1354 | dt 0.018
type train | step 3470 | loss 0.0250 0.0686 0.2664 0.4258 0.8152 | lr 6.9e-04 | norm 0.1174 | dt 0.018
type train | step 3480 | loss 0.0249 0.0682 0.2690 0.4299 0.8156 | lr 6.8e-04 | norm 0.1128 | dt 0.018
type train | step 3490 | loss 0.0249 0.0681 0.2643 0.4224 0.8107 | lr 6.8e-04 | norm 0.1033 | dt 0.018
type train | step 3500 | loss 0.0250 0.0684 0.2674 0.4277 0.8109 | lr 6.8e-04 | norm 0.0960 | dt 0.019
type train | step 3510 | loss 0.0248 0.0681 0.2621 0.4189 0.8043 | lr 6.8e-04 | norm 0.1435 | dt 0.018
type train | step 3520 | loss 0.0251 0.0683 0.2664 0.4279 0.8093 | lr 6.8e-04 | norm 0.1367 | dt 0.018
type train | step 3530 | loss 0.0250 0.0683 0.2666 0.4262 0.8149 | lr 6.7e-04 | norm 0.1138 | dt 0.018
type train | step 3540 | loss 0.0249 0.0682 0.2670 0.4288 0.8139 | lr 6.7e-04 | norm 0.0875 | dt 0.018
type train | step 3550 | loss 0.0250 0.0683 0.2666 0.4273 0.8120 | lr 6.7e-04 | norm 0.1535 | dt 0.018
type train | step 3560 | loss 0.0249 0.0681 0.2677 0.4294 0.8120 | lr 6.7e-04 | norm 0.1111 | dt 0.018
type train | step 3570 | loss 0.0248 0.0680 0.2665 0.4306 0.8107 | lr 6.6e-04 | norm 0.0973 | dt 0.018
type train | step 3580 | loss 0.0252 0.0684 0.2677 0.4316 0.8157 | lr 6.6e-04 | norm 0.1057 | dt 0.018
type train | step 3590 | loss 0.0247 0.0675 0.2650 0.4222 0.8078 | lr 6.6e-04 | norm 0.0912 | dt 0.019
type train | step 3600 | loss 0.0248 0.0678 0.2684 0.4293 0.8126 | lr 6.6e-04 | norm 0.1332 | dt 0.018
type train | step 3610 | loss 0.0251 0.0685 0.2666 0.4286 0.8143 | lr 6.6e-04 | norm 0.1239 | dt 0.018
type train | step 3620 | loss 0.0249 0.0680 0.2680 0.4331 0.8162 | lr 6.5e-04 | norm 0.1015 | dt 0.018
type train | step 3630 | loss 0.0247 0.0685 0.2652 0.4243 0.8125 | lr 6.5e-04 | norm 0.1435 | dt 0.018
type train | step 3640 | loss 0.0250 0.0681 0.2668 0.4288 0.8101 | lr 6.5e-04 | norm 0.1151 | dt 0.018
type train | step 3650 | loss 0.0246 0.0672 0.2653 0.4211 0.8053 | lr 6.5e-04 | norm 0.1028 | dt 0.018
type train | step 3660 | loss 0.0248 0.0678 0.2668 0.4285 0.8122 | lr 6.5e-04 | norm 0.0916 | dt 0.021
type train | step 3670 | loss 0.0248 0.0677 0.2665 0.4245 0.8088 | lr 6.4e-04 | norm 0.1152 | dt 0.018
type train | step 3680 | loss 0.0248 0.0677 0.2679 0.4290 0.8140 | lr 6.4e-04 | norm 0.1248 | dt 0.018
type train | step 3690 | loss 0.0247 0.0678 0.2629 0.4228 0.8072 | lr 6.4e-04 | norm 0.1256 | dt 0.018
type train | step 3700 | loss 0.0252 0.0689 0.2682 0.4366 0.8214 | lr 6.4e-04 | norm 0.1068 | dt 0.018
type train | step 3710 | loss 0.0247 0.0677 0.2659 0.4197 0.8061 | lr 6.4e-04 | norm 0.0995 | dt 0.018
type train | step 3720 | loss 0.0246 0.0675 0.2647 0.4218 0.8066 | lr 6.3e-04 | norm 0.1095 | dt 0.018
type train | step 3730 | loss 0.0247 0.0675 0.2650 0.4223 0.8065 | lr 6.3e-04 | norm 0.2110 | dt 0.018
type train | step 3740 | loss 0.0250 0.0684 0.2694 0.4369 0.8217 | lr 6.3e-04 | norm 0.1188 | dt 0.018
type train | step 3750 | loss 0.0247 0.0679 0.2640 0.4243 0.8107 | lr 6.3e-04 | norm 0.1007 | dt 0.019
type train | step 3760 | loss 0.0247 0.0673 0.2624 0.4186 0.8014 | lr 6.3e-04 | norm 0.0949 | dt 0.018
type train | step 3770 | loss 0.0248 0.0680 0.2656 0.4224 0.8082 | lr 6.2e-04 | norm 0.0986 | dt 0.018
type train | step 3780 | loss 0.0250 0.0686 0.2668 0.4239 0.8119 | lr 6.2e-04 | norm 0.0794 | dt 0.019
type train | step 3790 | loss 0.0249 0.0678 0.2650 0.4244 0.8099 | lr 6.2e-04 | norm 0.0873 | dt 0.018
type train | step 3800 | loss 0.0253 0.0694 0.2704 0.4344 0.8176 | lr 6.2e-04 | norm 0.1139 | dt 0.018
type train | step 3810 | loss 0.0249 0.0683 0.2662 0.4272 0.8162 | lr 6.2e-04 | norm 0.1321 | dt 0.018
type train | step 3820 | loss 0.0249 0.0677 0.2655 0.4249 0.8096 | lr 6.1e-04 | norm 0.1207 | dt 0.018
type train | step 3830 | loss 0.0245 0.0674 0.2659 0.4196 0.8044 | lr 6.1e-04 | norm 0.1425 | dt 0.018
type train | step 3840 | loss 0.0249 0.0681 0.2673 0.4265 0.8117 | lr 6.1e-04 | norm 0.0868 | dt 0.018
type train | step 3850 | loss 0.0247 0.0679 0.2657 0.4240 0.8117 | lr 6.1e-04 | norm 0.1476 | dt 0.018
type train | step 3860 | loss 0.0251 0.0687 0.2691 0.4346 0.8194 | lr 6.1e-04 | norm 0.1355 | dt 0.018
type train | step 3870 | loss 0.0250 0.0683 0.2646 0.4265 0.8118 | lr 6.0e-04 | norm 0.0915 | dt 0.018
type train | step 3880 | loss 0.0247 0.0674 0.2652 0.4248 0.8052 | lr 6.0e-04 | norm 0.0786 | dt 0.018
type train | step 3890 | loss 0.0246 0.0670 0.2650 0.4208 0.8027 | lr 6.0e-04 | norm 0.0845 | dt 0.018
type train | step 3900 | loss 0.0248 0.0682 0.2699 0.4291 0.8130 | lr 6.0e-04 | norm 0.0866 | dt 0.018
type train | step 3910 | loss 0.0248 0.0677 0.2642 0.4215 0.8087 | lr 5.9e-04 | norm 0.0762 | dt 0.018
type train | step 3920 | loss 0.0248 0.0673 0.2653 0.4222 0.8066 | lr 5.9e-04 | norm 0.1002 | dt 0.018
type train | step 3930 | loss 0.0249 0.0681 0.2645 0.4232 0.8054 | lr 5.9e-04 | norm 0.0887 | dt 0.018
type train | step 3940 | loss 0.0250 0.0682 0.2657 0.4289 0.8174 | lr 5.9e-04 | norm 0.1069 | dt 0.018
type train | step 3950 | loss 0.0247 0.0677 0.2637 0.4197 0.8069 | lr 5.9e-04 | norm 0.1500 | dt 0.018
type train | step 3960 | loss 0.0248 0.0679 0.2696 0.4260 0.8121 | lr 5.8e-04 | norm 0.1103 | dt 0.018
type train | step 3970 | loss 0.0251 0.0682 0.2652 0.4260 0.8150 | lr 5.8e-04 | norm 0.1289 | dt 0.018
type train | step 3980 | loss 0.0249 0.0679 0.2642 0.4235 0.8125 | lr 5.8e-04 | norm 0.1020 | dt 0.018
type train | step 3990 | loss 0.0248 0.0677 0.2666 0.4276 0.8098 | lr 5.8e-04 | norm 0.0951 | dt 0.018
type train | step 4000 | loss 0.0246 0.0672 0.2647 0.4248 0.8077 | lr 5.8e-04 | norm 0.0750 | dt 0.018
type train | step 4010 | loss 0.0248 0.0676 0.2656 0.4231 0.8107 | lr 5.7e-04 | norm 0.0877 | dt 0.018
type train | step 4020 | loss 0.0248 0.0676 0.2625 0.4184 0.8044 | lr 5.7e-04 | norm 0.0742 | dt 0.019
type train | step 4030 | loss 0.0247 0.0675 0.2659 0.4245 0.8109 | lr 5.7e-04 | norm 0.0814 | dt 0.018
type train | step 4040 | loss 0.0251 0.0683 0.2665 0.4325 0.8205 | lr 5.7e-04 | norm 0.1041 | dt 0.018
type train | step 4050 | loss 0.0247 0.0672 0.2656 0.4243 0.8061 | lr 5.7e-04 | norm 0.0868 | dt 0.018
type train | step 4060 | loss 0.0246 0.0680 0.2632 0.4214 0.8054 | lr 5.6e-04 | norm 0.0861 | dt 0.018
type train | step 4070 | loss 0.0248 0.0676 0.2656 0.4250 0.8081 | lr 5.6e-04 | norm 0.0970 | dt 0.018
type train | step 4080 | loss 0.0248 0.0679 0.2652 0.4242 0.8123 | lr 5.6e-04 | norm 0.0984 | dt 0.020
type train | step 4090 | loss 0.0247 0.0676 0.2678 0.4282 0.8130 | lr 5.6e-04 | norm 0.1264 | dt 0.019
type train | step 4100 | loss 0.0247 0.0675 0.2631 0.4206 0.8082 | lr 5.6e-04 | norm 0.1411 | dt 0.032
type train | step 4110 | loss 0.0248 0.0677 0.2662 0.4260 0.8085 | lr 5.5e-04 | norm 0.1314 | dt 0.018
type train | step 4120 | loss 0.0247 0.0675 0.2611 0.4173 0.8016 | lr 5.5e-04 | norm 0.0841 | dt 0.018
type train | step 4130 | loss 0.0249 0.0676 0.2653 0.4266 0.8068 | lr 5.5e-04 | norm 0.1351 | dt 0.018
type train | step 4140 | loss 0.0248 0.0677 0.2655 0.4246 0.8124 | lr 5.5e-04 | norm 0.1007 | dt 0.018
type train | step 4150 | loss 0.0247 0.0676 0.2658 0.4273 0.8114 | lr 5.4e-04 | norm 0.0805 | dt 0.018
type train | step 4160 | loss 0.0248 0.0677 0.2654 0.4256 0.8091 | lr 5.4e-04 | norm 0.1037 | dt 0.018
type train | step 4170 | loss 0.0247 0.0675 0.2666 0.4279 0.8095 | lr 5.4e-04 | norm 0.1050 | dt 0.018
type train | step 4180 | loss 0.0247 0.0674 0.2653 0.4292 0.8084 | lr 5.4e-04 | norm 0.1262 | dt 0.018
type train | step 4190 | loss 0.0251 0.0677 0.2667 0.4301 0.8133 | lr 5.4e-04 | norm 0.0800 | dt 0.020
type train | step 4200 | loss 0.0246 0.0669 0.2639 0.4208 0.8053 | lr 5.3e-04 | norm 0.0777 | dt 0.018
type train | step 4210 | loss 0.0246 0.0672 0.2672 0.4277 0.8100 | lr 5.3e-04 | norm 0.1233 | dt 0.018
type train | step 4220 | loss 0.0249 0.0679 0.2654 0.4267 0.8116 | lr 5.3e-04 | norm 0.1500 | dt 0.018
type train | step 4230 | loss 0.0248 0.0673 0.2670 0.4316 0.8138 | lr 5.3e-04 | norm 0.1110 | dt 0.018
type train | step 4240 | loss 0.0246 0.0679 0.2641 0.4228 0.8096 | lr 5.3e-04 | norm 0.0961 | dt 0.018
type train | step 4250 | loss 0.0249 0.0675 0.2658 0.4272 0.8078 | lr 5.2e-04 | norm 0.1170 | dt 0.018
type train | step 4260 | loss 0.0245 0.0666 0.2643 0.4196 0.8027 | lr 5.2e-04 | norm 0.0820 | dt 0.018
type train | step 4270 | loss 0.0247 0.0672 0.2658 0.4270 0.8099 | lr 5.2e-04 | norm 0.0795 | dt 0.020
type train | step 4280 | loss 0.0247 0.0672 0.2655 0.4229 0.8065 | lr 5.2e-04 | norm 0.1829 | dt 0.018
type train | step 4290 | loss 0.0246 0.0671 0.2667 0.4274 0.8116 | lr 5.2e-04 | norm 0.1070 | dt 0.018
type train | step 4300 | loss 0.0246 0.0673 0.2619 0.4213 0.8046 | lr 5.1e-04 | norm 0.1116 | dt 0.018
type train | step 4310 | loss 0.0251 0.0683 0.2671 0.4351 0.8189 | lr 5.1e-04 | norm 0.1031 | dt 0.018
type train | step 4320 | loss 0.0246 0.0671 0.2649 0.4181 0.8039 | lr 5.1e-04 | norm 0.0903 | dt 0.018
type train | step 4330 | loss 0.0244 0.0669 0.2637 0.4203 0.8043 | lr 5.1e-04 | norm 0.1022 | dt 0.018
type train | step 4340 | loss 0.0246 0.0669 0.2641 0.4211 0.8041 | lr 5.1e-04 | norm 0.2018 | dt 0.018
type train | step 4350 | loss 0.0248 0.0678 0.2684 0.4353 0.8193 | lr 5.0e-04 | norm 0.1131 | dt 0.018
type train | step 4360 | loss 0.0246 0.0674 0.2630 0.4227 0.8080 | lr 5.0e-04 | norm 0.0864 | dt 0.018
type train | step 4370 | loss 0.0245 0.0667 0.2614 0.4173 0.7991 | lr 5.0e-04 | norm 0.0848 | dt 0.018
type train | step 4380 | loss 0.0247 0.0675 0.2646 0.4210 0.8058 | lr 5.0e-04 | norm 0.0902 | dt 0.018
type train | step 4390 | loss 0.0249 0.0680 0.2658 0.4225 0.8096 | lr 4.9e-04 | norm 0.0826 | dt 0.018
type train | step 4400 | loss 0.0247 0.0673 0.2640 0.4231 0.8078 | lr 4.9e-04 | norm 0.1302 | dt 0.018
type train | step 4410 | loss 0.0252 0.0688 0.2691 0.4328 0.8152 | lr 4.9e-04 | norm 0.1419 | dt 0.018
type train | step 4420 | loss 0.0247 0.0678 0.2653 0.4257 0.8134 | lr 4.9e-04 | norm 0.0992 | dt 0.018
type train | step 4430 | loss 0.0247 0.0672 0.2645 0.4236 0.8073 | lr 4.9e-04 | norm 0.0933 | dt 0.018
type train | step 4440 | loss 0.0244 0.0668 0.2649 0.4183 0.8021 | lr 4.8e-04 | norm 0.1133 | dt 0.018
type train | step 4450 | loss 0.0248 0.0676 0.2663 0.4251 0.8095 | lr 4.8e-04 | norm 0.0796 | dt 0.018
type train | step 4460 | loss 0.0246 0.0674 0.2648 0.4226 0.8096 | lr 4.8e-04 | norm 0.1488 | dt 0.018
type train | step 4470 | loss 0.0249 0.0682 0.2681 0.4331 0.8170 | lr 4.8e-04 | norm 0.1116 | dt 0.018
type train | step 4480 | loss 0.0249 0.0678 0.2638 0.4252 0.8095 | lr 4.8e-04 | norm 0.0845 | dt 0.018
type train | step 4490 | loss 0.0246 0.0669 0.2643 0.4235 0.8029 | lr 4.7e-04 | norm 0.0780 | dt 0.018
type train | step 4500 | loss 0.0244 0.0665 0.2642 0.4194 0.8005 | lr 4.7e-04 | norm 0.1204 | dt 0.018
type train | step 4510 | loss 0.0247 0.0677 0.2689 0.4277 0.8109 | lr 4.7e-04 | norm 0.1049 | dt 0.018
type train | step 4520 | loss 0.0247 0.0672 0.2633 0.4201 0.8065 | lr 4.7e-04 | norm 0.0785 | dt 0.018
type train | step 4530 | loss 0.0247 0.0668 0.2643 0.4209 0.8045 | lr 4.7e-04 | norm 0.1488 | dt 0.018
type train | step 4540 | loss 0.0247 0.0676 0.2637 0.4219 0.8032 | lr 4.6e-04 | norm 0.0723 | dt 0.018
type train | step 4550 | loss 0.0249 0.0677 0.2648 0.4276 0.8150 | lr 4.6e-04 | norm 0.0864 | dt 0.018
type train | step 4560 | loss 0.0246 0.0671 0.2629 0.4184 0.8045 | lr 4.6e-04 | norm 0.1605 | dt 0.018
type train | step 4570 | loss 0.0246 0.0673 0.2687 0.4246 0.8099 | lr 4.6e-04 | norm 0.0874 | dt 0.018
type train | step 4580 | loss 0.0250 0.0676 0.2643 0.4246 0.8128 | lr 4.6e-04 | norm 0.1439 | dt 0.019
type train | step 4590 | loss 0.0247 0.0674 0.2633 0.4221 0.8102 | lr 4.5e-04 | norm 0.1090 | dt 0.018
type train | step 4600 | loss 0.0247 0.0672 0.2658 0.4263 0.8077 | lr 4.5e-04 | norm 0.0854 | dt 0.018
type train | step 4610 | loss 0.0245 0.0667 0.2638 0.4236 0.8056 | lr 4.5e-04 | norm 0.0693 | dt 0.018
type train | step 4620 | loss 0.0247 0.0671 0.2647 0.4217 0.8084 | lr 4.5e-04 | norm 0.0992 | dt 0.018
type train | step 4630 | loss 0.0247 0.0671 0.2617 0.4171 0.8023 | lr 4.5e-04 | norm 0.0688 | dt 0.018
type train | step 4640 | loss 0.0246 0.0671 0.2651 0.4232 0.8088 | lr 4.4e-04 | norm 0.0800 | dt 0.018
type train | step 4650 | loss 0.0249 0.0678 0.2656 0.4311 0.8182 | lr 4.4e-04 | norm 0.1399 | dt 0.018
type train | step 4660 | loss 0.0246 0.0667 0.2649 0.4231 0.8041 | lr 4.4e-04 | norm 0.0713 | dt 0.018
type train | step 4670 | loss 0.0245 0.0676 0.2625 0.4202 0.8033 | lr 4.4e-04 | norm 0.0737 | dt 0.018
type train | step 4680 | loss 0.0247 0.0671 0.2648 0.4238 0.8060 | lr 4.4e-04 | norm 0.1079 | dt 0.018
type train | step 4690 | loss 0.0247 0.0675 0.2644 0.4231 0.8102 | lr 4.3e-04 | norm 0.0948 | dt 0.018
type train | step 4700 | loss 0.0246 0.0671 0.2670 0.4268 0.8109 | lr 4.3e-04 | norm 0.1103 | dt 0.018
type train | step 4710 | loss 0.0246 0.0670 0.2623 0.4192 0.8060 | lr 4.3e-04 | norm 0.1020 | dt 0.018
type train | step 4720 | loss 0.0247 0.0673 0.2655 0.4247 0.8063 | lr 4.3e-04 | norm 0.0794 | dt 0.018
type train | step 4730 | loss 0.0246 0.0670 0.2605 0.4163 0.7996 | lr 4.2e-04 | norm 0.0718 | dt 0.018
type train | step 4740 | loss 0.0248 0.0671 0.2646 0.4256 0.8050 | lr 4.2e-04 | norm 0.0970 | dt 0.018
type train | step 4750 | loss 0.0247 0.0672 0.2647 0.4233 0.8104 | lr 4.2e-04 | norm 0.0694 | dt 0.018
type train | step 4760 | loss 0.0246 0.0671 0.2650 0.4261 0.8096 | lr 4.2e-04 | norm 0.0930 | dt 0.018
type train | step 4770 | loss 0.0247 0.0672 0.2646 0.4243 0.8070 | lr 4.2e-04 | norm 0.1049 | dt 0.018
type train | step 4780 | loss 0.0246 0.0670 0.2658 0.4268 0.8075 | lr 4.1e-04 | norm 0.0971 | dt 0.018
type train | step 4790 | loss 0.0246 0.0669 0.2645 0.4280 0.8064 | lr 4.1e-04 | norm 0.0798 | dt 0.018
type train | step 4800 | loss 0.0250 0.0673 0.2661 0.4290 0.8115 | lr 4.1e-04 | norm 0.0919 | dt 0.018
type train | step 4810 | loss 0.0245 0.0665 0.2632 0.4197 0.8035 | lr 4.1e-04 | norm 0.1002 | dt 0.018
type train | step 4820 | loss 0.0245 0.0667 0.2664 0.4264 0.8080 | lr 4.1e-04 | norm 0.1121 | dt 0.018
type train | step 4830 | loss 0.0248 0.0675 0.2646 0.4252 0.8092 | lr 4.0e-04 | norm 0.1167 | dt 0.018
type train | step 4840 | loss 0.0247 0.0669 0.2663 0.4304 0.8118 | lr 4.0e-04 | norm 0.0842 | dt 0.018
type train | step 4850 | loss 0.0245 0.0675 0.2634 0.4215 0.8075 | lr 4.0e-04 | norm 0.1050 | dt 0.018
type train | step 4860 | loss 0.0248 0.0670 0.2652 0.4261 0.8059 | lr 4.0e-04 | norm 0.1164 | dt 0.018
type train | step 4870 | loss 0.0244 0.0662 0.2636 0.4185 0.8008 | lr 4.0e-04 | norm 0.0734 | dt 0.018
type train | step 4880 | loss 0.0246 0.0668 0.2652 0.4259 0.8082 | lr 4.0e-04 | norm 0.0832 | dt 0.019
type train | step 4890 | loss 0.0246 0.0667 0.2649 0.4218 0.8045 | lr 3.9e-04 | norm 0.1420 | dt 0.018
type train | step 4900 | loss 0.0245 0.0666 0.2660 0.4263 0.8097 | lr 3.9e-04 | norm 0.0881 | dt 0.018
type train | step 4910 | loss 0.0245 0.0668 0.2613 0.4201 0.8026 | lr 3.9e-04 | norm 0.0748 | dt 0.018
type train | step 4920 | loss 0.0250 0.0678 0.2664 0.4340 0.8171 | lr 3.9e-04 | norm 0.0944 | dt 0.018
type train | step 4930 | loss 0.0245 0.0667 0.2642 0.4170 0.8020 | lr 3.9e-04 | norm 0.0747 | dt 0.018
type train | step 4940 | loss 0.0243 0.0665 0.2630 0.4193 0.8023 | lr 3.8e-04 | norm 0.1016 | dt 0.018
type train | step 4950 | loss 0.0245 0.0665 0.2634 0.4202 0.8024 | lr 3.8e-04 | norm 0.0966 | dt 0.018
type train | step 4960 | loss 0.0247 0.0674 0.2676 0.4342 0.8175 | lr 3.8e-04 | norm 0.0884 | dt 0.018
type train | step 4970 | loss 0.0245 0.0670 0.2623 0.4216 0.8061 | lr 3.8e-04 | norm 0.0804 | dt 0.018
type train | step 4980 | loss 0.0244 0.0663 0.2607 0.4164 0.7975 | lr 3.8e-04 | norm 0.1127 | dt 0.018
type train | step 4990 | loss 0.0246 0.0671 0.2638 0.4200 0.8041 | lr 3.7e-04 | norm 0.1030 | dt 0.018
type train | step 5000 | loss 0.0248 0.0676 0.2652 0.4216 0.8080 | lr 3.7e-04 | norm 0.0718 | dt 0.018
type train | step 5010 | loss 0.0247 0.0669 0.2634 0.4222 0.8063 | lr 3.7e-04 | norm 0.1279 | dt 0.018
type train | step 5020 | loss 0.0251 0.0684 0.2682 0.4314 0.8132 | lr 3.7e-04 | norm 0.1176 | dt 0.018
type train | step 5030 | loss 0.0247 0.0673 0.2647 0.4245 0.8116 | lr 3.7e-04 | norm 0.0933 | dt 0.018
type train | step 5040 | loss 0.0247 0.0667 0.2639 0.4228 0.8057 | lr 3.6e-04 | norm 0.0863 | dt 0.018
type train | step 5050 | loss 0.0243 0.0664 0.2642 0.4174 0.8003 | lr 3.6e-04 | norm 0.0745 | dt 0.018
type train | step 5060 | loss 0.0247 0.0672 0.2657 0.4241 0.8080 | lr 3.6e-04 | norm 0.0795 | dt 0.018
type train | step 5070 | loss 0.0245 0.0669 0.2642 0.4217 0.8081 | lr 3.6e-04 | norm 0.0957 | dt 0.018
type train | step 5080 | loss 0.0248 0.0678 0.2674 0.4320 0.8153 | lr 3.6e-04 | norm 0.1075 | dt 0.018
type train | step 5090 | loss 0.0248 0.0673 0.2632 0.4243 0.8081 | lr 3.5e-04 | norm 0.1007 | dt 0.018
type train | step 5100 | loss 0.0245 0.0665 0.2637 0.4227 0.8014 | lr 3.5e-04 | norm 0.1066 | dt 0.018
type train | step 5110 | loss 0.0243 0.0661 0.2636 0.4185 0.7990 | lr 3.5e-04 | norm 0.0798 | dt 0.018
type train | step 5120 | loss 0.0246 0.0673 0.2683 0.4268 0.8093 | lr 3.5e-04 | norm 0.0937 | dt 0.018
type train | step 5130 | loss 0.0246 0.0669 0.2627 0.4193 0.8052 | lr 3.5e-04 | norm 0.1340 | dt 0.018
type train | step 5140 | loss 0.0246 0.0665 0.2637 0.4199 0.8029 | lr 3.5e-04 | norm 0.1096 | dt 0.018
type train | step 5150 | loss 0.0246 0.0672 0.2631 0.4211 0.8017 | lr 3.4e-04 | norm 0.1091 | dt 0.018
type train | step 5160 | loss 0.0248 0.0672 0.2641 0.4266 0.8136 | lr 3.4e-04 | norm 0.1464 | dt 0.018
type train | step 5170 | loss 0.0245 0.0667 0.2622 0.4174 0.8027 | lr 3.4e-04 | norm 0.1186 | dt 0.018
type train | step 5180 | loss 0.0246 0.0670 0.2681 0.4237 0.8083 | lr 3.4e-04 | norm 0.0961 | dt 0.018
type train | step 5190 | loss 0.0249 0.0672 0.2637 0.4236 0.8112 | lr 3.4e-04 | norm 0.0903 | dt 0.019
type train | step 5200 | loss 0.0247 0.0671 0.2627 0.4211 0.8086 | lr 3.3e-04 | norm 0.1437 | dt 0.018
type train | step 5210 | loss 0.0246 0.0669 0.2652 0.4254 0.8063 | lr 3.3e-04 | norm 0.1167 | dt 0.018
type train | step 5220 | loss 0.0244 0.0664 0.2633 0.4227 0.8044 | lr 3.3e-04 | norm 0.1023 | dt 0.018
type train | step 5230 | loss 0.0246 0.0667 0.2641 0.4206 0.8067 | lr 3.3e-04 | norm 0.1018 | dt 0.018
type train | step 5240 | loss 0.0246 0.0667 0.2611 0.4161 0.8008 | lr 3.3e-04 | norm 0.0716 | dt 0.018
type train | step 5250 | loss 0.0245 0.0668 0.2645 0.4223 0.8074 | lr 3.3e-04 | norm 0.1436 | dt 0.018
type train | step 5260 | loss 0.0249 0.0675 0.2649 0.4301 0.8166 | lr 3.2e-04 | norm 0.0805 | dt 0.027
type train | step 5270 | loss 0.0245 0.0664 0.2644 0.4223 0.8028 | lr 3.2e-04 | norm 0.1088 | dt 0.018
type train | step 5280 | loss 0.0245 0.0672 0.2621 0.4194 0.8019 | lr 3.2e-04 | norm 0.0791 | dt 0.019
type train | step 5290 | loss 0.0246 0.0667 0.2642 0.4228 0.8045 | lr 3.2e-04 | norm 0.0790 | dt 0.018
type train | step 5300 | loss 0.0246 0.0671 0.2637 0.4222 0.8088 | lr 3.2e-04 | norm 0.0892 | dt 0.018
type train | step 5310 | loss 0.0246 0.0667 0.2665 0.4258 0.8095 | lr 3.1e-04 | norm 0.0893 | dt 0.018
type train | step 5320 | loss 0.0245 0.0667 0.2617 0.4182 0.8046 | lr 3.1e-04 | norm 0.1332 | dt 0.018
type train | step 5330 | loss 0.0246 0.0670 0.2649 0.4238 0.8051 | lr 3.1e-04 | norm 0.1486 | dt 0.018
type train | step 5340 | loss 0.0245 0.0667 0.2600 0.4155 0.7984 | lr 3.1e-04 | norm 0.1217 | dt 0.018
type train | step 5350 | loss 0.0248 0.0668 0.2641 0.4248 0.8040 | lr 3.1e-04 | norm 0.0671 | dt 0.018
type train | step 5360 | loss 0.0246 0.0669 0.2641 0.4223 0.8090 | lr 3.1e-04 | norm 0.0733 | dt 0.018
type train | step 5370 | loss 0.0245 0.0668 0.2646 0.4252 0.8082 | lr 3.0e-04 | norm 0.1081 | dt 0.018
type train | step 5380 | loss 0.0246 0.0669 0.2640 0.4233 0.8054 | lr 3.0e-04 | norm 0.0792 | dt 0.018
type train | step 5390 | loss 0.0246 0.0667 0.2653 0.4260 0.8062 | lr 3.0e-04 | norm 0.0953 | dt 0.018
type train | step 5400 | loss 0.0245 0.0666 0.2641 0.4271 0.8050 | lr 3.0e-04 | norm 0.0942 | dt 0.018
type train | step 5410 | loss 0.0249 0.0669 0.2656 0.4282 0.8104 | lr 3.0e-04 | norm 0.0795 | dt 0.018
type train | step 5420 | loss 0.0244 0.0662 0.2627 0.4188 0.8022 | lr 2.9e-04 | norm 0.0800 | dt 0.018
type train | step 5430 | loss 0.0244 0.0665 0.2659 0.4254 0.8065 | lr 2.9e-04 | norm 0.1116 | dt 0.019
type train | step 5440 | loss 0.0248 0.0671 0.2640 0.4240 0.8076 | lr 2.9e-04 | norm 0.1083 | dt 0.018
type train | step 5450 | loss 0.0246 0.0666 0.2657 0.4295 0.8106 | lr 2.9e-04 | norm 0.1183 | dt 0.018
type train | step 5460 | loss 0.0244 0.0672 0.2628 0.4206 0.8061 | lr 2.9e-04 | norm 0.1204 | dt 0.018
type train | step 5470 | loss 0.0247 0.0667 0.2646 0.4253 0.8048 | lr 2.9e-04 | norm 0.0909 | dt 0.018
type train | step 5480 | loss 0.0243 0.0659 0.2631 0.4176 0.7996 | lr 2.8e-04 | norm 0.0657 | dt 0.018
type train | step 5490 | loss 0.0245 0.0665 0.2647 0.4251 0.8071 | lr 2.8e-04 | norm 0.0938 | dt 0.021
type train | step 5500 | loss 0.0245 0.0664 0.2644 0.4210 0.8033 | lr 2.8e-04 | norm 0.1031 | dt 0.018
type train | step 5510 | loss 0.0245 0.0664 0.2654 0.4254 0.8085 | lr 2.8e-04 | norm 0.0983 | dt 0.018
type train | step 5520 | loss 0.0244 0.0666 0.2608 0.4193 0.8012 | lr 2.8e-04 | norm 0.0798 | dt 0.018
type train | step 5530 | loss 0.0249 0.0674 0.2658 0.4332 0.8159 | lr 2.8e-04 | norm 0.0897 | dt 0.018
type train | step 5540 | loss 0.0244 0.0664 0.2637 0.4163 0.8008 | lr 2.7e-04 | norm 0.0811 | dt 0.018
type train | step 5550 | loss 0.0243 0.0662 0.2625 0.4185 0.8011 | lr 2.7e-04 | norm 0.1083 | dt 0.018
type train | step 5560 | loss 0.0244 0.0662 0.2630 0.4196 0.8013 | lr 2.7e-04 | norm 0.0806 | dt 0.018
type train | step 5570 | loss 0.0247 0.0671 0.2670 0.4332 0.8162 | lr 2.7e-04 | norm 0.0687 | dt 0.018
type train | step 5580 | loss 0.0244 0.0667 0.2618 0.4208 0.8047 | lr 2.7e-04 | norm 0.0825 | dt 0.018
type train | step 5590 | loss 0.0244 0.0660 0.2603 0.4158 0.7963 | lr 2.7e-04 | norm 0.0774 | dt 0.018
type train | step 5600 | loss 0.0245 0.0668 0.2633 0.4192 0.8028 | lr 2.6e-04 | norm 0.0683 | dt 0.018
type train | step 5610 | loss 0.0247 0.0673 0.2647 0.4209 0.8069 | lr 2.6e-04 | norm 0.0703 | dt 0.018
type train | step 5620 | loss 0.0246 0.0666 0.2630 0.4216 0.8053 | lr 2.6e-04 | norm 0.1344 | dt 0.018
type train | step 5630 | loss 0.0250 0.0681 0.2675 0.4303 0.8118 | lr 2.6e-04 | norm 0.1066 | dt 0.018
type train | step 5640 | loss 0.0246 0.0671 0.2641 0.4236 0.8101 | lr 2.6e-04 | norm 0.0919 | dt 0.018
type train | step 5650 | loss 0.0246 0.0665 0.2634 0.4221 0.8046 | lr 2.6e-04 | norm 0.1015 | dt 0.018
type train | step 5660 | loss 0.0242 0.0662 0.2637 0.4166 0.7990 | lr 2.6e-04 | norm 0.0884 | dt 0.018
type train | step 5670 | loss 0.0247 0.0669 0.2652 0.4234 0.8069 | lr 2.5e-04 | norm 0.0803 | dt 0.018
type train | step 5680 | loss 0.0244 0.0667 0.2637 0.4210 0.8070 | lr 2.5e-04 | norm 0.0921 | dt 0.018
type train | step 5690 | loss 0.0248 0.0675 0.2668 0.4311 0.8139 | lr 2.5e-04 | norm 0.0781 | dt 0.018
type train | step 5700 | loss 0.0247 0.0671 0.2627 0.4236 0.8070 | lr 2.5e-04 | norm 0.0782 | dt 0.018
type train | step 5710 | loss 0.0245 0.0662 0.2632 0.4220 0.8002 | lr 2.5e-04 | norm 0.0887 | dt 0.018
type train | step 5720 | loss 0.0243 0.0658 0.2631 0.4177 0.7979 | lr 2.5e-04 | norm 0.0868 | dt 0.018
type train | step 5730 | loss 0.0245 0.0670 0.2679 0.4260 0.8080 | lr 2.4e-04 | norm 0.0762 | dt 0.018
type train | step 5740 | loss 0.0246 0.0666 0.2623 0.4187 0.8040 | lr 2.4e-04 | norm 0.0959 | dt 0.018
type train | step 5750 | loss 0.0246 0.0662 0.2632 0.4191 0.8018 | lr 2.4e-04 | norm 0.0836 | dt 0.018
type train | step 5760 | loss 0.0246 0.0670 0.2627 0.4204 0.8006 | lr 2.4e-04 | norm 0.0628 | dt 0.018
type train | step 5770 | loss 0.0248 0.0669 0.2636 0.4259 0.8124 | lr 2.4e-04 | norm 0.0870 | dt 0.018
type train | step 5780 | loss 0.0244 0.0664 0.2617 0.4165 0.8014 | lr 2.4e-04 | norm 0.0837 | dt 0.018
type train | step 5790 | loss 0.0245 0.0668 0.2677 0.4230 0.8072 | lr 2.4e-04 | norm 0.0863 | dt 0.018
type train | step 5800 | loss 0.0248 0.0670 0.2632 0.4228 0.8101 | lr 2.3e-04 | norm 0.0727 | dt 0.018
type train | step 5810 | loss 0.0246 0.0668 0.2623 0.4203 0.8073 | lr 2.3e-04 | norm 0.0931 | dt 0.018
type train | step 5820 | loss 0.0245 0.0666 0.2648 0.4247 0.8052 | lr 2.3e-04 | norm 0.0719 | dt 0.018
type train | step 5830 | loss 0.0244 0.0662 0.2629 0.4220 0.8034 | lr 2.3e-04 | norm 0.0899 | dt 0.018
type train | step 5840 | loss 0.0245 0.0664 0.2636 0.4197 0.8056 | lr 2.3e-04 | norm 0.1086 | dt 0.018
type train | step 5850 | loss 0.0245 0.0665 0.2607 0.4155 0.7997 | lr 2.3e-04 | norm 0.0709 | dt 0.018
type train | step 5860 | loss 0.0245 0.0665 0.2641 0.4215 0.8063 | lr 2.2e-04 | norm 0.0932 | dt 0.018
type train | step 5870 | loss 0.0248 0.0673 0.2645 0.4293 0.8153 | lr 2.2e-04 | norm 0.0750 | dt 0.018
type train | step 5880 | loss 0.0244 0.0662 0.2641 0.4217 0.8018 | lr 2.2e-04 | norm 0.0748 | dt 0.018
type train | step 5890 | loss 0.0244 0.0670 0.2617 0.4188 0.8009 | lr 2.2e-04 | norm 0.1184 | dt 0.018
type train | step 5900 | loss 0.0246 0.0664 0.2638 0.4221 0.8033 | lr 2.2e-04 | norm 0.0719 | dt 0.018
type train | step 5910 | loss 0.0246 0.0669 0.2632 0.4216 0.8076 | lr 2.2e-04 | norm 0.0899 | dt 0.018
type train | step 5920 | loss 0.0245 0.0665 0.2661 0.4251 0.8083 | lr 2.2e-04 | norm 0.0686 | dt 0.018
type train | step 5930 | loss 0.0245 0.0665 0.2613 0.4176 0.8033 | lr 2.1e-04 | norm 0.0741 | dt 0.018
type train | step 5940 | loss 0.0246 0.0667 0.2645 0.4231 0.8038 | lr 2.1e-04 | norm 0.0802 | dt 0.018
type train | step 5950 | loss 0.0244 0.0665 0.2597 0.4149 0.7973 | lr 2.1e-04 | norm 0.0968 | dt 0.018
type train | step 5960 | loss 0.0247 0.0666 0.2637 0.4243 0.8031 | lr 2.1e-04 | norm 0.0650 | dt 0.018
type train | step 5970 | loss 0.0246 0.0666 0.2637 0.4216 0.8079 | lr 2.1e-04 | norm 0.0814 | dt 0.018
type train | step 5980 | loss 0.0245 0.0666 0.2642 0.4246 0.8072 | lr 2.1e-04 | norm 0.1038 | dt 0.018
type train | step 5990 | loss 0.0246 0.0667 0.2636 0.4226 0.8042 | lr 2.1e-04 | norm 0.0756 | dt 0.018
type train | step 6000 | loss 0.0245 0.0665 0.2649 0.4253 0.8052 | lr 2.1e-04 | norm 0.0716 | dt 0.018
type train | step 6010 | loss 0.0245 0.0664 0.2637 0.4265 0.8040 | lr 2.0e-04 | norm 0.0872 | dt 0.018
type train | step 6020 | loss 0.0248 0.0667 0.2652 0.4277 0.8096 | lr 2.0e-04 | norm 0.0686 | dt 0.018
type train | step 6030 | loss 0.0244 0.0660 0.2624 0.4182 0.8013 | lr 2.0e-04 | norm 0.0693 | dt 0.018
type train | step 6040 | loss 0.0244 0.0662 0.2656 0.4247 0.8056 | lr 2.0e-04 | norm 0.0888 | dt 0.018
type train | step 6050 | loss 0.0247 0.0670 0.2636 0.4232 0.8063 | lr 2.0e-04 | norm 0.0898 | dt 0.018
type train | step 6060 | loss 0.0245 0.0664 0.2653 0.4287 0.8095 | lr 2.0e-04 | norm 0.0999 | dt 0.018
type train | step 6070 | loss 0.0244 0.0669 0.2624 0.4199 0.8050 | lr 2.0e-04 | norm 0.1172 | dt 0.018
type train | step 6080 | loss 0.0247 0.0665 0.2642 0.4248 0.8038 | lr 1.9e-04 | norm 0.0762 | dt 0.018
type train | step 6090 | loss 0.0243 0.0657 0.2628 0.4171 0.7987 | lr 1.9e-04 | norm 0.0698 | dt 0.018
type train | step 6100 | loss 0.0244 0.0663 0.2644 0.4245 0.8063 | lr 1.9e-04 | norm 0.1158 | dt 0.019
type train | step 6110 | loss 0.0245 0.0662 0.2641 0.4204 0.8022 | lr 1.9e-04 | norm 0.0657 | dt 0.018
type train | step 6120 | loss 0.0244 0.0662 0.2651 0.4248 0.8075 | lr 1.9e-04 | norm 0.0812 | dt 0.018
type train | step 6130 | loss 0.0243 0.0664 0.2605 0.4186 0.8002 | lr 1.9e-04 | norm 0.0796 | dt 0.018
type train | step 6140 | loss 0.0248 0.0672 0.2654 0.4326 0.8150 | lr 1.9e-04 | norm 0.0990 | dt 0.018
type train | step 6150 | loss 0.0244 0.0663 0.2633 0.4157 0.7998 | lr 1.9e-04 | norm 0.0688 | dt 0.018
type train | step 6160 | loss 0.0242 0.0660 0.2621 0.4179 0.8002 | lr 1.8e-04 | norm 0.0990 | dt 0.018
type train | step 6170 | loss 0.0244 0.0660 0.2627 0.4192 0.8005 | lr 1.8e-04 | norm 0.0610 | dt 0.018
type train | step 6180 | loss 0.0246 0.0669 0.2667 0.4326 0.8152 | lr 1.8e-04 | norm 0.0670 | dt 0.018
type train | step 6190 | loss 0.0244 0.0665 0.2614 0.4202 0.8037 | lr 1.8e-04 | norm 0.0831 | dt 0.018
type train | step 6200 | loss 0.0243 0.0659 0.2599 0.4154 0.7955 | lr 1.8e-04 | norm 0.0733 | dt 0.018
type train | step 6210 | loss 0.0245 0.0666 0.2630 0.4186 0.8019 | lr 1.8e-04 | norm 0.0633 | dt 0.018
type train | step 6220 | loss 0.0247 0.0671 0.2644 0.4204 0.8060 | lr 1.8e-04 | norm 0.0638 | dt 0.018
type train | step 6230 | loss 0.0245 0.0664 0.2627 0.4211 0.8045 | lr 1.8e-04 | norm 0.0687 | dt 0.018
type train | step 6240 | loss 0.0250 0.0679 0.2671 0.4295 0.8106 | lr 1.8e-04 | norm 0.1011 | dt 0.018
type train | step 6250 | loss 0.0245 0.0668 0.2636 0.4230 0.8091 | lr 1.7e-04 | norm 0.0876 | dt 0.018
type train | step 6260 | loss 0.0245 0.0663 0.2631 0.4216 0.8039 | lr 1.7e-04 | norm 0.0731 | dt 0.018
type train | step 6270 | loss 0.0242 0.0660 0.2633 0.4160 0.7981 | lr 1.7e-04 | norm 0.0728 | dt 0.018
type train | step 6280 | loss 0.0246 0.0667 0.2649 0.4229 0.8060 | lr 1.7e-04 | norm 0.0801 | dt 0.018
type train | step 6290 | loss 0.0244 0.0665 0.2635 0.4206 0.8062 | lr 1.7e-04 | norm 0.0902 | dt 0.018
type train | step 6300 | loss 0.0247 0.0674 0.2665 0.4304 0.8128 | lr 1.7e-04 | norm 0.0782 | dt 0.018
type train | step 6310 | loss 0.0247 0.0669 0.2625 0.4232 0.8063 | lr 1.7e-04 | norm 0.0716 | dt 0.018
type train | step 6320 | loss 0.0244 0.0661 0.2629 0.4216 0.7996 | lr 1.7e-04 | norm 0.0924 | dt 0.018
type train | step 6330 | loss 0.0242 0.0657 0.2628 0.4171 0.7970 | lr 1.7e-04 | norm 0.0701 | dt 0.018
type train | step 6340 | loss 0.0245 0.0668 0.2676 0.4255 0.8073 | lr 1.6e-04 | norm 0.0691 | dt 0.018
type train | step 6350 | loss 0.0245 0.0664 0.2620 0.4182 0.8032 | lr 1.6e-04 | norm 0.0772 | dt 0.018
type train | step 6360 | loss 0.0245 0.0661 0.2630 0.4186 0.8010 | lr 1.6e-04 | norm 0.0767 | dt 0.018
type train | step 6370 | loss 0.0246 0.0668 0.2624 0.4200 0.7999 | lr 1.6e-04 | norm 0.0580 | dt 0.018
type train | step 6380 | loss 0.0247 0.0667 0.2633 0.4253 0.8116 | lr 1.6e-04 | norm 0.1024 | dt 0.018
type train | step 6390 | loss 0.0244 0.0662 0.2614 0.4158 0.8005 | lr 1.6e-04 | norm 0.0820 | dt 0.019
type train | step 6400 | loss 0.0245 0.0666 0.2674 0.4224 0.8064 | lr 1.6e-04 | norm 0.0726 | dt 0.018
type train | step 6410 | loss 0.0248 0.0668 0.2630 0.4223 0.8092 | lr 1.6e-04 | norm 0.0808 | dt 0.018
type train | step 6420 | loss 0.0246 0.0667 0.2621 0.4198 0.8064 | lr 1.6e-04 | norm 0.0889 | dt 0.018
type train | step 6430 | loss 0.0245 0.0664 0.2646 0.4242 0.8045 | lr 1.5e-04 | norm 0.0674 | dt 0.018
type train | step 6440 | loss 0.0243 0.0660 0.2626 0.4216 0.8026 | lr 1.5e-04 | norm 0.0588 | dt 0.018
type train | step 6450 | loss 0.0245 0.0663 0.2633 0.4190 0.8046 | lr 1.5e-04 | norm 0.0779 | dt 0.018
type train | step 6460 | loss 0.0245 0.0663 0.2604 0.4150 0.7990 | lr 1.5e-04 | norm 0.0815 | dt 0.018
type train | step 6470 | loss 0.0244 0.0663 0.2638 0.4211 0.8056 | lr 1.5e-04 | norm 0.0733 | dt 0.018
type train | step 6480 | loss 0.0247 0.0671 0.2642 0.4287 0.8144 | lr 1.5e-04 | norm 0.0789 | dt 0.018
type train | step 6490 | loss 0.0244 0.0660 0.2638 0.4212 0.8011 | lr 1.5e-04 | norm 0.0705 | dt 0.018
type train | step 6500 | loss 0.0244 0.0669 0.2615 0.4184 0.8001 | lr 1.5e-04 | norm 0.1031 | dt 0.018
type train | step 6510 | loss 0.0245 0.0663 0.2635 0.4216 0.8026 | lr 1.5e-04 | norm 0.0658 | dt 0.018
type train | step 6520 | loss 0.0245 0.0668 0.2629 0.4211 0.8068 | lr 1.5e-04 | norm 0.0850 | dt 0.018
type train | step 6530 | loss 0.0245 0.0663 0.2658 0.4246 0.8077 | lr 1.5e-04 | norm 0.0983 | dt 0.018
type train | step 6540 | loss 0.0244 0.0664 0.2610 0.4171 0.8024 | lr 1.4e-04 | norm 0.0805 | dt 0.018
type train | step 6550 | loss 0.0245 0.0666 0.2642 0.4226 0.8031 | lr 1.4e-04 | norm 0.0631 | dt 0.018
type train | step 6560 | loss 0.0244 0.0663 0.2594 0.4145 0.7966 | lr 1.4e-04 | norm 0.0866 | dt 0.018
type train | step 6570 | loss 0.0247 0.0665 0.2635 0.4241 0.8025 | lr 1.4e-04 | norm 0.0702 | dt 0.018
type train | step 6580 | loss 0.0245 0.0665 0.2634 0.4211 0.8072 | lr 1.4e-04 | norm 0.0639 | dt 0.018
type train | step 6590 | loss 0.0244 0.0665 0.2639 0.4241 0.8066 | lr 1.4e-04 | norm 0.0955 | dt 0.018
type train | step 6600 | loss 0.0245 0.0666 0.2633 0.4221 0.8034 | lr 1.4e-04 | norm 0.0773 | dt 0.018
type train | step 6610 | loss 0.0245 0.0664 0.2647 0.4248 0.8046 | lr 1.4e-04 | norm 0.0882 | dt 0.018
type train | step 6620 | loss 0.0244 0.0662 0.2635 0.4260 0.8034 | lr 1.4e-04 | norm 0.0855 | dt 0.018
type train | step 6630 | loss 0.0248 0.0665 0.2649 0.4274 0.8090 | lr 1.4e-04 | norm 0.0647 | dt 0.018
type train | step 6640 | loss 0.0243 0.0658 0.2622 0.4179 0.8006 | lr 1.4e-04 | norm 0.0714 | dt 0.018
type train | step 6650 | loss 0.0244 0.0660 0.2653 0.4242 0.8049 | lr 1.3e-04 | norm 0.1160 | dt 0.018
type train | step 6660 | loss 0.0247 0.0668 0.2633 0.4226 0.8054 | lr 1.3e-04 | norm 0.0947 | dt 0.018
type train | step 6670 | loss 0.0245 0.0663 0.2651 0.4282 0.8087 | lr 1.3e-04 | norm 0.0758 | dt 0.018
type train | step 6680 | loss 0.0243 0.0668 0.2621 0.4195 0.8042 | lr 1.3e-04 | norm 0.0889 | dt 0.018
type train | step 6690 | loss 0.0246 0.0663 0.2640 0.4244 0.8033 | lr 1.3e-04 | norm 0.0716 | dt 0.018
type train | step 6700 | loss 0.0242 0.0656 0.2626 0.4167 0.7981 | lr 1.3e-04 | norm 0.0730 | dt 0.018
type train | step 6710 | loss 0.0244 0.0662 0.2642 0.4241 0.8057 | lr 1.3e-04 | norm 0.1215 | dt 0.020
type train | step 6720 | loss 0.0244 0.0661 0.2639 0.4200 0.8016 | lr 1.3e-04 | norm 0.0754 | dt 0.018
type train | step 6730 | loss 0.0244 0.0660 0.2648 0.4243 0.8068 | lr 1.3e-04 | norm 0.0862 | dt 0.018
type train | step 6740 | loss 0.0243 0.0662 0.2602 0.4181 0.7994 | lr 1.3e-04 | norm 0.0720 | dt 0.018
type train | step 6750 | loss 0.0248 0.0671 0.2651 0.4322 0.8143 | lr 1.3e-04 | norm 0.0798 | dt 0.018
type train | step 6760 | loss 0.0243 0.0661 0.2631 0.4153 0.7991 | lr 1.3e-04 | norm 0.0732 | dt 0.018
type train | step 6770 | loss 0.0242 0.0659 0.2619 0.4176 0.7996 | lr 1.3e-04 | norm 0.0769 | dt 0.018
type train | step 6780 | loss 0.0243 0.0659 0.2625 0.4189 0.8001 | lr 1.3e-04 | norm 0.0725 | dt 0.018
type train | step 6790 | loss 0.0246 0.0668 0.2665 0.4321 0.8146 | lr 1.2e-04 | norm 0.0670 | dt 0.018
type train | step 6800 | loss 0.0244 0.0663 0.2611 0.4197 0.8030 | lr 1.2e-04 | norm 0.0723 | dt 0.018
type train | step 6810 | loss 0.0243 0.0657 0.2597 0.4150 0.7951 | lr 1.2e-04 | norm 0.0579 | dt 0.018
type train | step 6820 | loss 0.0244 0.0665 0.2628 0.4182 0.8012 | lr 1.2e-04 | norm 0.0604 | dt 0.020
type train | step 6830 | loss 0.0247 0.0670 0.2642 0.4200 0.8055 | lr 1.2e-04 | norm 0.0749 | dt 0.021
type train | step 6840 | loss 0.0245 0.0663 0.2625 0.4208 0.8041 | lr 1.2e-04 | norm 0.0945 | dt 0.019
type train | step 6850 | loss 0.0249 0.0678 0.2668 0.4290 0.8097 | lr 1.2e-04 | norm 0.1018 | dt 0.018
type train | step 6860 | loss 0.0245 0.0667 0.2634 0.4225 0.8083 | lr 1.2e-04 | norm 0.0773 | dt 0.018
type train | step 6870 | loss 0.0245 0.0661 0.2629 0.4213 0.8033 | lr 1.2e-04 | norm 0.0767 | dt 0.019
type train | step 6880 | loss 0.0242 0.0659 0.2632 0.4156 0.7975 | lr 1.2e-04 | norm 0.0731 | dt 0.018
type train | step 6890 | loss 0.0246 0.0666 0.2648 0.4225 0.8055 | lr 1.2e-04 | norm 0.0800 | dt 0.018
type train | step 6900 | loss 0.0244 0.0664 0.2633 0.4203 0.8057 | lr 1.2e-04 | norm 0.0895 | dt 0.018
type train | step 6910 | loss 0.0247 0.0673 0.2662 0.4299 0.8120 | lr 1.2e-04 | norm 0.0822 | dt 0.018
type train | step 6920 | loss 0.0247 0.0668 0.2623 0.4229 0.8057 | lr 1.2e-04 | norm 0.0681 | dt 0.018
type train | step 6930 | loss 0.0244 0.0660 0.2628 0.4213 0.7990 | lr 1.2e-04 | norm 0.0805 | dt 0.018
type train | step 6940 | loss 0.0242 0.0655 0.2626 0.4167 0.7964 | lr 1.2e-04 | norm 0.0699 | dt 0.018
type train | step 6950 | loss 0.0245 0.0667 0.2674 0.4251 0.8067 | lr 1.1e-04 | norm 0.0718 | dt 0.018
type train | step 6960 | loss 0.0245 0.0663 0.2619 0.4179 0.8027 | lr 1.1e-04 | norm 0.0608 | dt 0.018
type train | step 6970 | loss 0.0245 0.0660 0.2627 0.4183 0.8004 | lr 1.1e-04 | norm 0.0727 | dt 0.018
type train | step 6980 | loss 0.0245 0.0667 0.2622 0.4197 0.7994 | lr 1.1e-04 | norm 0.0568 | dt 0.018
type train | step 6990 | loss 0.0247 0.0666 0.2632 0.4250 0.8109 | lr 1.1e-04 | norm 0.0796 | dt 0.018
type train | step 7000 | loss 0.0244 0.0661 0.2612 0.4154 0.7999 | lr 1.1e-04 | norm 0.1025 | dt 0.018
type train | step 7010 | loss 0.0244 0.0665 0.2672 0.4221 0.8059 | lr 1.1e-04 | norm 0.0960 | dt 0.018
type train | step 7020 | loss 0.0248 0.0667 0.2628 0.4220 0.8087 | lr 1.1e-04 | norm 0.0906 | dt 0.019
type train | step 7030 | loss 0.0245 0.0666 0.2619 0.4194 0.8057 | lr 1.1e-04 | norm 0.0776 | dt 0.019
type train | step 7040 | loss 0.0245 0.0663 0.2644 0.4239 0.8039 | lr 1.1e-04 | norm 0.0648 | dt 0.018
type train | step 7050 | loss 0.0243 0.0659 0.2625 0.4213 0.8021 | lr 1.1e-04 | norm 0.0723 | dt 0.019
type train | step 7060 | loss 0.0244 0.0661 0.2632 0.4187 0.8039 | lr 1.1e-04 | norm 0.0942 | dt 0.018
type train | step 7070 | loss 0.0244 0.0662 0.2603 0.4147 0.7985 | lr 1.1e-04 | norm 0.0719 | dt 0.018
type train | step 7080 | loss 0.0244 0.0663 0.2636 0.4208 0.8050 | lr 1.1e-04 | norm 0.0741 | dt 0.024
type train | step 7090 | loss 0.0247 0.0670 0.2640 0.4284 0.8138 | lr 1.1e-04 | norm 0.0700 | dt 0.018
type train | step 7100 | loss 0.0244 0.0659 0.2637 0.4209 0.8007 | lr 1.1e-04 | norm 0.0687 | dt 0.018
type train | step 7110 | loss 0.0243 0.0667 0.2613 0.4181 0.7996 | lr 1.1e-04 | norm 0.1030 | dt 0.018
type train | step 7120 | loss 0.0245 0.0662 0.2634 0.4213 0.8020 | lr 1.1e-04 | norm 0.0637 | dt 0.018
type train | step 7130 | loss 0.0245 0.0666 0.2627 0.4208 0.8064 | lr 1.1e-04 | norm 0.0855 | dt 0.018
type train | step 7140 | loss 0.0244 0.0662 0.2657 0.4243 0.8072 | lr 1.1e-04 | norm 0.0570 | dt 0.018
type train | step 7150 | loss 0.0244 0.0662 0.2608 0.4168 0.8019 | lr 1.1e-04 | norm 0.0703 | dt 0.018
type train | step 7160 | loss 0.0245 0.0665 0.2640 0.4223 0.8026 | lr 1.1e-04 | norm 0.0628 | dt 0.018
type train | step 7170 | loss 0.0244 0.0662 0.2592 0.4143 0.7961 | lr 1.1e-04 | norm 0.0677 | dt 0.018
type train | step 7180 | loss 0.0246 0.0664 0.2633 0.4239 0.8021 | lr 1.0e-04 | norm 0.0634 | dt 0.019
type train | step 7190 | loss 0.0245 0.0664 0.2632 0.4208 0.8068 | lr 1.0e-04 | norm 0.0865 | dt 0.019
type train | step 7200 | loss 0.0244 0.0664 0.2638 0.4238 0.8061 | lr 1.0e-04 | norm 0.0634 | dt 0.018
type train | step 7210 | loss 0.0245 0.0665 0.2631 0.4218 0.8028 | lr 1.0e-04 | norm 0.0721 | dt 0.018
type train | step 7220 | loss 0.0244 0.0663 0.2645 0.4246 0.8042 | lr 1.0e-04 | norm 0.0794 | dt 0.018
type train | step 7230 | loss 0.0244 0.0661 0.2632 0.4257 0.8029 | lr 1.0e-04 | norm 0.0809 | dt 0.019
type train | step 7240 | loss 0.0248 0.0664 0.2647 0.4272 0.8087 | lr 1.0e-04 | norm 0.1074 | dt 0.018
type train | step 7250 | loss 0.0243 0.0658 0.2620 0.4176 0.8001 | lr 1.0e-04 | norm 0.0665 | dt 0.018
type train | step 7260 | loss 0.0243 0.0659 0.2651 0.4239 0.8044 | lr 1.0e-04 | norm 0.0879 | dt 0.018
type train | step 7270 | loss 0.0247 0.0667 0.2630 0.4222 0.8048 | lr 1.0e-04 | norm 0.0755 | dt 0.018
type train | step 7280 | loss 0.0245 0.0662 0.2649 0.4278 0.8083 | lr 1.0e-04 | norm 0.0935 | dt 0.018
type train | step 7290 | loss 0.0243 0.0666 0.2618 0.4192 0.8037 | lr 1.0e-04 | norm 0.0844 | dt 0.018
type train | step 7300 | loss 0.0246 0.0662 0.2638 0.4242 0.8029 | lr 1.0e-04 | norm 0.0651 | dt 0.018
type train | step 7310 | loss 0.0242 0.0655 0.2624 0.4164 0.7977 | lr 1.0e-04 | norm 0.0695 | dt 0.018
type train | step 7320 | loss 0.0244 0.0661 0.2640 0.4238 0.8054 | lr 1.0e-04 | norm 0.1089 | dt 0.020
type train | step 7330 | loss 0.0244 0.0660 0.2637 0.4198 0.8012 | lr 1.0e-04 | norm 0.0664 | dt 0.018
type train | step 7340 | loss 0.0244 0.0659 0.2647 0.4241 0.8063 | lr 1.0e-04 | norm 0.0732 | dt 0.018
type train | step 7350 | loss 0.0243 0.0661 0.2600 0.4179 0.7990 | lr 1.0e-04 | norm 0.0613 | dt 0.018
type train | step 7360 | loss 0.0248 0.0670 0.2650 0.4320 0.8140 | lr 1.0e-04 | norm 0.0764 | dt 0.018
type train | step 7370 | loss 0.0243 0.0660 0.2629 0.4151 0.7987 | lr 1.0e-04 | norm 0.0844 | dt 0.018
type train | step 7380 | loss 0.0242 0.0658 0.2617 0.4173 0.7993 | lr 1.0e-04 | norm 0.0911 | dt 0.020
type train | step 7390 | loss 0.0243 0.0658 0.2624 0.4188 0.7997 | lr 1.0e-04 | norm 0.0678 | dt 0.018
type train | step 7400 | loss 0.0246 0.0667 0.2663 0.4319 0.8141 | lr 1.0e-04 | norm 0.0663 | dt 0.018
type train | step 7410 | loss 0.0243 0.0662 0.2610 0.4195 0.8026 | lr 1.0e-04 | norm 0.0754 | dt 0.018
type train | step 7420 | loss 0.0243 0.0657 0.2596 0.4148 0.7948 | lr 1.0e-04 | norm 0.0655 | dt 0.018
type train | step 7430 | loss 0.0244 0.0664 0.2627 0.4180 0.8008 | lr 1.0e-04 | norm 0.0718 | dt 0.018
type train | step 7440 | loss 0.0246 0.0669 0.2640 0.4198 0.8052 | lr 1.0e-04 | norm 0.0885 | dt 0.018
type train | step 7450 | loss 0.0245 0.0662 0.2624 0.4206 0.8038 | lr 1.0e-04 | norm 0.1102 | dt 0.018
type train | step 7460 | loss 0.0249 0.0677 0.2666 0.4287 0.8093 | lr 1.0e-04 | norm 0.1042 | dt 0.018
type train | step 7470 | loss 0.0245 0.0666 0.2633 0.4223 0.8079 | lr 1.0e-04 | norm 0.0763 | dt 0.018
type train | step 7480 | loss 0.0245 0.0660 0.2628 0.4211 0.8030 | lr 1.0e-04 | norm 0.0867 | dt 0.018
type train | step 7490 | loss 0.0241 0.0658 0.2631 0.4154 0.7971 | lr 1.0e-04 | norm 0.0893 | dt 0.019
type train | step 7500 | loss 0.0246 0.0665 0.2647 0.4223 0.8051 | lr 1.0e-04 | norm 0.0932 | dt 0.018