mlplayer-top20 / eval.log
davidquarel's picture
Upload folder using huggingface_hub
33f2ad2 verified
type eval | step 0 | loss 387.9313 610.8442 1339.3440 1722.0432 | checkpoint False | ce_loss 1.5684 | sae_losses 386.5418 1.3896 600.4725 10.3716 1316.5112 22.8331 1626.5895 95.4537 | ce_loss_increases 3.3836 3.4417 3.2603 2.2772 | compound_ce_loss_increase 4.5394 | l0s 20.2021 20.1830 20.2392 20.2217 20.1722 20.2133 20.2299 20.2164 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 386.5418 1.3896 600.4725 10.3716 1316.5112 22.8331 1626.5895 95.4537
type eval | step 250 | loss 33.0915 75.8742 211.1147 306.7224 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 33.0753 0.0162 75.1329 0.7414 207.6419 3.4730 294.7512 11.9711 | ce_loss_increases 1.2073 2.9429 2.9480 2.2023 | compound_ce_loss_increase 4.1077 | l0s 20.0000 19.9999 20.0000 20.0000 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 33.0753 0.0162 75.1329 0.7414 207.6419 3.4730 294.7512 11.9711
type eval | step 500 | loss 0.6977 4.5824 15.3222 30.4926 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.6969 0.0008 4.5255 0.0568 15.0017 0.3205 29.5599 0.9327 | ce_loss_increases 0.0084 0.2676 0.5823 0.5003 | compound_ce_loss_increase 1.2268 | l0s 19.9999 19.9995 20.0000 19.9998 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.6969 0.0008 4.5255 0.0568 15.0017 0.3205 29.5599 0.9327
type eval | step 750 | loss 0.1569 1.3735 6.6969 19.4454 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.1567 0.0002 1.3497 0.0239 6.5077 0.1893 18.8672 0.5783 | ce_loss_increases 0.0019 0.0822 0.2793 0.2757 | compound_ce_loss_increase 0.5771 | l0s 19.9990 19.9930 19.9999 19.9990 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1567 0.0002 1.3497 0.0239 6.5077 0.1893 18.8672 0.5783
type eval | step 1000 | loss 0.1087 0.8191 4.8886 16.8712 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.1086 0.0001 0.8022 0.0170 4.7304 0.1581 16.3747 0.4965 | ce_loss_increases 0.0009 0.0484 0.2027 0.2317 | compound_ce_loss_increase 0.4395 | l0s 19.9999 19.9834 19.9994 19.9986 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1086 0.0001 0.8022 0.0170 4.7304 0.1581 16.3747 0.4965
type eval | step 1250 | loss 0.0914 0.6107 4.1955 15.7677 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0913 0.0001 0.5964 0.0142 4.0493 0.1462 15.3024 0.4654 | ce_loss_increases 0.0012 0.0375 0.1786 0.2135 | compound_ce_loss_increase 0.3842 | l0s 20.0000 19.9832 19.9994 19.9981 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0913 0.0001 0.5964 0.0142 4.0493 0.1462 15.3024 0.4654
type eval | step 1500 | loss 0.0820 0.5258 3.8269 14.9671 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0820 0.0001 0.5130 0.0128 3.6874 0.1395 14.5205 0.4466 | ce_loss_increases 0.0009 0.0298 0.1656 0.2049 | compound_ce_loss_increase 0.3545 | l0s 20.0000 19.9820 19.9996 19.9971 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0820 0.0001 0.5130 0.0128 3.6874 0.1395 14.5205 0.4466
type eval | step 1750 | loss 0.0758 0.4745 3.6008 14.4253 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0757 0.0001 0.4626 0.0119 3.4653 0.1355 13.9933 0.4321 | ce_loss_increases 0.0007 0.0262 0.1581 0.1995 | compound_ce_loss_increase 0.3461 | l0s 19.9999 19.9797 19.9997 19.9974 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0757 0.0001 0.4626 0.0119 3.4653 0.1355 13.9933 0.4321
type eval | step 2000 | loss 0.0697 0.4407 3.4374 14.0189 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0696 0.0001 0.4295 0.0112 3.3048 0.1326 13.5984 0.4205 | ce_loss_increases 0.0002 0.0247 0.1556 0.1888 | compound_ce_loss_increase 0.3336 | l0s 20.0000 19.9826 19.9999 19.9965 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0696 0.0001 0.4295 0.0112 3.3048 0.1326 13.5984 0.4205
type eval | step 2250 | loss 0.0654 0.4169 3.3140 13.6836 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0653 0.0001 0.4062 0.0107 3.1838 0.1302 13.2743 0.4094 | ce_loss_increases 0.0005 0.0231 0.1482 0.1845 | compound_ce_loss_increase 0.3276 | l0s 20.0000 19.9714 19.9999 19.9965 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0653 0.0001 0.4062 0.0107 3.1838 0.1302 13.2743 0.4094
type eval | step 2500 | loss 0.0620 0.4001 3.2210 13.3851 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0620 0.0001 0.3898 0.0103 3.0928 0.1282 12.9855 0.3996 | ce_loss_increases 0.0004 0.0222 0.1442 0.1822 | compound_ce_loss_increase 0.3253 | l0s 20.0000 19.9775 19.9999 19.9958 20.0000 19.9999 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0620 0.0001 0.3898 0.0103 3.0928 0.1282 12.9855 0.3996
type eval | step 2750 | loss 0.0590 0.3859 3.1295 13.1120 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0589 0.0001 0.3760 0.0099 3.0035 0.1260 12.7208 0.3912 | ce_loss_increases 0.0006 0.0192 0.1390 0.1784 | compound_ce_loss_increase 0.3138 | l0s 20.0000 19.9636 19.9999 19.9952 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0589 0.0001 0.3760 0.0099 3.0035 0.1260 12.7208 0.3912
type eval | step 3000 | loss 0.0568 0.3758 3.0728 12.9323 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0567 0.0001 0.3662 0.0097 2.9480 0.1248 12.5468 0.3854 | ce_loss_increases 0.0007 0.0190 0.1360 0.1768 | compound_ce_loss_increase 0.3091 | l0s 20.0000 19.9699 19.9999 19.9950 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0567 0.0001 0.3662 0.0097 2.9480 0.1248 12.5468 0.3854
type eval | step 3250 | loss 0.0545 0.3663 3.0309 12.7911 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0545 0.0000 0.3569 0.0094 2.9071 0.1238 12.4111 0.3800 | ce_loss_increases 0.0005 0.0180 0.1361 0.1725 | compound_ce_loss_increase 0.3037 | l0s 20.0000 19.9669 19.9998 19.9947 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0545 0.0000 0.3569 0.0094 2.9071 0.1238 12.4111 0.3800
type eval | step 3500 | loss 0.0527 0.3602 3.0060 12.6633 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0527 0.0000 0.3509 0.0093 2.8828 0.1233 12.2880 0.3752 | ce_loss_increases 0.0005 0.0171 0.1384 0.1709 | compound_ce_loss_increase 0.3025 | l0s 19.9999 19.9667 19.9998 19.9943 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0527 0.0000 0.3509 0.0093 2.8828 0.1233 12.2880 0.3752
type eval | step 3750 | loss 0.0502 0.3503 2.9675 12.5276 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0502 0.0000 0.3412 0.0091 2.8450 0.1226 12.1567 0.3709 | ce_loss_increases 0.0004 0.0159 0.1367 0.1693 | compound_ce_loss_increase 0.2990 | l0s 20.0000 19.9664 19.9999 19.9944 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0502 0.0000 0.3412 0.0091 2.8450 0.1226 12.1567 0.3709
type eval | step 4000 | loss 0.0487 0.3438 2.9356 12.3813 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0486 0.0000 0.3349 0.0089 2.8140 0.1216 12.0151 0.3662 | ce_loss_increases 0.0001 0.0149 0.1350 0.1686 | compound_ce_loss_increase 0.3019 | l0s 20.0000 19.9625 20.0000 19.9944 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0486 0.0000 0.3349 0.0089 2.8140 0.1216 12.0151 0.3662
type eval | step 4250 | loss 0.0473 0.3393 2.9105 12.2700 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0472 0.0000 0.3305 0.0088 2.7896 0.1209 11.9069 0.3630 | ce_loss_increases 0.0006 0.0148 0.1340 0.1685 | compound_ce_loss_increase 0.3004 | l0s 20.0000 19.9623 20.0000 19.9950 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0472 0.0000 0.3305 0.0088 2.7896 0.1209 11.9069 0.3630
type eval | step 4500 | loss 0.0457 0.3336 2.8861 12.1730 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0456 0.0000 0.3249 0.0087 2.7659 0.1202 11.8128 0.3602 | ce_loss_increases 0.0004 0.0162 0.1320 0.1701 | compound_ce_loss_increase 0.3009 | l0s 20.0000 19.9621 20.0000 19.9944 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0456 0.0000 0.3249 0.0087 2.7659 0.1202 11.8128 0.3602
type eval | step 4750 | loss 0.0443 0.3296 2.8601 12.0779 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0443 0.0000 0.3210 0.0086 2.7408 0.1193 11.7206 0.3573 | ce_loss_increases 0.0003 0.0166 0.1323 0.1693 | compound_ce_loss_increase 0.2961 | l0s 19.9999 19.9571 19.9999 19.9942 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0443 0.0000 0.3210 0.0086 2.7408 0.1193 11.7206 0.3573
type eval | step 5000 | loss 0.0433 0.3255 2.8421 11.9861 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0433 0.0000 0.3169 0.0085 2.7237 0.1184 11.6309 0.3551 | ce_loss_increases 0.0004 0.0165 0.1307 0.1683 | compound_ce_loss_increase 0.2921 | l0s 20.0000 19.9602 20.0000 19.9941 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0433 0.0000 0.3169 0.0085 2.7237 0.1184 11.6309 0.3551
type eval | step 5250 | loss 0.0422 0.3204 2.8147 11.8916 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0422 0.0000 0.3119 0.0084 2.6977 0.1171 11.5394 0.3522 | ce_loss_increases 0.0001 0.0158 0.1313 0.1648 | compound_ce_loss_increase 0.2819 | l0s 19.9999 19.9698 19.9999 19.9934 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0422 0.0000 0.3119 0.0084 2.6977 0.1171 11.5394 0.3522
type eval | step 5500 | loss 0.0414 0.3166 2.8055 11.8453 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0414 0.0000 0.3082 0.0084 2.6888 0.1166 11.4948 0.3505 | ce_loss_increases 0.0003 0.0160 0.1306 0.1621 | compound_ce_loss_increase 0.2843 | l0s 19.9999 19.9685 20.0000 19.9932 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0414 0.0000 0.3082 0.0084 2.6888 0.1166 11.4948 0.3505
type eval | step 5750 | loss 0.0410 0.3151 2.8003 11.8083 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0410 0.0000 0.3068 0.0083 2.6841 0.1162 11.4595 0.3488 | ce_loss_increases 0.0005 0.0160 0.1318 0.1601 | compound_ce_loss_increase 0.2831 | l0s 19.9999 19.9402 20.0000 19.9933 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0410 0.0000 0.3068 0.0083 2.6841 0.1162 11.4595 0.3488
type eval | step 6000 | loss 0.0402 0.3137 2.7961 11.7815 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0402 0.0000 0.3054 0.0083 2.6801 0.1160 11.4350 0.3466 | ce_loss_increases 0.0002 0.0166 0.1311 0.1564 | compound_ce_loss_increase 0.2789 | l0s 19.9999 19.9489 20.0000 19.9927 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0402 0.0000 0.3054 0.0083 2.6801 0.1160 11.4350 0.3466
type eval | step 6250 | loss 0.0394 0.3095 2.7865 11.7699 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0394 0.0000 0.3012 0.0082 2.6707 0.1158 11.4256 0.3443 | ce_loss_increases 0.0005 0.0159 0.1297 0.1572 | compound_ce_loss_increase 0.2753 | l0s 19.9999 19.9624 20.0000 19.9931 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0394 0.0000 0.3012 0.0082 2.6707 0.1158 11.4256 0.3443
type eval | step 6500 | loss 0.0386 0.3072 2.7739 11.7269 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0386 0.0000 0.2990 0.0082 2.6586 0.1152 11.3852 0.3417 | ce_loss_increases 0.0004 0.0158 0.1257 0.1573 | compound_ce_loss_increase 0.2727 | l0s 20.0000 19.9603 20.0000 19.9943 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0386 0.0000 0.2990 0.0082 2.6586 0.1152 11.3852 0.3417
type eval | step 6750 | loss 0.0382 0.3052 2.7662 11.7014 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0381 0.0000 0.2970 0.0082 2.6515 0.1147 11.3615 0.3400 | ce_loss_increases 0.0003 0.0162 0.1232 0.1576 | compound_ce_loss_increase 0.2732 | l0s 19.9999 19.9540 20.0000 19.9947 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0381 0.0000 0.2970 0.0082 2.6515 0.1147 11.3615 0.3400
type eval | step 7000 | loss 0.0374 0.3029 2.7507 11.6883 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0374 0.0000 0.2947 0.0081 2.6362 0.1144 11.3504 0.3379 | ce_loss_increases 0.0004 0.0160 0.1219 0.1563 | compound_ce_loss_increase 0.2707 | l0s 19.9998 19.9452 20.0000 19.9943 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0374 0.0000 0.2947 0.0081 2.6362 0.1144 11.3504 0.3379
type eval | step 7250 | loss 0.0369 0.3006 2.7381 11.6585 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0368 0.0000 0.2926 0.0081 2.6240 0.1141 11.3225 0.3361 | ce_loss_increases 0.0003 0.0166 0.1227 0.1557 | compound_ce_loss_increase 0.2711 | l0s 19.9997 19.9503 20.0000 19.9939 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0368 0.0000 0.2926 0.0081 2.6240 0.1141 11.3225 0.3361
type eval | step 7500 | loss 0.0363 0.2981 2.7256 11.6233 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0363 0.0000 0.2901 0.0080 2.6119 0.1137 11.2894 0.3340 | ce_loss_increases 0.0004 0.0166 0.1212 0.1543 | compound_ce_loss_increase 0.2702 | l0s 19.9998 19.9562 20.0000 19.9945 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0363 0.0000 0.2901 0.0080 2.6119 0.1137 11.2894 0.3340
type eval | step 7750 | loss 0.0357 0.2961 2.7036 11.5744 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0357 0.0000 0.2881 0.0080 2.5906 0.1130 11.2431 0.3313 | ce_loss_increases 0.0003 0.0158 0.1199 0.1504 | compound_ce_loss_increase 0.2652 | l0s 19.9997 19.9602 20.0000 19.9942 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0357 0.0000 0.2881 0.0080 2.5906 0.1130 11.2431 0.3313
type eval | step 8000 | loss 0.0352 0.2944 2.6969 11.5568 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0352 0.0000 0.2864 0.0080 2.5842 0.1127 11.2273 0.3295 | ce_loss_increases 0.0003 0.0149 0.1184 0.1494 | compound_ce_loss_increase 0.2627 | l0s 19.9997 19.9494 20.0000 19.9942 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0352 0.0000 0.2864 0.0080 2.5842 0.1127 11.2273 0.3295
type eval | step 8250 | loss 0.0351 0.2946 2.6981 11.5361 | checkpoint True False False True | ce_loss 1.5684 | sae_losses 0.0351 0.0000 0.2866 0.0079 2.5856 0.1126 11.2079 0.3282 | ce_loss_increases 0.0003 0.0153 0.1173 0.1472 | compound_ce_loss_increase 0.2594 | l0s 19.9999 19.9508 20.0000 19.9943 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0351 0.0000 0.2866 0.0079 2.5856 0.1126 11.2079 0.3282
type eval | step 8500 | loss 0.0344 0.2940 2.6940 11.5296 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0344 0.0000 0.2860 0.0079 2.5815 0.1125 11.2023 0.3273 | ce_loss_increases 0.0002 0.0150 0.1170 0.1461 | compound_ce_loss_increase 0.2612 | l0s 19.9997 19.9574 20.0000 19.9941 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0344 0.0000 0.2860 0.0079 2.5815 0.1125 11.2023 0.3273
type eval | step 8750 | loss 0.0343 0.2929 2.6928 11.5313 | checkpoint True True True False | ce_loss 1.5684 | sae_losses 0.0343 0.0000 0.2850 0.0079 2.5803 0.1125 11.2052 0.3260 | ce_loss_increases 0.0002 0.0143 0.1172 0.1451 | compound_ce_loss_increase 0.2631 | l0s 19.9998 19.9606 20.0000 19.9948 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0343 0.0000 0.2850 0.0079 2.5803 0.1125 11.2052 0.3260
type eval | step 9000 | loss 0.0337 0.2924 2.6835 11.5085 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0337 0.0000 0.2845 0.0079 2.5713 0.1122 11.1840 0.3245 | ce_loss_increases 0.0003 0.0148 0.1155 0.1438 | compound_ce_loss_increase 0.2620 | l0s 19.9997 19.9546 20.0000 19.9939 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0337 0.0000 0.2845 0.0079 2.5713 0.1122 11.1840 0.3245
type eval | step 9250 | loss 0.0334 0.2903 2.6802 11.4924 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0334 0.0000 0.2824 0.0079 2.5681 0.1120 11.1692 0.3232 | ce_loss_increases 0.0002 0.0142 0.1130 0.1427 | compound_ce_loss_increase 0.2588 | l0s 19.9996 19.9595 20.0000 19.9945 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0334 0.0000 0.2824 0.0079 2.5681 0.1120 11.1692 0.3232
type eval | step 9500 | loss 0.0331 0.2883 2.6733 11.4938 | checkpoint True True True False | ce_loss 1.5684 | sae_losses 0.0331 0.0000 0.2804 0.0079 2.5613 0.1120 11.1720 0.3218 | ce_loss_increases 0.0003 0.0142 0.1127 0.1440 | compound_ce_loss_increase 0.2604 | l0s 19.9997 19.9580 20.0000 19.9945 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0331 0.0000 0.2804 0.0079 2.5613 0.1120 11.1720 0.3218
type eval | step 9750 | loss 0.0331 0.2871 2.6667 11.4795 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0331 0.0000 0.2792 0.0078 2.5548 0.1119 11.1593 0.3203 | ce_loss_increases 0.0004 0.0142 0.1139 0.1440 | compound_ce_loss_increase 0.2596 | l0s 19.9995 19.9551 20.0000 19.9942 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0331 0.0000 0.2792 0.0078 2.5548 0.1119 11.1593 0.3203
type eval | step 10000 | loss 0.0325 0.2854 2.6580 11.4650 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0325 0.0000 0.2776 0.0078 2.5463 0.1117 11.1461 0.3189 | ce_loss_increases 0.0003 0.0145 0.1119 0.1426 | compound_ce_loss_increase 0.2568 | l0s 19.9994 19.9575 20.0000 19.9951 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0325 0.0000 0.2776 0.0078 2.5463 0.1117 11.1461 0.3189
type eval | step 10250 | loss 0.0321 0.2846 2.6477 11.4338 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0321 0.0000 0.2769 0.0077 2.5364 0.1113 11.1165 0.3173 | ce_loss_increases 0.0005 0.0142 0.1120 0.1428 | compound_ce_loss_increase 0.2577 | l0s 19.9994 19.9544 20.0000 19.9947 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0321 0.0000 0.2769 0.0077 2.5364 0.1113 11.1165 0.3173
type eval | step 10500 | loss 0.0317 0.2828 2.6431 11.4236 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0317 0.0000 0.2750 0.0077 2.5320 0.1111 11.1071 0.3165 | ce_loss_increases 0.0002 0.0135 0.1123 0.1415 | compound_ce_loss_increase 0.2580 | l0s 19.9997 19.9601 19.9999 19.9950 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0317 0.0000 0.2750 0.0077 2.5320 0.1111 11.1071 0.3165
type eval | step 10750 | loss 0.0320 0.2849 2.6426 11.4167 | checkpoint False False True True | ce_loss 1.5684 | sae_losses 0.0320 0.0000 0.2771 0.0078 2.5317 0.1110 11.1007 0.3159 | ce_loss_increases 0.0001 0.0140 0.1141 0.1411 | compound_ce_loss_increase 0.2571 | l0s 19.9996 19.9459 19.9999 19.9941 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0320 0.0000 0.2771 0.0078 2.5317 0.1110 11.1007 0.3159
type eval | step 11000 | loss 0.0316 0.2839 2.6446 11.4193 | checkpoint True False False False | ce_loss 1.5684 | sae_losses 0.0316 0.0000 0.2761 0.0077 2.5336 0.1110 11.1035 0.3158 | ce_loss_increases 0.0002 0.0135 0.1145 0.1413 | compound_ce_loss_increase 0.2568 | l0s 19.9996 19.9606 20.0000 19.9949 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0316 0.0000 0.2761 0.0077 2.5336 0.1110 11.1035 0.3158
type eval | step 11250 | loss 0.0316 0.2832 2.6440 11.4285 | checkpoint True False False False | ce_loss 1.5684 | sae_losses 0.0316 0.0000 0.2755 0.0077 2.5329 0.1112 11.1127 0.3159 | ce_loss_increases 0.0004 0.0145 0.1146 0.1425 | compound_ce_loss_increase 0.2587 | l0s 19.9997 19.9545 20.0000 19.9945 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0316 0.0000 0.2755 0.0077 2.5329 0.1112 11.1127 0.3159
type eval | step 11500 | loss 0.0312 0.2827 2.6400 11.4152 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0312 0.0000 0.2750 0.0077 2.5289 0.1111 11.1000 0.3153 | ce_loss_increases 0.0001 0.0140 0.1140 0.1426 | compound_ce_loss_increase 0.2548 | l0s 19.9997 19.9586 20.0000 19.9943 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0312 0.0000 0.2750 0.0077 2.5289 0.1111 11.1000 0.3153
type eval | step 11750 | loss 0.0311 0.2827 2.6368 11.4025 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0311 0.0000 0.2749 0.0077 2.5258 0.1110 11.0875 0.3151 | ce_loss_increases 0.0003 0.0143 0.1140 0.1429 | compound_ce_loss_increase 0.2552 | l0s 19.9996 19.9490 20.0000 19.9945 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0311 0.0000 0.2749 0.0077 2.5258 0.1110 11.0875 0.3151
type eval | step 12000 | loss 0.0309 0.2818 2.6379 11.3998 | checkpoint True True False True | ce_loss 1.5684 | sae_losses 0.0309 0.0000 0.2741 0.0077 2.5268 0.1111 11.0851 0.3147 | ce_loss_increases 0.0003 0.0140 0.1132 0.1445 | compound_ce_loss_increase 0.2574 | l0s 19.9997 19.9572 20.0000 19.9950 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0309 0.0000 0.2741 0.0077 2.5268 0.1111 11.0851 0.3147
type eval | step 12250 | loss 0.0308 0.2804 2.6307 11.3899 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0308 0.0000 0.2727 0.0077 2.5196 0.1111 11.0757 0.3141 | ce_loss_increases 0.0002 0.0148 0.1141 0.1453 | compound_ce_loss_increase 0.2568 | l0s 19.9995 19.9653 20.0000 19.9946 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0308 0.0000 0.2727 0.0077 2.5196 0.1111 11.0757 0.3141
type eval | step 12500 | loss 0.0303 0.2789 2.6269 11.3777 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0303 0.0000 0.2713 0.0077 2.5159 0.1110 11.0641 0.3136 | ce_loss_increases 0.0004 0.0149 0.1129 0.1449 | compound_ce_loss_increase 0.2554 | l0s 19.9997 19.9548 20.0000 19.9948 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0303 0.0000 0.2713 0.0077 2.5159 0.1110 11.0641 0.3136
type eval | step 12750 | loss 0.0304 0.2789 2.6196 11.3581 | checkpoint False True True True | ce_loss 1.5684 | sae_losses 0.0304 0.0000 0.2713 0.0076 2.5088 0.1108 11.0451 0.3130 | ce_loss_increases 0.0001 0.0144 0.1119 0.1437 | compound_ce_loss_increase 0.2515 | l0s 19.9997 19.9567 20.0000 19.9951 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0304 0.0000 0.2713 0.0076 2.5088 0.1108 11.0451 0.3130
type eval | step 13000 | loss 0.0302 0.2776 2.6152 11.3418 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0302 0.0000 0.2699 0.0076 2.5046 0.1106 11.0293 0.3125 | ce_loss_increases 0.0002 0.0147 0.1117 0.1424 | compound_ce_loss_increase 0.2523 | l0s 19.9997 19.9613 20.0000 19.9949 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0302 0.0000 0.2699 0.0076 2.5046 0.1106 11.0293 0.3125
type eval | step 13250 | loss 0.0302 0.2777 2.6153 11.3350 | checkpoint False False False True | ce_loss 1.5684 | sae_losses 0.0302 0.0000 0.2701 0.0076 2.5047 0.1106 11.0228 0.3123 | ce_loss_increases 0.0003 0.0143 0.1129 0.1426 | compound_ce_loss_increase 0.2508 | l0s 19.9996 19.9638 20.0000 19.9950 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0302 0.0000 0.2701 0.0076 2.5047 0.1106 11.0228 0.3123
type eval | step 13500 | loss 0.0302 0.2781 2.6182 11.3345 | checkpoint True False False True | ce_loss 1.5684 | sae_losses 0.0301 0.0000 0.2705 0.0076 2.5076 0.1106 11.0223 0.3122 | ce_loss_increases 0.0004 0.0142 0.1129 0.1425 | compound_ce_loss_increase 0.2487 | l0s 19.9998 19.9605 20.0000 19.9956 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0301 0.0000 0.2705 0.0076 2.5076 0.1106 11.0223 0.3122
type eval | step 13750 | loss 0.0300 0.2784 2.6188 11.3409 | checkpoint True False False False | ce_loss 1.5684 | sae_losses 0.0300 0.0000 0.2708 0.0076 2.5081 0.1107 11.0287 0.3122 | ce_loss_increases 0.0003 0.0143 0.1138 0.1432 | compound_ce_loss_increase 0.2494 | l0s 19.9995 19.9538 20.0000 19.9948 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0300 0.0000 0.2708 0.0076 2.5081 0.1107 11.0287 0.3122
type eval | step 14000 | loss 0.0298 0.2770 2.6165 11.3358 | checkpoint True True False False | ce_loss 1.5684 | sae_losses 0.0298 0.0000 0.2693 0.0076 2.5058 0.1107 11.0237 0.3121 | ce_loss_increases 0.0002 0.0141 0.1133 0.1434 | compound_ce_loss_increase 0.2497 | l0s 19.9997 19.9587 20.0000 19.9955 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0298 0.0000 0.2693 0.0076 2.5058 0.1107 11.0237 0.3121
type eval | step 14250 | loss 0.0296 0.2773 2.6159 11.3253 | checkpoint True False False True | ce_loss 1.5684 | sae_losses 0.0296 0.0000 0.2697 0.0076 2.5052 0.1107 11.0134 0.3119 | ce_loss_increases 0.0003 0.0142 0.1121 0.1434 | compound_ce_loss_increase 0.2486 | l0s 19.9997 19.9540 20.0000 19.9955 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0296 0.0000 0.2697 0.0076 2.5052 0.1107 11.0134 0.3119
type eval | step 14500 | loss 0.0296 0.2774 2.6160 11.3216 | checkpoint False False False True | ce_loss 1.5684 | sae_losses 0.0296 0.0000 0.2697 0.0076 2.5053 0.1107 11.0097 0.3118 | ce_loss_increases 0.0004 0.0142 0.1126 0.1434 | compound_ce_loss_increase 0.2488 | l0s 19.9996 19.9544 20.0000 19.9957 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0296 0.0000 0.2697 0.0076 2.5053 0.1107 11.0097 0.3118
type eval | step 14750 | loss 0.0296 0.2761 2.6123 11.3168 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0296 0.0000 0.2685 0.0076 2.5015 0.1108 11.0051 0.3116 | ce_loss_increases 0.0003 0.0142 0.1120 0.1436 | compound_ce_loss_increase 0.2486 | l0s 19.9995 19.9621 20.0000 19.9953 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0296 0.0000 0.2685 0.0076 2.5015 0.1108 11.0051 0.3116
type eval | step 15000 | loss 0.0293 0.2750 2.6105 11.3114 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0293 0.0000 0.2674 0.0076 2.4997 0.1108 11.0000 0.3113 | ce_loss_increases 0.0003 0.0145 0.1123 0.1434 | compound_ce_loss_increase 0.2480 | l0s 19.9997 19.9577 20.0000 19.9952 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0293 0.0000 0.2674 0.0076 2.4997 0.1108 11.0000 0.3113
type eval | step 15250 | loss 0.0292 0.2745 2.6076 11.3017 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0292 0.0000 0.2669 0.0076 2.4969 0.1107 10.9906 0.3110 | ce_loss_increases 0.0003 0.0143 0.1114 0.1431 | compound_ce_loss_increase 0.2488 | l0s 19.9997 19.9638 20.0000 19.9953 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0292 0.0000 0.2669 0.0076 2.4969 0.1107 10.9906 0.3110
type eval | step 15500 | loss 0.0292 0.2742 2.6041 11.2883 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0292 0.0000 0.2666 0.0076 2.4936 0.1105 10.9776 0.3107 | ce_loss_increases 0.0002 0.0142 0.1111 0.1425 | compound_ce_loss_increase 0.2482 | l0s 19.9997 19.9548 20.0000 19.9948 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0292 0.0000 0.2666 0.0076 2.4936 0.1105 10.9776 0.3107
type eval | step 15750 | loss 0.0292 0.2738 2.6034 11.2845 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0292 0.0000 0.2663 0.0076 2.4930 0.1105 10.9738 0.3107 | ce_loss_increases 0.0003 0.0142 0.1121 0.1420 | compound_ce_loss_increase 0.2478 | l0s 19.9996 19.9526 20.0000 19.9954 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0292 0.0000 0.2663 0.0076 2.4930 0.1105 10.9738 0.3107
type eval | step 16000 | loss 0.0291 0.2745 2.6044 11.2835 | checkpoint True False False True | ce_loss 1.5684 | sae_losses 0.0291 0.0000 0.2669 0.0076 2.4939 0.1105 10.9730 0.3105 | ce_loss_increases 0.0003 0.0142 0.1124 0.1413 | compound_ce_loss_increase 0.2472 | l0s 19.9997 19.9466 20.0000 19.9954 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0291 0.0000 0.2669 0.0076 2.4939 0.1105 10.9730 0.3105
type eval | step 16250 | loss 0.0291 0.2744 2.6064 11.2875 | checkpoint True False False False | ce_loss 1.5684 | sae_losses 0.0291 0.0000 0.2668 0.0076 2.4960 0.1105 10.9769 0.3107 | ce_loss_increases 0.0004 0.0142 0.1128 0.1418 | compound_ce_loss_increase 0.2487 | l0s 19.9996 19.9583 20.0000 19.9951 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0291 0.0000 0.2668 0.0076 2.4960 0.1105 10.9769 0.3107
type eval | step 16500 | loss 0.0289 0.2739 2.6049 11.2890 | checkpoint True False False False | ce_loss 1.5684 | sae_losses 0.0289 0.0000 0.2663 0.0076 2.4944 0.1105 10.9783 0.3107 | ce_loss_increases 0.0002 0.0140 0.1129 0.1424 | compound_ce_loss_increase 0.2484 | l0s 19.9998 19.9605 20.0000 19.9950 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0289 0.0000 0.2663 0.0076 2.4944 0.1105 10.9783 0.3107
type eval | step 16750 | loss 0.0288 0.2741 2.6041 11.2843 | checkpoint True False False False | ce_loss 1.5684 | sae_losses 0.0287 0.0000 0.2665 0.0076 2.4937 0.1105 10.9738 0.3105 | ce_loss_increases 0.0003 0.0138 0.1130 0.1431 | compound_ce_loss_increase 0.2490 | l0s 19.9998 19.9562 20.0000 19.9953 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0287 0.0000 0.2665 0.0076 2.4937 0.1105 10.9738 0.3105
type eval | step 17000 | loss 0.0290 0.2739 2.6043 11.2862 | checkpoint False False False False | ce_loss 1.5684 | sae_losses 0.0290 0.0000 0.2663 0.0076 2.4939 0.1104 10.9756 0.3106 | ce_loss_increases 0.0002 0.0140 0.1132 0.1434 | compound_ce_loss_increase 0.2496 | l0s 19.9997 19.9550 20.0000 19.9951 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0290 0.0000 0.2663 0.0076 2.4939 0.1104 10.9756 0.3106
type eval | step 17250 | loss 0.0287 0.2730 2.6034 11.2847 | checkpoint True True True False | ce_loss 1.5684 | sae_losses 0.0287 0.0000 0.2655 0.0076 2.4929 0.1105 10.9742 0.3105 | ce_loss_increases 0.0004 0.0139 0.1130 0.1438 | compound_ce_loss_increase 0.2487 | l0s 19.9997 19.9608 20.0000 19.9950 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0287 0.0000 0.2655 0.0076 2.4929 0.1105 10.9742 0.3105
type eval | step 17500 | loss 0.0287 0.2728 2.6017 11.2808 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0287 0.0000 0.2652 0.0076 2.4912 0.1105 10.9703 0.3104 | ce_loss_increases 0.0002 0.0141 0.1133 0.1438 | compound_ce_loss_increase 0.2483 | l0s 19.9995 19.9548 20.0000 19.9949 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0287 0.0000 0.2652 0.0076 2.4912 0.1105 10.9703 0.3104
type eval | step 17750 | loss 0.0286 0.2721 2.5997 11.2786 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0286 0.0000 0.2645 0.0075 2.4893 0.1104 10.9683 0.3103 | ce_loss_increases 0.0002 0.0141 0.1135 0.1445 | compound_ce_loss_increase 0.2478 | l0s 19.9997 19.9612 20.0000 19.9954 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0286 0.0000 0.2645 0.0075 2.4893 0.1104 10.9683 0.3103
type eval | step 18000 | loss 0.0284 0.2721 2.5968 11.2711 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0284 0.0000 0.2645 0.0075 2.4864 0.1103 10.9612 0.3099 | ce_loss_increases 0.0002 0.0144 0.1138 0.1442 | compound_ce_loss_increase 0.2470 | l0s 19.9997 19.9592 20.0000 19.9949 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0284 0.0000 0.2645 0.0075 2.4864 0.1103 10.9612 0.3099
type eval | step 18250 | loss 0.0284 0.2716 2.5952 11.2688 | checkpoint False True True True | ce_loss 1.5684 | sae_losses 0.0284 0.0000 0.2641 0.0075 2.4849 0.1103 10.9590 0.3099 | ce_loss_increases 0.0003 0.0143 0.1136 0.1441 | compound_ce_loss_increase 0.2479 | l0s 19.9996 19.9575 20.0000 19.9954 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0284 0.0000 0.2641 0.0075 2.4849 0.1103 10.9590 0.3099
type eval | step 18500 | loss 0.0286 0.2721 2.5951 11.2679 | checkpoint False False True True | ce_loss 1.5684 | sae_losses 0.0286 0.0000 0.2646 0.0075 2.4848 0.1103 10.9580 0.3099 | ce_loss_increases 0.0003 0.0144 0.1137 0.1444 | compound_ce_loss_increase 0.2464 | l0s 19.9996 19.9560 20.0000 19.9951 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0286 0.0000 0.2646 0.0075 2.4848 0.1103 10.9580 0.3099
type eval | step 18750 | loss 0.0284 0.2724 2.5961 11.2700 | checkpoint False False False False | ce_loss 1.5684 | sae_losses 0.0284 0.0000 0.2649 0.0075 2.4858 0.1103 10.9601 0.3099 | ce_loss_increases 0.0002 0.0142 0.1137 0.1441 | compound_ce_loss_increase 0.2495 | l0s 19.9996 19.9553 20.0000 19.9951 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0284 0.0000 0.2649 0.0075 2.4858 0.1103 10.9601 0.3099
type eval | step 19000 | loss 0.0284 0.2721 2.5970 11.2731 | checkpoint True False False False | ce_loss 1.5684 | sae_losses 0.0284 0.0000 0.2646 0.0076 2.4867 0.1103 10.9630 0.3101 | ce_loss_increases 0.0003 0.0142 0.1139 0.1444 | compound_ce_loss_increase 0.2495 | l0s 19.9997 19.9629 20.0000 19.9950 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0284 0.0000 0.2646 0.0076 2.4867 0.1103 10.9630 0.3101
type eval | step 19250 | loss 0.0283 0.2720 2.5953 11.2703 | checkpoint True False False False | ce_loss 1.5684 | sae_losses 0.0283 0.0000 0.2645 0.0075 2.4851 0.1103 10.9603 0.3100 | ce_loss_increases 0.0001 0.0142 0.1143 0.1448 | compound_ce_loss_increase 0.2483 | l0s 19.9998 19.9607 20.0000 19.9947 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0283 0.0000 0.2645 0.0075 2.4851 0.1103 10.9603 0.3100
type eval | step 19500 | loss 0.0283 0.2723 2.5959 11.2716 | checkpoint True False False False | ce_loss 1.5684 | sae_losses 0.0283 0.0000 0.2648 0.0075 2.4857 0.1103 10.9616 0.3100 | ce_loss_increases 0.0003 0.0141 0.1144 0.1451 | compound_ce_loss_increase 0.2467 | l0s 19.9996 19.9541 20.0000 19.9953 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0283 0.0000 0.2648 0.0075 2.4857 0.1103 10.9616 0.3100
type eval | step 19750 | loss 0.0282 0.2718 2.5957 11.2713 | checkpoint True False False False | ce_loss 1.5684 | sae_losses 0.0282 0.0000 0.2643 0.0075 2.4855 0.1103 10.9613 0.3100 | ce_loss_increases 0.0001 0.0140 0.1142 0.1447 | compound_ce_loss_increase 0.2510 | l0s 19.9996 19.9593 20.0000 19.9950 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0282 0.0000 0.2643 0.0075 2.4855 0.1103 10.9613 0.3100
type eval | step 20000 | loss 0.0282 0.2714 2.5933 11.2718 | checkpoint True True True False | ce_loss 1.5684 | sae_losses 0.0281 0.0000 0.2639 0.0075 2.4830 0.1103 10.9618 0.3100 | ce_loss_increases 0.0002 0.0139 0.1139 0.1451 | compound_ce_loss_increase 0.2485 | l0s 19.9996 19.9581 20.0000 19.9951 20.0000 20.0000 20.0000 20.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0281 0.0000 0.2639 0.0075 2.4830 0.1103 10.9618 0.3100