type eval | step 0 | loss 186.7037 0.4260 303.2749 3.9970 601.4265 9.9644 760.2809 46.4154 | checkpoint False | ce_loss 1.5684 | sae_losses 186.7037 0.4260 303.2749 3.9970 601.4265 9.9644 760.2809 46.4154 | ce_loss_increases 0.7148 1.9479 2.6442 2.0959 2.4079 1.6917 2.1049 0.8262 | compound_ce_loss_increase 4.1068 | l0s 10.1670 10.1296 10.1281 10.1417 10.1658 10.1400 10.1650 10.1397 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | ∇_l1 3.4815 10.4970 13.6282 23.6119 type eval | step 250 | loss 26.4622 0.0208 53.9500 0.6287 127.3772 2.1668 180.6020 7.9516 | checkpoint True True True True True True True True True True True True | ce_loss 1.5684 | sae_losses 26.4622 0.0208 53.9500 0.6287 127.3772 2.1668 180.6020 7.9516 | ce_loss_increases 0.1328 0.1190 1.3135 1.0812 1.7040 1.1090 1.6795 0.6814 | compound_ce_loss_increase 3.8962 | l0s 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | ∇_l1 1.5389 5.6679 8.7651 12.7539 type eval | step 500 | loss 1.5443 0.0254 7.3624 0.4568 21.6177 0.9417 41.3206 2.5722 | checkpoint True False True True True True True True True True True True | ce_loss 1.5684 | sae_losses 1.5443 0.0254 7.3624 0.4568 21.6177 0.9417 41.3206 2.5722 | ce_loss_increases 0.0081 0.0833 0.2613 0.9822 0.4802 0.7704 0.5581 0.5819 | compound_ce_loss_increase 2.7611 | l0s 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | ∇_l1 0.3132 1.3012 2.2327 3.3566 type eval | step 750 | loss 0.2155 0.0300 2.5971 0.5099 9.7592 1.2141 27.3213 2.2635 | checkpoint True False True False True False True True True True True True | ce_loss 1.5684 | sae_losses 0.2155 0.0300 2.5971 0.5099 9.7592 1.2141 27.3213 2.2635 | ce_loss_increases 0.0006 0.0974 0.0843 1.0012 0.1864 0.9562 0.3391 0.4942 | compound_ce_loss_increase 2.1277 | l0s 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | ∇_l1 0.0976 0.2731 0.2338 0.7645 type eval | step 1000 | loss 0.1515 0.0301 1.7643 0.5541 6.9810 1.8683 23.8859 2.5345 | checkpoint True False True False True False True False True True True True | ce_loss 1.5684 | sae_losses 0.1515 0.0301 1.7643 0.5541 6.9810 1.8683 23.8859 2.5345 | ce_loss_increases 0.0001 0.0886 0.0516 1.1103 0.1241 1.4251 0.2805 0.5521 | compound_ce_loss_increase 2.6950 | l0s 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | ∇_l1 0.0392 0.1315 0.0549 0.3130 type eval | step 1250 | loss 0.1312 0.0315 1.4430 0.5792 6.1282 1.9692 22.5218 3.2806 | checkpoint True False True False True False True False True True True True | ce_loss 1.5684 | sae_losses 0.1312 0.0315 1.4430 0.5792 6.1282 1.9692 22.5218 3.2806 | ce_loss_increases 0.0001 0.0922 0.0429 1.1077 0.1005 1.4609 0.2648 0.7675 | compound_ce_loss_increase 2.9633 | l0s 10.0000 10.0000 10.0000 10.0000 9.9999 10.0000 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | ∇_l1 0.0197 0.1117 0.0229 0.2752 type eval | step 1500 | loss 0.1214 0.0291 1.2905 0.6412 5.7403 1.9101 21.7178 3.3920 | checkpoint True False True False True False True False True False False True | ce_loss 1.5684 | sae_losses 0.1214 0.0291 1.2905 0.6412 5.7403 1.9101 21.7178 3.3920 | ce_loss_increases 0.0002 0.0905 0.0378 1.3075 0.0936 1.4786 0.2485 0.8017 | compound_ce_loss_increase 3.0462 | l0s 10.0000 10.0000 10.0000 10.0000 9.9999 10.0001 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | ∇_l1 0.0180 0.1146 0.0332 0.2660 type eval | step 1750 | loss 0.1147 0.0326 1.1949 0.5891 5.5082 2.0959 21.2350 3.2879 | checkpoint True False True False True False True False True True True False | ce_loss 1.5684 | sae_losses 0.1147 0.0326 1.1949 0.5891 5.5082 2.0959 21.2350 3.2879 | ce_loss_increases -0.0001 0.0897 0.0325 1.1468 0.0880 1.6123 0.2392 0.7238 | compound_ce_loss_increase 3.1586 | l0s 10.0000 10.0000 10.0000 10.0000 9.9998 10.0001 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | ∇_l1 0.0179 0.1067 0.0074 0.2809 type eval | step 2000 | loss 0.1065 0.0316 1.1042 0.6104 5.3381 2.1313 20.8961 3.2250 | checkpoint True False True False True False True False True True True False | ce_loss 1.5684 | sae_losses 0.1065 0.0316 1.1042 0.6104 5.3381 2.1313 20.8961 3.2250 | ce_loss_increases 0.0002 0.0855 0.0285 1.2082 0.0855 1.6276 0.2371 0.7223 | compound_ce_loss_increase 3.1104 | l0s 10.0000 10.0000 10.0000 10.0000 9.9999 10.0000 10.0000 10.0004 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | ∇_l1 0.0120 0.0846 0.0065 0.3279 type eval | step 2250 | loss 0.1018 0.0314 1.0465 0.6368 5.2129 2.1812 20.6268 3.2293 | checkpoint True False True False True False True False False True False False | ce_loss 1.5684 | sae_losses 0.1018 0.0314 1.0465 0.6368 5.2129 2.1812 20.6268 3.2293 | ce_loss_increases 0.0003 0.0936 0.0257 1.1791 0.0834 1.6374 0.2380 0.8110 | compound_ce_loss_increase 3.3403 | l0s 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | ∇_l1 0.0374 0.0594 0.0107 0.3118 type eval | step 2500 | loss 0.0999 0.0319 1.0165 0.6323 5.1243 2.1536 20.4373 3.1601 | checkpoint True False True False True False True False False False False False | ce_loss 1.5684 | sae_losses 0.0999 0.0319 1.0165 0.6323 5.1243 2.1536 20.4373 3.1601 | ce_loss_increases 0.0002 0.0890 0.0245 1.1967 0.0814 1.6319 0.2355 0.7497 | compound_ce_loss_increase 3.4887 | l0s 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0002 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | ∇_l1 0.0186 0.1073 0.0092 0.3257 type eval | step 2750 | loss 0.0973 0.0322 0.9823 0.6098 5.0421 2.1779 20.2298 3.1969 | checkpoint True False True False True False True False False False False False | ce_loss 1.5684 | sae_losses 0.0973 0.0322 0.9823 0.6098 5.0421 2.1779 20.2298 3.1969 | ce_loss_increases 0.0003 0.0924 0.0236 1.1375 0.0804 1.6371 0.2307 0.7411 | compound_ce_loss_increase 3.5315 | l0s 10.0000 10.0000 10.0000 10.0000 9.9999 10.0000 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | ∇_l1 0.0175 0.0819 0.0088 0.2786 type eval | step 3000 | loss 0.0946 0.0304 0.9625 0.6016 4.9956 2.0750 20.1455 3.2759 | checkpoint True False True False True False True False False True False False | ce_loss 1.5684 | sae_losses 0.0946 0.0304 0.9625 0.6016 4.9956 2.0750 20.1455 3.2759 | ce_loss_increases 0.0001 0.0846 0.0225 1.1901 0.0783 1.5736 0.2285 0.8048 | compound_ce_loss_increase 3.2392 | l0s 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | ∇_l1 0.0126 0.0556 0.0200 0.2913 type eval | step 3250 | loss 0.0934 0.0298 0.9555 0.6129 4.9642 2.1573 20.0953 3.1742 | checkpoint True False True False True False True False False True False False | ce_loss 1.5684 | sae_losses 0.0934 0.0298 0.9555 0.6129 4.9642 2.1573 20.0953 3.1742 | ce_loss_increases 0.0001 0.0815 0.0218 1.2017 0.0791 1.6309 0.2279 0.7517 | compound_ce_loss_increase 3.6883 | l0s 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | ∇_l1 0.0146 0.0513 0.0159 0.2891 type eval | step 3500 | loss 0.0925 0.0296 0.9481 0.6402 4.9363 2.1209 20.0595 3.1560 | checkpoint True False True False True False True False False True False True | ce_loss 1.5684 | sae_losses 0.0925 0.0296 0.9481 0.6402 4.9363 2.1209 20.0595 3.1560 | ce_loss_increases 0.0001 0.0797 0.0211 1.2969 0.0761 1.5599 0.2323 0.7599 | compound_ce_loss_increase 3.6287 | l0s 10.0000 10.0000 10.0000 10.0000 9.9999 10.0000 10.0000 10.0001 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | ∇_l1 0.0167 0.0396 0.0324 0.2503 type eval | step 3750 | loss 0.0906 0.0293 0.9384 0.6391 4.9091 2.1195 20.0307 3.2306 | checkpoint True False True False True False True False False False False True | ce_loss 1.5684 | sae_losses 0.0906 0.0293 0.9384 0.6391 4.9091 2.1195 20.0307 3.2306 | ce_loss_increases 0.0002 0.0783 0.0213 1.2223 0.0737 1.5775 0.2340 0.7655 | compound_ce_loss_increase 3.6787 | l0s 10.0000 10.0000 10.0000 10.0000 9.9999 10.0000 10.0000 10.0001 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | ∇_l1 0.0141 0.0600 0.0684 0.2389 type eval | step 4000 | loss 0.0898 0.0304 0.9307 0.7023 4.8824 2.1068 20.0002 3.1901 | checkpoint True False True False True False True False True False False True | ce_loss 1.5684 | sae_losses 0.0898 0.0304 0.9307 0.7023 4.8824 2.1068 20.0002 3.1901 | ce_loss_increases 0.0002 0.0842 0.0217 1.3783 0.0733 1.5954 0.2360 0.7605 | compound_ce_loss_increase 3.5511 | l0s 10.0000 10.0000 10.0000 10.0000 9.9999 10.0000 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | ∇_l1 0.0116 0.0549 0.0385 0.2182 type eval | step 4250 | loss 0.0905 0.0293 0.9315 0.6565 4.8655 2.0264 20.0216 3.1195 | checkpoint False False False False True False False False True False False False | ce_loss 1.5684 | sae_losses 0.0905 0.0293 0.9315 0.6565 4.8655 2.0264 20.0216 3.1195 | ce_loss_increases 0.0000 0.0803 0.0215 1.2905 0.0725 1.4871 0.2390 0.7379 | compound_ce_loss_increase 3.3107 | l0s 10.0000 10.0000 10.0000 10.0000 9.9999 10.0000 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | ∇_l1 0.0101 0.0722 0.0717 0.2395 type eval | step 4500 | loss 0.0872 0.0278 0.9266 0.5696 4.8504 2.0683 20.0486 3.1663 | checkpoint True False True False True False False False True False False True | ce_loss 1.5684 | sae_losses 0.0872 0.0278 0.9266 0.5696 4.8504 2.0683 20.0486 3.1663 | ce_loss_increases 0.0001 0.0760 0.0219 1.1304 0.0712 1.5414 0.2418 0.7402 | compound_ce_loss_increase 3.3011 | l0s 10.0000 10.0000 10.0000 10.0000 9.9999 10.0000 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | ∇_l1 0.0091 0.0969 0.0787 0.2141 type eval | step 4750 | loss 0.0853 0.0305 0.9286 0.5278 4.8346 2.0167 20.0676 3.1063 | checkpoint True False False False True False False False True False False True | ce_loss 1.5684 | sae_losses 0.0853 0.0305 0.9286 0.5278 4.8346 2.0167 20.0676 3.1063 | ce_loss_increases 0.0001 0.0814 0.0217 1.0829 0.0706 1.4510 0.2424 0.7378 | compound_ce_loss_increase 3.0076 | l0s 10.0000 10.0000 10.0000 10.0000 9.9999 10.0000 10.0000 10.0001 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | ∇_l1 0.0087 0.0695 0.0897 0.2101 type eval | step 5000 | loss 0.0837 0.0299 0.9220 0.5297 4.8245 2.0118 20.0920 3.1428 | checkpoint True False True False True False False False True False False False | ce_loss 1.5684 | sae_losses 0.0837 0.0299 0.9220 0.5297 4.8245 2.0118 20.0920 3.1428 | ce_loss_increases 0.0001 0.0773 0.0208 1.0836 0.0707 1.5145 0.2433 0.7220 | compound_ce_loss_increase 3.0701 | l0s 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | ∇_l1 0.0082 0.0478 0.0821 0.2192 type eval | step 5250 | loss 0.0825 0.0309 0.9170 0.5252 4.8084 1.9976 20.0971 3.2392 | checkpoint True False True False True False False False True False False True | ce_loss 1.5684 | sae_losses 0.0825 0.0309 0.9170 0.5252 4.8084 1.9976 20.0971 3.2392 | ce_loss_increases 0.0000 0.0833 0.0202 1.0732 0.0706 1.4893 0.2434 0.7942 | compound_ce_loss_increase 3.0120 | l0s 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0001 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | ∇_l1 0.0079 0.0504 0.0155 0.1980 type eval | step 5500 | loss 0.0816 0.0299 0.9151 0.5458 4.8018 2.0162 20.1308 3.3812 | checkpoint True False True False True False False False True False False True | ce_loss 1.5684 | sae_losses 0.0816 0.0299 0.9151 0.5458 4.8018 2.0162 20.1308 3.3812 | ce_loss_increases -0.0000 0.0829 0.0203 1.0744 0.0703 1.4748 0.2434 0.8689 | compound_ce_loss_increase 3.2884 | l0s 10.0000 10.0000 10.0000 10.0000 10.0000 10.0001 10.0000 10.0002 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | ∇_l1 0.0075 0.0412 0.0215 0.1919 type eval | step 5750 | loss 0.0816 0.0286 0.9135 0.5543 4.7947 1.9812 20.1680 3.2350 | checkpoint True False True False True False False False True False False False | ce_loss 1.5684 | sae_losses 0.0816 0.0286 0.9135 0.5543 4.7947 1.9812 20.1680 3.2350 | ce_loss_increases -0.0001 0.0775 0.0193 1.1095 0.0703 1.4614 0.2445 0.7320 | compound_ce_loss_increase 3.0873 | l0s 10.0000 10.0000 10.0000 10.0000 9.9999 10.0002 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | ∇_l1 0.0073 0.0446 0.0170 0.2096 type eval | step 6000 | loss 0.0812 0.0288 0.9179 0.5636 4.7925 2.0075 20.2073 3.4099 | checkpoint True False False False True False False False True False False False | ce_loss 1.5684 | sae_losses 0.0812 0.0288 0.9179 0.5636 4.7925 2.0075 20.2073 3.4099 | ce_loss_increases -0.0002 0.0758 0.0193 1.1228 0.0703 1.5294 0.2454 0.8376 | compound_ce_loss_increase 3.4978 | l0s 10.0000 10.0000 10.0000 10.0000 9.9999 10.0000 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | ∇_l1 0.0072 0.0463 0.0135 0.2259 type eval | step 6250 | loss 0.0803 0.0292 0.9230 0.5345 4.7937 2.0839 20.2453 3.2369 | checkpoint True False False False False False False False True False False False | ce_loss 1.5684 | sae_losses 0.0803 0.0292 0.9230 0.5345 4.7937 2.0839 20.2453 3.2369 | ce_loss_increases -0.0002 0.0784 0.0189 1.0577 0.0709 1.5621 0.2453 0.7696 | compound_ce_loss_increase 3.3863 | l0s 10.0000 10.0000 10.0000 10.0000 9.9999 10.0000 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | ∇_l1 0.0071 0.0591 0.0135 0.2236 type eval | step 6500 | loss 0.0796 0.0292 0.9238 0.5240 4.7835 2.0277 20.2621 3.3598 | checkpoint True False False False True False False False True False False False | ce_loss 1.5684 | sae_losses 0.0796 0.0292 0.9238 0.5240 4.7835 2.0277 20.2621 3.3598 | ce_loss_increases -0.0000 0.0774 0.0187 1.0375 0.0712 1.5570 0.2448 0.8272 | compound_ce_loss_increase 3.3310 | l0s 10.0000 10.0000 10.0000 10.0000 9.9999 10.0003 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | ∇_l1 0.0069 0.0504 0.0124 0.2181 type eval | step 6750 | loss 0.0794 0.0306 0.9255 0.5117 4.7791 2.0450 20.2811 3.3347 | checkpoint True False False False True False False False True False False False | ce_loss 1.5684 | sae_losses 0.0794 0.0306 0.9255 0.5117 4.7791 2.0450 20.2811 3.3347 | ce_loss_increases -0.0000 0.0820 0.0184 1.0127 0.0714 1.5338 0.2449 0.8205 | compound_ce_loss_increase 3.3778 | l0s 10.0000 10.0000 10.0000 10.0000 9.9999 10.0001 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | ∇_l1 0.0068 0.0472 0.0129 0.2319 type eval | step 7000 | loss 0.0793 0.0300 0.9277 0.5146 4.7762 2.0556 20.3023 3.3447 | checkpoint True False False False True False False False True False False False | ce_loss 1.5684 | sae_losses 0.0793 0.0300 0.9277 0.5146 4.7762 2.0556 20.3023 3.3447 | ce_loss_increases 0.0000 0.0833 0.0182 0.9957 0.0717 1.5349 0.2448 0.8112 | compound_ce_loss_increase 3.1468 | l0s 10.0000 10.0000 10.0000 10.0000 9.9999 10.0000 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | ∇_l1 0.0067 0.0447 0.0150 0.2250 type eval | step 7250 | loss 0.0788 0.0287 0.9285 0.5072 4.7715 2.0781 20.3171 3.3880 | checkpoint True False False False True False False False True False False False | ce_loss 1.5684 | sae_losses 0.0788 0.0287 0.9285 0.5072 4.7715 2.0781 20.3171 3.3880 | ce_loss_increases 0.0000 0.0766 0.0185 1.0171 0.0720 1.5430 0.2454 0.8140 | compound_ce_loss_increase 3.1053 | l0s 10.0000 10.0000 10.0000 10.0000 9.9999 10.0000 10.0000 10.0001 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | ∇_l1 0.0067 0.0430 0.0135 0.2268 type eval | step 7500 | loss 0.0786 0.0296 0.9294 0.4921 4.7679 1.9907 20.3271 3.2653 | checkpoint True False False False True False False False True True False False | ce_loss 1.5684 | sae_losses 0.0786 0.0296 0.9294 0.4921 4.7679 1.9907 20.3271 3.2653 | ce_loss_increases 0.0000 0.0810 0.0185 0.9687 0.0723 1.4684 0.2454 0.7489 | compound_ce_loss_increase 2.9105 | l0s 10.0000 10.0000 10.0000 10.0000 9.9999 10.0002 10.0000 10.0001 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | ∇_l1 0.0065 0.0391 0.0179 0.2281