type eval | step 0 | loss 28.6972 29.0177 29.1788 82.9392 | checkpoint False | ce_loss 1.5683 | sae_losses 13.9328 14.7644 13.2004 15.8173 12.5000 16.6789 15.8960 67.0432 | ce_loss_increases 4.1946 3.4464 2.8160 2.2183 | compound_ce_loss_increase 4.4453 | l0s 10.1545 10.1516 10.1588 10.1452 10.1587 10.1427 10.1437 10.1436 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 13.9328 14.7644 13.2004 15.8173 12.5000 16.6789 15.8960 67.0432 type eval | step 250 | loss 3.0383 4.1871 5.8551 12.9321 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 1.4652 1.5731 1.8414 2.3457 2.3372 3.5179 3.2952 9.6369 | ce_loss_increases 4.0950 3.5147 3.1619 2.4832 | compound_ce_loss_increase 4.3378 | l0s 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 1.4652 1.5731 1.8414 2.3457 2.3372 3.5179 3.2952 9.6369 type eval | step 500 | loss 0.1997 0.6729 1.2754 2.4795 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0945 0.1052 0.2869 0.3860 0.4909 0.7845 0.8261 1.6534 | ce_loss_increases 2.4035 1.9535 1.7082 0.9773 | compound_ce_loss_increase 3.4109 | l0s 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0945 0.1052 0.2869 0.3860 0.4909 0.7845 0.8261 1.6534 type eval | step 750 | loss 0.0419 0.2378 0.6673 1.5919 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0208 0.0211 0.0987 0.1392 0.2332 0.4341 0.5416 1.0502 | ce_loss_increases 0.6446 0.7927 0.7988 0.5883 | compound_ce_loss_increase 1.6091 | l0s 9.9995 9.9994 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0208 0.0211 0.0987 0.1392 0.2332 0.4341 0.5416 1.0502 type eval | step 0 | loss 26.5120 27.6647 28.7519 91.0018 | checkpoint False | ce_loss 1.5683 | sae_losses 11.8438 14.6682 13.4870 14.1777 12.1134 16.6385 15.7132 75.2887 | ce_loss_increases 4.1914 3.6587 2.5967 2.1365 | compound_ce_loss_increase 4.3935 | l0s 10.1718 10.1483 10.1553 10.1446 10.1577 10.1391 10.1458 10.1461 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 11.8438 14.6682 13.4870 14.1777 12.1134 16.6385 15.7132 75.2887 type eval | step 250 | loss 2.8283 4.1033 5.7493 14.4695 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 1.3121 1.5161 1.8987 2.2046 2.2719 3.4773 3.2176 11.2519 | ce_loss_increases 4.3510 3.5611 3.0667 2.3845 | compound_ce_loss_increase 4.0992 | l0s 10.0001 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 1.3121 1.5161 1.8987 2.2046 2.2719 3.4773 3.2176 11.2519 type eval | step 500 | loss 0.1913 0.6802 1.2632 2.4904 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0903 0.1010 0.2992 0.3810 0.4862 0.7770 0.8075 1.6830 | ce_loss_increases 2.3991 1.9587 1.6626 1.0082 | compound_ce_loss_increase 3.4080 | l0s 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0903 0.1010 0.2992 0.3810 0.4862 0.7770 0.8075 1.6830 type eval | step 750 | loss 0.0404 0.2408 0.6674 1.6006 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0196 0.0208 0.0968 0.1440 0.2308 0.4366 0.5359 1.0646 | ce_loss_increases 0.6612 0.7707 0.8278 0.6012 | compound_ce_loss_increase 1.6411 | l0s 9.9998 9.9983 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0196 0.0208 0.0968 0.1440 0.2308 0.4366 0.5359 1.0646 type eval | step 1000 | loss 0.0279 0.1540 0.5093 1.3893 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0130 0.0149 0.0594 0.0946 0.1616 0.3477 0.4654 0.9239 | ce_loss_increases 0.3510 0.3905 0.5167 0.4942 | compound_ce_loss_increase 1.1273 | l0s 9.9995 9.9979 9.9999 9.9999 9.9999 10.0000 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0130 0.0149 0.0594 0.0946 0.1616 0.3477 0.4654 0.9239 type eval | step 1250 | loss 0.0229 0.1226 0.4580 1.3054 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0105 0.0125 0.0462 0.0764 0.1392 0.3188 0.4375 0.8680 | ce_loss_increases 0.2445 0.2506 0.4311 0.4531 | compound_ce_loss_increase 0.9233 | l0s 9.9997 9.9974 9.9997 9.9997 10.0000 10.0000 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0105 0.0125 0.0462 0.0764 0.1392 0.3188 0.4375 0.8680 type eval | step 1500 | loss 0.0205 0.1088 0.4329 1.2618 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0092 0.0113 0.0405 0.0683 0.1290 0.3040 0.4224 0.8394 | ce_loss_increases 0.1934 0.1929 0.3988 0.4347 | compound_ce_loss_increase 0.8637 | l0s 9.9997 9.9981 9.9997 9.9997 10.0000 10.0000 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0092 0.0113 0.0405 0.0683 0.1290 0.3040 0.4224 0.8394 type eval | step 1750 | loss 0.0189 0.1027 0.4196 1.2354 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0085 0.0104 0.0378 0.0649 0.1236 0.2960 0.4134 0.8220 | ce_loss_increases 0.1640 0.1715 0.3742 0.4280 | compound_ce_loss_increase 0.8362 | l0s 9.9996 9.9974 9.9996 9.9997 9.9999 10.0000 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0085 0.0104 0.0378 0.0649 0.1236 0.2960 0.4134 0.8220 type eval | step 2000 | loss 0.0175 0.0976 0.4102 1.2168 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0078 0.0096 0.0357 0.0619 0.1197 0.2905 0.4069 0.8099 | ce_loss_increases 0.1358 0.1538 0.3625 0.4122 | compound_ce_loss_increase 0.8105 | l0s 9.9995 9.9986 9.9996 9.9997 9.9999 10.0000 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0078 0.0096 0.0357 0.0619 0.1197 0.2905 0.4069 0.8099 type eval | step 2250 | loss 0.0163 0.0943 0.4034 1.2028 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0072 0.0091 0.0344 0.0599 0.1172 0.2862 0.4021 0.8007 | ce_loss_increases 0.1215 0.1468 0.3562 0.3974 | compound_ce_loss_increase 0.7693 | l0s 9.9998 9.9989 9.9997 9.9997 9.9998 9.9999 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0072 0.0091 0.0344 0.0599 0.1172 0.2862 0.4021 0.8007 type eval | step 2500 | loss 0.0155 0.0922 0.3985 1.1917 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0068 0.0087 0.0337 0.0585 0.1154 0.2831 0.3975 0.7942 | ce_loss_increases 0.1040 0.1372 0.3493 0.3897 | compound_ce_loss_increase 0.7463 | l0s 9.9998 9.9991 9.9998 9.9996 9.9998 9.9999 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0068 0.0087 0.0337 0.0585 0.1154 0.2831 0.3975 0.7942 type eval | step 2750 | loss 0.0147 0.0905 0.3933 1.1814 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0064 0.0083 0.0329 0.0576 0.1134 0.2798 0.3937 0.7877 | ce_loss_increases 0.0957 0.1312 0.3450 0.3904 | compound_ce_loss_increase 0.7487 | l0s 9.9998 9.9994 9.9998 9.9996 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0064 0.0083 0.0329 0.0576 0.1134 0.2798 0.3937 0.7877 type eval | step 3000 | loss 0.0140 0.0893 0.3908 1.1764 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0061 0.0079 0.0325 0.0568 0.1125 0.2783 0.3919 0.7845 | ce_loss_increases 0.0802 0.1288 0.3490 0.3968 | compound_ce_loss_increase 0.7496 | l0s 9.9997 9.9995 9.9998 9.9995 9.9998 9.9998 10.0000 9.9999 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0061 0.0079 0.0325 0.0568 0.1125 0.2783 0.3919 0.7845 type eval | step 3250 | loss 0.0136 0.0886 0.3896 1.1724 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0059 0.0077 0.0321 0.0564 0.1122 0.2773 0.3909 0.7815 | ce_loss_increases 0.0755 0.1294 0.3493 0.3982 | compound_ce_loss_increase 0.7294 | l0s 9.9997 9.9995 9.9997 9.9994 9.9999 9.9999 10.0000 9.9999 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0059 0.0077 0.0321 0.0564 0.1122 0.2773 0.3909 0.7815 type eval | step 3500 | loss 0.0132 0.0879 0.3889 1.1721 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0057 0.0075 0.0320 0.0559 0.1119 0.2769 0.3912 0.7809 | ce_loss_increases 0.0680 0.1278 0.3493 0.4014 | compound_ce_loss_increase 0.7215 | l0s 9.9998 9.9996 9.9998 9.9994 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0057 0.0075 0.0320 0.0559 0.1119 0.2769 0.3912 0.7809 type eval | step 3750 | loss 0.0127 0.0872 0.3878 1.1701 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0055 0.0073 0.0318 0.0554 0.1116 0.2762 0.3909 0.7792 | ce_loss_increases 0.0630 0.1263 0.3464 0.4049 | compound_ce_loss_increase 0.7187 | l0s 9.9996 9.9996 9.9998 9.9993 9.9998 9.9998 10.0000 9.9999 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0055 0.0073 0.0318 0.0554 0.1116 0.2762 0.3909 0.7792 type eval | step 4000 | loss 0.0124 0.0865 0.3858 1.1659 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0053 0.0071 0.0317 0.0548 0.1110 0.2748 0.3896 0.7764 | ce_loss_increases 0.0584 0.1226 0.3396 0.4058 | compound_ce_loss_increase 0.7110 | l0s 9.9998 9.9997 9.9998 9.9994 9.9998 9.9998 10.0000 9.9999 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0053 0.0071 0.0317 0.0548 0.1110 0.2748 0.3896 0.7764 type eval | step 4250 | loss 0.0121 0.0861 0.3850 1.1631 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0052 0.0069 0.0315 0.0546 0.1107 0.2743 0.3889 0.7743 | ce_loss_increases 0.0534 0.1215 0.3408 0.4077 | compound_ce_loss_increase 0.7034 | l0s 9.9997 9.9997 9.9998 9.9994 9.9998 9.9997 10.0000 9.9999 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0052 0.0069 0.0315 0.0546 0.1107 0.2743 0.3889 0.7743 type eval | step 4500 | loss 0.0118 0.0852 0.3843 1.1615 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0051 0.0067 0.0311 0.0541 0.1104 0.2740 0.3884 0.7730 | ce_loss_increases 0.0487 0.1221 0.3469 0.4017 | compound_ce_loss_increase 0.7122 | l0s 9.9998 9.9997 9.9997 9.9996 9.9997 9.9996 10.0000 9.9999 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0051 0.0067 0.0311 0.0541 0.1104 0.2740 0.3884 0.7730 type eval | step 4750 | loss 0.0115 0.0847 0.3833 1.1597 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0050 0.0065 0.0309 0.0538 0.1100 0.2734 0.3876 0.7721 | ce_loss_increases 0.0457 0.1223 0.3429 0.4023 | compound_ce_loss_increase 0.6970 | l0s 9.9998 9.9998 9.9997 9.9995 9.9997 9.9996 10.0000 9.9999 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0050 0.0065 0.0309 0.0538 0.1100 0.2734 0.3876 0.7721 type eval | step 5000 | loss 0.0113 0.0842 0.3819 1.1570 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0049 0.0064 0.0307 0.0535 0.1095 0.2724 0.3868 0.7702 | ce_loss_increases 0.0416 0.1202 0.3354 0.3960 | compound_ce_loss_increase 0.6853 | l0s 9.9999 9.9993 9.9997 9.9995 9.9997 9.9996 10.0000 9.9999 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0049 0.0064 0.0307 0.0535 0.1095 0.2724 0.3868 0.7702 type eval | step 5250 | loss 0.0111 0.0833 0.3796 1.1529 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0048 0.0063 0.0304 0.0530 0.1087 0.2709 0.3851 0.7678 | ce_loss_increases 0.0418 0.1141 0.3328 0.3943 | compound_ce_loss_increase 0.6876 | l0s 9.9999 9.9998 9.9997 9.9996 9.9997 9.9995 10.0000 9.9999 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0048 0.0063 0.0304 0.0530 0.1087 0.2709 0.3851 0.7678 type eval | step 5500 | loss 0.0109 0.0831 0.3792 1.1521 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0047 0.0062 0.0303 0.0529 0.1086 0.2706 0.3847 0.7675 | ce_loss_increases 0.0374 0.1155 0.3304 0.3995 | compound_ce_loss_increase 0.6913 | l0s 9.9999 9.9999 9.9997 9.9994 9.9997 9.9995 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0047 0.0062 0.0303 0.0529 0.1086 0.2706 0.3847 0.7675 type eval | step 5750 | loss 0.0110 0.0833 0.3790 1.1511 | checkpoint False False True True | ce_loss 1.5683 | sae_losses 0.0047 0.0062 0.0304 0.0529 0.1086 0.2704 0.3843 0.7668 | ce_loss_increases 0.0370 0.1155 0.3356 0.3971 | compound_ce_loss_increase 0.6870 | l0s 9.9999 9.9998 9.9997 9.9994 9.9997 9.9996 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0047 0.0062 0.0304 0.0529 0.1086 0.2704 0.3843 0.7668 type eval | step 6000 | loss 0.0107 0.0830 0.3797 1.1516 | checkpoint True True False False | ce_loss 1.5683 | sae_losses 0.0046 0.0061 0.0302 0.0528 0.1088 0.2709 0.3844 0.7672 | ce_loss_increases 0.0343 0.1168 0.3349 0.3918 | compound_ce_loss_increase 0.6870 | l0s 10.0000 9.9999 9.9998 9.9994 9.9997 9.9996 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0046 0.0061 0.0302 0.0528 0.1088 0.2709 0.3844 0.7672 type eval | step 6250 | loss 0.0106 0.0829 0.3798 1.1526 | checkpoint True True False False | ce_loss 1.5683 | sae_losses 0.0046 0.0060 0.0302 0.0527 0.1089 0.2708 0.3847 0.7678 | ce_loss_increases 0.0369 0.1182 0.3332 0.3906 | compound_ce_loss_increase 0.6900 | l0s 9.9999 9.9999 9.9997 9.9994 9.9997 9.9996 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0046 0.0060 0.0302 0.0527 0.1089 0.2708 0.3847 0.7678 type eval | step 6500 | loss 0.0105 0.0826 0.3788 1.1506 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0045 0.0060 0.0301 0.0525 0.1086 0.2701 0.3837 0.7669 | ce_loss_increases 0.0326 0.1164 0.3303 0.3920 | compound_ce_loss_increase 0.6884 | l0s 9.9999 9.9998 9.9997 9.9996 9.9998 9.9996 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0045 0.0060 0.0301 0.0525 0.1086 0.2701 0.3837 0.7669 type eval | step 6750 | loss 0.0104 0.0826 0.3784 1.1495 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0045 0.0059 0.0302 0.0524 0.1085 0.2699 0.3830 0.7665 | ce_loss_increases 0.0316 0.1183 0.3302 0.3913 | compound_ce_loss_increase 0.6828 | l0s 9.9999 9.9999 9.9997 9.9995 9.9998 9.9996 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0045 0.0059 0.0302 0.0524 0.1085 0.2699 0.3830 0.7665 type eval | step 7000 | loss 0.0102 0.0821 0.3786 1.1495 | checkpoint True True False True | ce_loss 1.5683 | sae_losses 0.0044 0.0058 0.0299 0.0522 0.1085 0.2701 0.3829 0.7665 | ce_loss_increases 0.0306 0.1200 0.3326 0.3890 | compound_ce_loss_increase 0.6881 | l0s 9.9999 9.9999 9.9997 9.9995 9.9997 9.9996 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0044 0.0058 0.0299 0.0522 0.1085 0.2701 0.3829 0.7665 type eval | step 7250 | loss 0.0101 0.0822 0.3781 1.1491 | checkpoint True False True True | ce_loss 1.5683 | sae_losses 0.0044 0.0057 0.0300 0.0522 0.1084 0.2698 0.3827 0.7664 | ce_loss_increases 0.0314 0.1198 0.3316 0.3900 | compound_ce_loss_increase 0.6861 | l0s 10.0000 9.9998 9.9998 9.9995 9.9997 9.9996 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0044 0.0057 0.0300 0.0522 0.1084 0.2698 0.3827 0.7664 type eval | step 7500 | loss 0.0100 0.0816 0.3775 1.1472 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0044 0.0056 0.0298 0.0519 0.1080 0.2694 0.3821 0.7651 | ce_loss_increases 0.0297 0.1163 0.3289 0.3862 | compound_ce_loss_increase 0.6724 | l0s 9.9999 9.9999 9.9997 9.9996 9.9997 9.9994 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0044 0.0056 0.0298 0.0519 0.1080 0.2694 0.3821 0.7651 type eval | step 7750 | loss 0.0099 0.0812 0.3758 1.1442 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0043 0.0056 0.0296 0.0516 0.1075 0.2684 0.3810 0.7631 | ce_loss_increases 0.0281 0.1146 0.3264 0.3854 | compound_ce_loss_increase 0.6697 | l0s 9.9999 9.9999 9.9997 9.9995 9.9997 9.9995 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0043 0.0056 0.0296 0.0516 0.1075 0.2684 0.3810 0.7631 type eval | step 8000 | loss 0.0098 0.0812 0.3756 1.1437 | checkpoint True False True True | ce_loss 1.5683 | sae_losses 0.0043 0.0055 0.0296 0.0516 0.1075 0.2681 0.3808 0.7629 | ce_loss_increases 0.0294 0.1153 0.3235 0.3917 | compound_ce_loss_increase 0.6721 | l0s 10.0000 9.9999 9.9996 9.9996 9.9997 9.9994 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0043 0.0055 0.0296 0.0516 0.1075 0.2681 0.3808 0.7629 type eval | step 8250 | loss 0.0099 0.0815 0.3756 1.1435 | checkpoint False False False True | ce_loss 1.5683 | sae_losses 0.0043 0.0056 0.0297 0.0517 0.1075 0.2682 0.3807 0.7628 | ce_loss_increases 0.0300 0.1146 0.3266 0.3943 | compound_ce_loss_increase 0.6780 | l0s 10.0000 10.0000 9.9998 9.9995 9.9997 9.9995 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0043 0.0056 0.0297 0.0517 0.1075 0.2682 0.3807 0.7628 type eval | step 8500 | loss 0.0097 0.0813 0.3762 1.1441 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0042 0.0055 0.0296 0.0517 0.1077 0.2686 0.3809 0.7631 | ce_loss_increases 0.0271 0.1159 0.3287 0.3938 | compound_ce_loss_increase 0.6728 | l0s 9.9999 9.9999 9.9998 9.9997 9.9996 9.9995 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0042 0.0055 0.0296 0.0517 0.1077 0.2686 0.3809 0.7631 type eval | step 8750 | loss 0.0096 0.0813 0.3767 1.1458 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0042 0.0054 0.0296 0.0517 0.1077 0.2690 0.3815 0.7642 | ce_loss_increases 0.0272 0.1187 0.3293 0.3953 | compound_ce_loss_increase 0.6723 | l0s 9.9999 9.9999 9.9998 9.9995 9.9996 9.9996 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0042 0.0054 0.0296 0.0517 0.1077 0.2690 0.3815 0.7642 type eval | step 9000 | loss 0.0096 0.0810 0.3761 1.1446 | checkpoint True True False False | ce_loss 1.5683 | sae_losses 0.0042 0.0054 0.0295 0.0515 0.1075 0.2686 0.3812 0.7634 | ce_loss_increases 0.0267 0.1178 0.3269 0.3971 | compound_ce_loss_increase 0.6779 | l0s 9.9999 9.9999 9.9998 9.9994 9.9996 9.9995 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0042 0.0054 0.0295 0.0515 0.1075 0.2686 0.3812 0.7634 type eval | step 9250 | loss 0.0095 0.0810 0.3758 1.1441 | checkpoint True True False False | ce_loss 1.5683 | sae_losses 0.0042 0.0054 0.0295 0.0515 0.1073 0.2684 0.3810 0.7631 | ce_loss_increases 0.0260 0.1160 0.3258 0.3973 | compound_ce_loss_increase 0.6751 | l0s 10.0000 9.9999 9.9998 9.9994 9.9997 9.9996 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0042 0.0054 0.0295 0.0515 0.1073 0.2684 0.3810 0.7631 type eval | step 9500 | loss 0.0094 0.0809 0.3762 1.1450 | checkpoint True True False False | ce_loss 1.5683 | sae_losses 0.0041 0.0053 0.0294 0.0515 0.1075 0.2687 0.3812 0.7638 | ce_loss_increases 0.0257 0.1140 0.3294 0.3982 | compound_ce_loss_increase 0.6752 | l0s 9.9999 9.9998 9.9998 9.9994 9.9997 9.9995 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0041 0.0053 0.0294 0.0515 0.1075 0.2687 0.3812 0.7638 type eval | step 9750 | loss 0.0094 0.0808 0.3759 1.1446 | checkpoint True True False False | ce_loss 1.5683 | sae_losses 0.0041 0.0053 0.0295 0.0513 0.1073 0.2686 0.3811 0.7635 | ce_loss_increases 0.0253 0.1135 0.3338 0.4025 | compound_ce_loss_increase 0.6851 | l0s 9.9999 9.9998 9.9998 9.9995 9.9997 9.9996 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0041 0.0053 0.0295 0.0513 0.1073 0.2686 0.3811 0.7635 type eval | step 10000 | loss 0.0093 0.0804 0.3755 1.1435 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0041 0.0052 0.0293 0.0512 0.1072 0.2683 0.3807 0.7628 | ce_loss_increases 0.0248 0.1107 0.3322 0.4010 | compound_ce_loss_increase 0.6760 | l0s 9.9999 9.9998 9.9998 9.9994 9.9997 9.9996 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0041 0.0052 0.0293 0.0512 0.1072 0.2683 0.3807 0.7628 type eval | step 10250 | loss 0.0093 0.0802 0.3744 1.1415 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0041 0.0052 0.0291 0.0511 0.1068 0.2676 0.3800 0.7615 | ce_loss_increases 0.0252 0.1078 0.3299 0.3998 | compound_ce_loss_increase 0.6636 | l0s 9.9999 9.9998 9.9998 9.9995 9.9997 9.9996 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0041 0.0052 0.0291 0.0511 0.1068 0.2676 0.3800 0.7615 type eval | step 10500 | loss 0.0092 0.0801 0.3741 1.1404 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0040 0.0052 0.0291 0.0510 0.1067 0.2675 0.3796 0.7609 | ce_loss_increases 0.0256 0.1074 0.3293 0.3998 | compound_ce_loss_increase 0.6681 | l0s 9.9999 9.9999 9.9998 9.9995 9.9997 9.9996 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0040 0.0052 0.0291 0.0510 0.1067 0.2675 0.3796 0.7609 type eval | step 10750 | loss 0.0093 0.0803 0.3742 1.1402 | checkpoint False False False True | ce_loss 1.5683 | sae_losses 0.0041 0.0052 0.0292 0.0511 0.1067 0.2675 0.3796 0.7606 | ce_loss_increases 0.0226 0.1082 0.3308 0.4003 | compound_ce_loss_increase 0.6783 | l0s 9.9999 10.0000 9.9997 9.9993 9.9997 9.9995 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0041 0.0052 0.0292 0.0511 0.1067 0.2675 0.3796 0.7606 type eval | step 11000 | loss 0.0092 0.0804 0.3747 1.1403 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0040 0.0052 0.0293 0.0511 0.1069 0.2678 0.3796 0.7607 | ce_loss_increases 0.0252 0.1072 0.3320 0.3990 | compound_ce_loss_increase 0.6718 | l0s 10.0000 9.9999 9.9998 9.9995 9.9997 9.9996 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0040 0.0052 0.0293 0.0511 0.1069 0.2678 0.3796 0.7607 type eval | step 11250 | loss 0.0092 0.0803 0.3753 1.1415 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0040 0.0051 0.0292 0.0512 0.1071 0.2683 0.3799 0.7616 | ce_loss_increases 0.0239 0.1091 0.3346 0.3959 | compound_ce_loss_increase 0.6761 | l0s 9.9999 9.9999 9.9998 9.9993 9.9997 9.9996 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0040 0.0051 0.0292 0.0512 0.1071 0.2683 0.3799 0.7616 type eval | step 11500 | loss 0.0091 0.0801 0.3750 1.1412 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0040 0.0051 0.0291 0.0510 0.1070 0.2680 0.3797 0.7616 | ce_loss_increases 0.0244 0.1087 0.3333 0.3969 | compound_ce_loss_increase 0.6764 | l0s 9.9999 9.9999 9.9998 9.9994 9.9997 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0040 0.0051 0.0291 0.0510 0.1070 0.2680 0.3797 0.7616 type eval | step 11750 | loss 0.0091 0.0801 0.3747 1.1404 | checkpoint True True False False | ce_loss 1.5683 | sae_losses 0.0040 0.0051 0.0291 0.0510 0.1069 0.2678 0.3794 0.7610 | ce_loss_increases 0.0233 0.1059 0.3294 0.3967 | compound_ce_loss_increase 0.6780 | l0s 9.9999 9.9999 9.9998 9.9995 9.9997 9.9996 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0040 0.0051 0.0291 0.0510 0.1069 0.2678 0.3794 0.7610 type eval | step 12000 | loss 0.0090 0.0801 0.3748 1.1410 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0040 0.0051 0.0291 0.0510 0.1069 0.2679 0.3795 0.7616 | ce_loss_increases 0.0223 0.1038 0.3297 0.3943 | compound_ce_loss_increase 0.6756 | l0s 10.0000 9.9999 9.9998 9.9994 9.9997 9.9996 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0040 0.0051 0.0291 0.0510 0.1069 0.2679 0.3795 0.7616 type eval | step 12250 | loss 0.0090 0.0798 0.3747 1.1408 | checkpoint True True False False | ce_loss 1.5683 | sae_losses 0.0040 0.0051 0.0289 0.0509 0.1067 0.2680 0.3794 0.7614 | ce_loss_increases 0.0226 0.1022 0.3324 0.3950 | compound_ce_loss_increase 0.6775 | l0s 9.9999 9.9999 9.9998 9.9995 9.9997 9.9996 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0040 0.0051 0.0289 0.0509 0.1067 0.2680 0.3794 0.7614 type eval | step 12500 | loss 0.0089 0.0798 0.3743 1.1401 | checkpoint True True False True | ce_loss 1.5683 | sae_losses 0.0039 0.0050 0.0289 0.0508 0.1066 0.2677 0.3792 0.7609 | ce_loss_increases 0.0224 0.1020 0.3338 0.3957 | compound_ce_loss_increase 0.6743 | l0s 9.9999 9.9999 9.9998 9.9995 9.9997 9.9996 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0039 0.0050 0.0289 0.0508 0.1066 0.2677 0.3792 0.7609 type eval | step 12750 | loss 0.0089 0.0795 0.3736 1.1389 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0039 0.0050 0.0288 0.0508 0.1064 0.2673 0.3786 0.7603 | ce_loss_increases 0.0220 0.0997 0.3302 0.3944 | compound_ce_loss_increase 0.6700 | l0s 9.9999 9.9999 9.9998 9.9995 9.9997 9.9996 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0039 0.0050 0.0288 0.0508 0.1064 0.2673 0.3786 0.7603 type eval | step 13000 | loss 0.0089 0.0794 0.3731 1.1378 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0039 0.0050 0.0287 0.0507 0.1062 0.2669 0.3782 0.7596 | ce_loss_increases 0.0220 0.0971 0.3294 0.3946 | compound_ce_loss_increase 0.6709 | l0s 9.9999 9.9998 9.9998 9.9995 9.9997 9.9996 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0039 0.0050 0.0287 0.0507 0.1062 0.2669 0.3782 0.7596 type eval | step 13250 | loss 0.0089 0.0794 0.3732 1.1373 | checkpoint True False False True | ce_loss 1.5683 | sae_losses 0.0039 0.0050 0.0287 0.0507 0.1062 0.2670 0.3781 0.7593 | ce_loss_increases 0.0226 0.0974 0.3292 0.3950 | compound_ce_loss_increase 0.6743 | l0s 10.0000 9.9999 9.9998 9.9995 9.9997 9.9996 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0039 0.0050 0.0287 0.0507 0.1062 0.2670 0.3781 0.7593 type eval | step 13500 | loss 0.0089 0.0796 0.3734 1.1371 | checkpoint False False False True | ce_loss 1.5683 | sae_losses 0.0039 0.0050 0.0288 0.0508 0.1063 0.2672 0.3781 0.7590 | ce_loss_increases 0.0217 0.0962 0.3307 0.3955 | compound_ce_loss_increase 0.6708 | l0s 10.0000 9.9999 9.9998 9.9995 9.9997 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0039 0.0050 0.0288 0.0508 0.1063 0.2672 0.3781 0.7590 type eval | step 13750 | loss 0.0088 0.0797 0.3739 1.1379 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0039 0.0050 0.0289 0.0508 0.1065 0.2674 0.3784 0.7595 | ce_loss_increases 0.0212 0.0944 0.3326 0.3970 | compound_ce_loss_increase 0.6680 | l0s 10.0000 9.9999 9.9998 9.9995 9.9997 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0039 0.0050 0.0289 0.0508 0.1065 0.2674 0.3784 0.7595 type eval | step 14000 | loss 0.0088 0.0794 0.3738 1.1380 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0039 0.0049 0.0287 0.0507 0.1064 0.2674 0.3784 0.7596 | ce_loss_increases 0.0214 0.0960 0.3331 0.3984 | compound_ce_loss_increase 0.6717 | l0s 9.9999 9.9999 9.9998 9.9995 9.9997 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0039 0.0049 0.0287 0.0507 0.1064 0.2674 0.3784 0.7596 type eval | step 14250 | loss 0.0088 0.0794 0.3734 1.1373 | checkpoint True True False False | ce_loss 1.5683 | sae_losses 0.0038 0.0049 0.0287 0.0506 0.1063 0.2671 0.3781 0.7591 | ce_loss_increases 0.0217 0.0965 0.3326 0.4002 | compound_ce_loss_increase 0.6726 | l0s 9.9999 9.9999 9.9997 9.9996 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0038 0.0049 0.0287 0.0506 0.1063 0.2671 0.3781 0.7591 type eval | step 14500 | loss 0.0088 0.0793 0.3734 1.1374 | checkpoint False True False False | ce_loss 1.5683 | sae_losses 0.0039 0.0049 0.0287 0.0506 0.1063 0.2671 0.3782 0.7592 | ce_loss_increases 0.0209 0.0971 0.3289 0.3998 | compound_ce_loss_increase 0.6687 | l0s 9.9999 9.9999 9.9998 9.9996 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0039 0.0049 0.0287 0.0506 0.1063 0.2671 0.3782 0.7592 type eval | step 14750 | loss 0.0087 0.0791 0.3734 1.1371 | checkpoint True True False False | ce_loss 1.5683 | sae_losses 0.0038 0.0049 0.0286 0.0505 0.1062 0.2672 0.3782 0.7590 | ce_loss_increases 0.0200 0.0948 0.3306 0.3997 | compound_ce_loss_increase 0.6758 | l0s 10.0000 9.9999 9.9998 9.9996 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0038 0.0049 0.0286 0.0505 0.1062 0.2672 0.3782 0.7590 type eval | step 15000 | loss 0.0087 0.0790 0.3733 1.1368 | checkpoint False True False True | ce_loss 1.5683 | sae_losses 0.0038 0.0049 0.0286 0.0504 0.1061 0.2671 0.3781 0.7587 | ce_loss_increases 0.0202 0.0937 0.3321 0.3993 | compound_ce_loss_increase 0.6720 | l0s 10.0000 9.9999 9.9998 9.9996 9.9997 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0038 0.0049 0.0286 0.0504 0.1061 0.2671 0.3781 0.7587 type eval | step 15250 | loss 0.0087 0.0789 0.3730 1.1360 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0038 0.0049 0.0285 0.0504 0.1061 0.2669 0.3779 0.7581 | ce_loss_increases 0.0207 0.0932 0.3304 0.3988 | compound_ce_loss_increase 0.6678 | l0s 9.9999 9.9999 9.9998 9.9996 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0038 0.0049 0.0285 0.0504 0.1061 0.2669 0.3779 0.7581 type eval | step 15500 | loss 0.0086 0.0788 0.3724 1.1351 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0038 0.0048 0.0285 0.0503 0.1059 0.2666 0.3776 0.7575 | ce_loss_increases 0.0202 0.0927 0.3283 0.3995 | compound_ce_loss_increase 0.6662 | l0s 9.9999 9.9999 9.9998 9.9996 9.9998 9.9997 10.0000 9.9999 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0038 0.0048 0.0285 0.0503 0.1059 0.2666 0.3776 0.7575 type eval | step 15750 | loss 0.0086 0.0788 0.3725 1.1350 | checkpoint True True False True | ce_loss 1.5683 | sae_losses 0.0038 0.0048 0.0284 0.0503 0.1059 0.2666 0.3775 0.7575 | ce_loss_increases 0.0201 0.0937 0.3295 0.4003 | compound_ce_loss_increase 0.6666 | l0s 10.0000 9.9999 9.9998 9.9996 9.9997 9.9997 10.0000 9.9999 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0038 0.0048 0.0284 0.0503 0.1059 0.2666 0.3775 0.7575 type eval | step 16000 | loss 0.0087 0.0788 0.3725 1.1347 | checkpoint False False False True | ce_loss 1.5683 | sae_losses 0.0038 0.0049 0.0285 0.0504 0.1059 0.2667 0.3775 0.7572 | ce_loss_increases 0.0204 0.0948 0.3298 0.4016 | compound_ce_loss_increase 0.6643 | l0s 9.9999 10.0000 9.9997 9.9996 9.9998 9.9997 10.0000 9.9999 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0038 0.0049 0.0285 0.0504 0.1059 0.2667 0.3775 0.7572 type eval | step 16250 | loss 0.0086 0.0790 0.3729 1.1352 | checkpoint False False False False | ce_loss 1.5683 | sae_losses 0.0038 0.0049 0.0286 0.0505 0.1060 0.2669 0.3778 0.7575 | ce_loss_increases 0.0205 0.0948 0.3307 0.4023 | compound_ce_loss_increase 0.6680 | l0s 10.0000 9.9999 9.9997 9.9996 9.9998 9.9998 10.0000 9.9999 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0038 0.0049 0.0286 0.0505 0.1060 0.2669 0.3778 0.7575 type eval | step 16500 | loss 0.0086 0.0790 0.3731 1.1356 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0038 0.0048 0.0285 0.0505 0.1061 0.2670 0.3779 0.7577 | ce_loss_increases 0.0206 0.0950 0.3328 0.4027 | compound_ce_loss_increase 0.6716 | l0s 10.0000 9.9999 9.9998 9.9996 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0038 0.0048 0.0285 0.0505 0.1061 0.2670 0.3779 0.7577 type eval | step 16750 | loss 0.0086 0.0789 0.3728 1.1351 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0038 0.0048 0.0285 0.0504 0.1060 0.2668 0.3778 0.7573 | ce_loss_increases 0.0204 0.0941 0.3336 0.4027 | compound_ce_loss_increase 0.6673 | l0s 10.0000 9.9999 9.9998 9.9996 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0038 0.0048 0.0285 0.0504 0.1060 0.2668 0.3778 0.7573 type eval | step 17000 | loss 0.0086 0.0789 0.3728 1.1351 | checkpoint False False False False | ce_loss 1.5683 | sae_losses 0.0038 0.0048 0.0285 0.0504 0.1060 0.2668 0.3778 0.7573 | ce_loss_increases 0.0199 0.0952 0.3313 0.4037 | compound_ce_loss_increase 0.6659 | l0s 10.0000 9.9999 9.9997 9.9996 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0038 0.0048 0.0285 0.0504 0.1060 0.2668 0.3778 0.7573 type eval | step 17250 | loss 0.0086 0.0788 0.3727 1.1349 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0038 0.0048 0.0285 0.0504 0.1060 0.2668 0.3778 0.7572 | ce_loss_increases 0.0193 0.0941 0.3316 0.4037 | compound_ce_loss_increase 0.6702 | l0s 10.0000 9.9999 9.9998 9.9995 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0038 0.0048 0.0285 0.0504 0.1060 0.2668 0.3778 0.7572 type eval | step 17500 | loss 0.0086 0.0787 0.3726 1.1346 | checkpoint True True False True | ce_loss 1.5683 | sae_losses 0.0038 0.0048 0.0284 0.0503 0.1059 0.2667 0.3777 0.7570 | ce_loss_increases 0.0186 0.0932 0.3331 0.4028 | compound_ce_loss_increase 0.6695 | l0s 10.0000 9.9999 9.9998 9.9995 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0038 0.0048 0.0284 0.0503 0.1059 0.2667 0.3777 0.7570 type eval | step 17750 | loss 0.0086 0.0787 0.3725 1.1343 | checkpoint True True False True | ce_loss 1.5683 | sae_losses 0.0038 0.0048 0.0284 0.0503 0.1059 0.2666 0.3776 0.7567 | ce_loss_increases 0.0190 0.0923 0.3317 0.4018 | compound_ce_loss_increase 0.6684 | l0s 10.0000 9.9999 9.9998 9.9995 9.9998 9.9997 10.0000 9.9999 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0038 0.0048 0.0284 0.0503 0.1059 0.2666 0.3776 0.7567 type eval | step 18000 | loss 0.0085 0.0786 0.3722 1.1337 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0037 0.0048 0.0284 0.0502 0.1058 0.2664 0.3774 0.7563 | ce_loss_increases 0.0202 0.0925 0.3315 0.4020 | compound_ce_loss_increase 0.6645 | l0s 10.0000 9.9999 9.9998 9.9995 9.9998 9.9997 10.0000 9.9999 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0037 0.0048 0.0284 0.0502 0.1058 0.2664 0.3774 0.7563 type eval | step 18250 | loss 0.0085 0.0786 0.3721 1.1337 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0037 0.0048 0.0283 0.0502 0.1057 0.2664 0.3774 0.7563 | ce_loss_increases 0.0196 0.0920 0.3306 0.4033 | compound_ce_loss_increase 0.6659 | l0s 10.0000 10.0000 9.9998 9.9995 9.9998 9.9997 10.0000 9.9999 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0037 0.0048 0.0283 0.0502 0.1057 0.2664 0.3774 0.7563 type eval | step 18500 | loss 0.0085 0.0786 0.3722 1.1336 | checkpoint False False False True | ce_loss 1.5683 | sae_losses 0.0037 0.0048 0.0283 0.0502 0.1057 0.2664 0.3774 0.7562 | ce_loss_increases 0.0199 0.0931 0.3320 0.4029 | compound_ce_loss_increase 0.6656 | l0s 10.0000 10.0000 9.9998 9.9995 9.9998 9.9997 10.0000 9.9999 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0037 0.0048 0.0283 0.0502 0.1057 0.2664 0.3774 0.7562 type eval | step 18750 | loss 0.0085 0.0787 0.3723 1.1338 | checkpoint False False False False | ce_loss 1.5683 | sae_losses 0.0037 0.0048 0.0284 0.0503 0.1058 0.2665 0.3775 0.7563 | ce_loss_increases 0.0193 0.0925 0.3322 0.4028 | compound_ce_loss_increase 0.6681 | l0s 10.0000 9.9999 9.9998 9.9995 9.9998 9.9997 10.0000 9.9999 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0037 0.0048 0.0284 0.0503 0.1058 0.2665 0.3775 0.7563 type eval | step 19000 | loss 0.0085 0.0788 0.3725 1.1341 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0037 0.0048 0.0285 0.0503 0.1059 0.2666 0.3777 0.7565 | ce_loss_increases 0.0208 0.0927 0.3338 0.4034 | compound_ce_loss_increase 0.6680 | l0s 10.0000 9.9999 9.9998 9.9996 9.9998 9.9997 10.0000 9.9999 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0037 0.0048 0.0285 0.0503 0.1059 0.2666 0.3777 0.7565 type eval | step 19250 | loss 0.0085 0.0787 0.3725 1.1339 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0037 0.0048 0.0284 0.0503 0.1059 0.2666 0.3775 0.7563 | ce_loss_increases 0.0203 0.0928 0.3334 0.4028 | compound_ce_loss_increase 0.6704 | l0s 10.0000 10.0000 9.9998 9.9995 9.9998 9.9997 10.0000 9.9999 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0037 0.0048 0.0284 0.0503 0.1059 0.2666 0.3775 0.7563 type eval | step 19500 | loss 0.0085 0.0788 0.3723 1.1339 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0037 0.0048 0.0284 0.0503 0.1058 0.2665 0.3775 0.7564 | ce_loss_increases 0.0192 0.0929 0.3318 0.4025 | compound_ce_loss_increase 0.6694 | l0s 10.0000 9.9999 9.9999 9.9996 9.9998 9.9998 10.0000 9.9999 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0037 0.0048 0.0284 0.0503 0.1058 0.2665 0.3775 0.7564 type eval | step 19750 | loss 0.0085 0.0787 0.3724 1.1338 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0037 0.0048 0.0284 0.0503 0.1059 0.2665 0.3776 0.7562 | ce_loss_increases 0.0198 0.0939 0.3322 0.4020 | compound_ce_loss_increase 0.6695 | l0s 10.0000 9.9999 9.9999 9.9996 9.9998 9.9997 10.0000 9.9999 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0037 0.0048 0.0284 0.0503 0.1059 0.2665 0.3776 0.7562 type eval | step 20000 | loss 0.0085 0.0787 0.3724 1.1337 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0037 0.0048 0.0284 0.0503 0.1059 0.2665 0.3776 0.7561 | ce_loss_increases 0.0201 0.0930 0.3324 0.4034 | compound_ce_loss_increase 0.6708 | l0s 10.0000 9.9999 9.9999 9.9996 9.9998 9.9997 10.0000 9.9999 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0037 0.0048 0.0284 0.0503 0.1059 0.2665 0.3776 0.7561