type eval | step 0 | loss 30.2236 27.7836 29.8112 95.0827 | checkpoint False | ce_loss 1.5683 | sae_losses 13.7413 16.4824 13.0375 14.7461 12.5411 17.2700 14.9756 80.1070 | ce_loss_increases 4.1792 3.6156 2.8187 1.9520 | compound_ce_loss_increase 3.9959 | l0s 10.1455 10.1365 10.1630 10.1467 10.1606 10.1427 10.1460 10.1338 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 13.7413 16.4824 13.0375 14.7461 12.5411 17.2700 14.9756 80.1070 type eval | step 250 | loss 3.1602 4.1021 5.9655 13.8641 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 1.4373 1.7229 1.8650 2.2371 2.4146 3.5509 3.2597 10.6044 | ce_loss_increases 4.0353 3.5584 3.1935 2.3586 | compound_ce_loss_increase 4.0453 | l0s 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 1.4373 1.7229 1.8650 2.2371 2.4146 3.5509 3.2597 10.6044 type eval | step 500 | loss 0.1962 0.6740 1.2705 2.4930 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0864 0.1097 0.2963 0.3777 0.4921 0.7785 0.8256 1.6674 | ce_loss_increases 2.3759 1.9661 1.6867 1.0327 | compound_ce_loss_increase 3.4999 | l0s 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0864 0.1097 0.2963 0.3777 0.4921 0.7785 0.8256 1.6674 type eval | step 750 | loss 0.0412 0.2415 0.6543 1.6024 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0202 0.0210 0.1049 0.1366 0.2207 0.4336 0.5416 1.0608 | ce_loss_increases 0.6476 0.8678 0.7827 0.6048 | compound_ce_loss_increase 1.6081 | l0s 9.9991 9.9992 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0202 0.0210 0.1049 0.1366 0.2207 0.4336 0.5416 1.0608 type eval | step 1000 | loss 0.0281 0.1585 0.5044 1.3939 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0140 0.0141 0.0696 0.0889 0.1577 0.3467 0.4704 0.9235 | ce_loss_increases 0.3580 0.5133 0.5052 0.4833 | compound_ce_loss_increase 1.0510 | l0s 9.9991 9.9989 10.0000 9.9999 10.0000 10.0000 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0140 0.0141 0.0696 0.0889 0.1577 0.3467 0.4704 0.9235 type eval | step 1250 | loss 0.0233 0.1313 0.4565 1.3083 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0114 0.0118 0.0574 0.0739 0.1379 0.3187 0.4405 0.8678 | ce_loss_increases 0.2540 0.3761 0.4255 0.4496 | compound_ce_loss_increase 0.8709 | l0s 9.9993 9.9993 9.9999 9.9997 10.0000 10.0000 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0114 0.0118 0.0574 0.0739 0.1379 0.3187 0.4405 0.8678 type eval | step 1500 | loss 0.0207 0.1176 0.4323 1.2611 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0101 0.0107 0.0507 0.0669 0.1276 0.3047 0.4241 0.8369 | ce_loss_increases 0.2034 0.3119 0.3854 0.4388 | compound_ce_loss_increase 0.7969 | l0s 9.9997 9.9995 9.9998 9.9996 10.0000 10.0000 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0101 0.0107 0.0507 0.0669 0.1276 0.3047 0.4241 0.8369 type eval | step 1750 | loss 0.0190 0.1099 0.4196 1.2341 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0091 0.0099 0.0462 0.0637 0.1225 0.2971 0.4154 0.8188 | ce_loss_increases 0.1747 0.2646 0.3718 0.4313 | compound_ce_loss_increase 0.7754 | l0s 9.9997 9.9996 9.9998 9.9995 9.9999 10.0000 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0091 0.0099 0.0462 0.0637 0.1225 0.2971 0.4154 0.8188 type eval | step 2000 | loss 0.0175 0.1031 0.4101 1.2153 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0083 0.0092 0.0423 0.0608 0.1188 0.2912 0.4092 0.8062 | ce_loss_increases 0.1388 0.2225 0.3697 0.4199 | compound_ce_loss_increase 0.7626 | l0s 9.9997 9.9995 9.9997 9.9996 9.9998 10.0000 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0083 0.0092 0.0423 0.0608 0.1188 0.2912 0.4092 0.8062 type eval | step 2250 | loss 0.0162 0.0985 0.4033 1.2012 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0076 0.0086 0.0396 0.0589 0.1161 0.2871 0.4046 0.7966 | ce_loss_increases 0.1227 0.2042 0.3644 0.4130 | compound_ce_loss_increase 0.7347 | l0s 9.9997 9.9994 9.9998 9.9993 9.9999 9.9999 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0076 0.0086 0.0396 0.0589 0.1161 0.2871 0.4046 0.7966 type eval | step 2500 | loss 0.0153 0.0952 0.3981 1.1893 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0072 0.0082 0.0375 0.0577 0.1143 0.2838 0.4009 0.7884 | ce_loss_increases 0.0997 0.1892 0.3582 0.4051 | compound_ce_loss_increase 0.7201 | l0s 9.9997 9.9995 9.9998 9.9996 9.9999 9.9999 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0072 0.0082 0.0375 0.0577 0.1143 0.2838 0.4009 0.7884 type eval | step 2750 | loss 0.0146 0.0924 0.3927 1.1786 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0068 0.0078 0.0357 0.0567 0.1124 0.2803 0.3970 0.7816 | ce_loss_increases 0.0890 0.1739 0.3539 0.3956 | compound_ce_loss_increase 0.6959 | l0s 9.9999 9.9997 9.9998 9.9995 9.9999 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0068 0.0078 0.0357 0.0567 0.1124 0.2803 0.3970 0.7816 type eval | step 3000 | loss 0.0139 0.0907 0.3903 1.1731 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0065 0.0074 0.0347 0.0560 0.1117 0.2786 0.3949 0.7782 | ce_loss_increases 0.0761 0.1714 0.3483 0.3907 | compound_ce_loss_increase 0.6938 | l0s 9.9998 9.9995 9.9999 9.9994 9.9999 9.9999 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0065 0.0074 0.0347 0.0560 0.1117 0.2786 0.3949 0.7782 type eval | step 3250 | loss 0.0134 0.0896 0.3885 1.1680 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0063 0.0071 0.0341 0.0555 0.1111 0.2774 0.3934 0.7746 | ce_loss_increases 0.0711 0.1648 0.3488 0.3893 | compound_ce_loss_increase 0.6850 | l0s 9.9996 9.9994 9.9997 9.9993 9.9999 9.9999 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0063 0.0071 0.0341 0.0555 0.1111 0.2774 0.3934 0.7746 type eval | step 3500 | loss 0.0130 0.0888 0.3875 1.1658 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0061 0.0068 0.0335 0.0552 0.1107 0.2769 0.3928 0.7730 | ce_loss_increases 0.0675 0.1645 0.3488 0.3854 | compound_ce_loss_increase 0.6827 | l0s 9.9997 9.9995 9.9998 9.9992 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0061 0.0068 0.0335 0.0552 0.1107 0.2769 0.3928 0.7730 type eval | step 3750 | loss 0.0124 0.0878 0.3868 1.1632 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0059 0.0066 0.0330 0.0549 0.1103 0.2765 0.3919 0.7713 | ce_loss_increases 0.0607 0.1565 0.3465 0.3870 | compound_ce_loss_increase 0.6756 | l0s 9.9998 9.9997 9.9998 9.9992 9.9999 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0059 0.0066 0.0330 0.0549 0.1103 0.2765 0.3919 0.7713 type eval | step 4000 | loss 0.0121 0.0870 0.3850 1.1588 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0058 0.0064 0.0325 0.0545 0.1096 0.2753 0.3899 0.7689 | ce_loss_increases 0.0608 0.1539 0.3398 0.3857 | compound_ce_loss_increase 0.6727 | l0s 9.9998 9.9996 9.9998 9.9991 9.9998 9.9999 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0058 0.0064 0.0325 0.0545 0.1096 0.2753 0.3899 0.7689 type eval | step 4250 | loss 0.0118 0.0867 0.3840 1.1567 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0056 0.0062 0.0323 0.0544 0.1093 0.2747 0.3886 0.7681 | ce_loss_increases 0.0565 0.1470 0.3364 0.3856 | compound_ce_loss_increase 0.6806 | l0s 9.9998 9.9995 9.9997 9.9994 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0056 0.0062 0.0323 0.0544 0.1093 0.2747 0.3886 0.7681 type eval | step 4500 | loss 0.0115 0.0857 0.3831 1.1564 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0055 0.0060 0.0319 0.0538 0.1087 0.2744 0.3880 0.7684 | ce_loss_increases 0.0523 0.1453 0.3418 0.3894 | compound_ce_loss_increase 0.6854 | l0s 9.9997 9.9995 9.9998 9.9995 9.9999 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0055 0.0060 0.0319 0.0538 0.1087 0.2744 0.3880 0.7684 type eval | step 4750 | loss 0.0112 0.0853 0.3824 1.1544 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0054 0.0058 0.0317 0.0536 0.1084 0.2739 0.3868 0.7676 | ce_loss_increases 0.0503 0.1440 0.3395 0.3899 | compound_ce_loss_increase 0.6767 | l0s 9.9998 9.9997 9.9998 9.9994 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0054 0.0058 0.0317 0.0536 0.1084 0.2739 0.3868 0.7676 type eval | step 5000 | loss 0.0110 0.0847 0.3809 1.1508 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0053 0.0057 0.0314 0.0533 0.1079 0.2730 0.3855 0.7653 | ce_loss_increases 0.0481 0.1418 0.3375 0.3852 | compound_ce_loss_increase 0.6705 | l0s 9.9998 9.9995 9.9997 9.9995 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0053 0.0057 0.0314 0.0533 0.1079 0.2730 0.3855 0.7653 type eval | step 5250 | loss 0.0108 0.0840 0.3784 1.1459 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0052 0.0056 0.0310 0.0530 0.1068 0.2716 0.3836 0.7623 | ce_loss_increases 0.0474 0.1377 0.3339 0.3874 | compound_ce_loss_increase 0.6706 | l0s 9.9998 9.9996 9.9997 9.9995 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0052 0.0056 0.0310 0.0530 0.1068 0.2716 0.3836 0.7623 type eval | step 5500 | loss 0.0106 0.0838 0.3780 1.1445 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0051 0.0055 0.0309 0.0529 0.1066 0.2715 0.3829 0.7616 | ce_loss_increases 0.0442 0.1408 0.3387 0.3890 | compound_ce_loss_increase 0.6834 | l0s 9.9999 9.9994 9.9997 9.9996 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0051 0.0055 0.0309 0.0529 0.1066 0.2715 0.3829 0.7616 type eval | step 5750 | loss 0.0106 0.0838 0.3776 1.1430 | checkpoint False False True True | ce_loss 1.5683 | sae_losses 0.0051 0.0055 0.0309 0.0529 0.1066 0.2711 0.3823 0.7607 | ce_loss_increases 0.0423 0.1338 0.3361 0.3912 | compound_ce_loss_increase 0.6751 | l0s 9.9999 9.9997 9.9997 9.9994 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0051 0.0055 0.0309 0.0529 0.1066 0.2711 0.3823 0.7607 type eval | step 6000 | loss 0.0104 0.0837 0.3778 1.1434 | checkpoint True True False False | ce_loss 1.5683 | sae_losses 0.0050 0.0054 0.0308 0.0528 0.1064 0.2714 0.3827 0.7607 | ce_loss_increases 0.0431 0.1348 0.3366 0.3928 | compound_ce_loss_increase 0.6731 | l0s 9.9999 9.9994 9.9998 9.9994 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0050 0.0054 0.0308 0.0528 0.1064 0.2714 0.3827 0.7607 type eval | step 6250 | loss 0.0102 0.0834 0.3781 1.1438 | checkpoint True True False False | ce_loss 1.5683 | sae_losses 0.0049 0.0053 0.0307 0.0527 0.1064 0.2717 0.3829 0.7609 | ce_loss_increases 0.0397 0.1319 0.3390 0.3941 | compound_ce_loss_increase 0.6711 | l0s 9.9996 9.9994 9.9997 9.9993 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0049 0.0053 0.0307 0.0527 0.1064 0.2717 0.3829 0.7609 type eval | step 6500 | loss 0.0101 0.0831 0.3775 1.1421 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0049 0.0052 0.0305 0.0526 0.1062 0.2713 0.3821 0.7600 | ce_loss_increases 0.0379 0.1326 0.3398 0.3937 | compound_ce_loss_increase 0.6690 | l0s 9.9999 9.9997 9.9998 9.9993 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0049 0.0052 0.0305 0.0526 0.1062 0.2713 0.3821 0.7600 type eval | step 6750 | loss 0.0100 0.0831 0.3771 1.1414 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0049 0.0051 0.0305 0.0526 0.1060 0.2710 0.3818 0.7596 | ce_loss_increases 0.0384 0.1331 0.3352 0.3945 | compound_ce_loss_increase 0.6661 | l0s 9.9999 9.9996 9.9998 9.9992 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0049 0.0051 0.0305 0.0526 0.1060 0.2710 0.3818 0.7596 type eval | step 7000 | loss 0.0099 0.0827 0.3772 1.1420 | checkpoint True True False False | ce_loss 1.5683 | sae_losses 0.0048 0.0050 0.0304 0.0523 0.1060 0.2711 0.3818 0.7602 | ce_loss_increases 0.0360 0.1305 0.3385 0.3970 | compound_ce_loss_increase 0.6820 | l0s 9.9998 9.9996 9.9998 9.9992 9.9998 9.9996 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0048 0.0050 0.0304 0.0523 0.1060 0.2711 0.3818 0.7602 type eval | step 7250 | loss 0.0098 0.0826 0.3768 1.1408 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0048 0.0050 0.0303 0.0523 0.1058 0.2710 0.3814 0.7594 | ce_loss_increases 0.0357 0.1306 0.3386 0.3979 | compound_ce_loss_increase 0.6800 | l0s 9.9999 9.9997 9.9998 9.9993 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0048 0.0050 0.0303 0.0523 0.1058 0.2710 0.3814 0.7594 type eval | step 7500 | loss 0.0097 0.0822 0.3762 1.1391 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0047 0.0049 0.0302 0.0521 0.1055 0.2707 0.3808 0.7582 | ce_loss_increases 0.0359 0.1327 0.3359 0.3921 | compound_ce_loss_increase 0.6706 | l0s 9.9998 9.9997 9.9998 9.9993 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0047 0.0049 0.0302 0.0521 0.1055 0.2707 0.3808 0.7582 type eval | step 7750 | loss 0.0095 0.0817 0.3746 1.1359 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0047 0.0049 0.0300 0.0518 0.1050 0.2696 0.3796 0.7563 | ce_loss_increases 0.0351 0.1300 0.3325 0.3913 | compound_ce_loss_increase 0.6618 | l0s 9.9999 9.9998 9.9998 9.9993 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0047 0.0049 0.0300 0.0518 0.1050 0.2696 0.3796 0.7563 type eval | step 8000 | loss 0.0094 0.0818 0.3742 1.1353 | checkpoint True False True True | ce_loss 1.5683 | sae_losses 0.0046 0.0048 0.0300 0.0518 0.1049 0.2693 0.3793 0.7560 | ce_loss_increases 0.0344 0.1326 0.3354 0.3907 | compound_ce_loss_increase 0.6611 | l0s 9.9998 9.9997 9.9998 9.9992 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0046 0.0048 0.0300 0.0518 0.1049 0.2693 0.3793 0.7560 type eval | step 8250 | loss 0.0096 0.0820 0.3741 1.1348 | checkpoint False False True True | ce_loss 1.5683 | sae_losses 0.0047 0.0049 0.0302 0.0518 0.1050 0.2691 0.3792 0.7556 | ce_loss_increases 0.0337 0.1295 0.3323 0.3937 | compound_ce_loss_increase 0.6644 | l0s 9.9999 9.9996 9.9997 9.9992 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0047 0.0049 0.0302 0.0518 0.1050 0.2691 0.3792 0.7556 type eval | step 8500 | loss 0.0094 0.0821 0.3747 1.1353 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0046 0.0048 0.0302 0.0519 0.1052 0.2695 0.3798 0.7555 | ce_loss_increases 0.0362 0.1295 0.3341 0.3946 | compound_ce_loss_increase 0.6687 | l0s 9.9999 9.9998 9.9997 9.9990 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0046 0.0048 0.0302 0.0519 0.1052 0.2695 0.3798 0.7555 type eval | step 8750 | loss 0.0093 0.0820 0.3751 1.1367 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0046 0.0048 0.0301 0.0519 0.1053 0.2699 0.3804 0.7562 | ce_loss_increases 0.0339 0.1299 0.3356 0.3942 | compound_ce_loss_increase 0.6763 | l0s 9.9999 9.9997 9.9998 9.9990 9.9997 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0046 0.0048 0.0301 0.0519 0.1053 0.2699 0.3804 0.7562 type eval | step 9000 | loss 0.0092 0.0818 0.3746 1.1359 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0045 0.0047 0.0301 0.0517 0.1051 0.2695 0.3801 0.7558 | ce_loss_increases 0.0339 0.1264 0.3357 0.3950 | compound_ce_loss_increase 0.6763 | l0s 9.9999 9.9999 9.9997 9.9989 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0045 0.0047 0.0301 0.0517 0.1051 0.2695 0.3801 0.7558 type eval | step 9250 | loss 0.0092 0.0817 0.3744 1.1356 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0045 0.0047 0.0300 0.0517 0.1050 0.2694 0.3798 0.7558 | ce_loss_increases 0.0339 0.1271 0.3296 0.3969 | compound_ce_loss_increase 0.6735 | l0s 9.9999 9.9998 9.9998 9.9990 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0045 0.0047 0.0300 0.0517 0.1050 0.2694 0.3798 0.7558 type eval | step 9500 | loss 0.0091 0.0818 0.3747 1.1361 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0045 0.0046 0.0300 0.0518 0.1051 0.2696 0.3800 0.7561 | ce_loss_increases 0.0325 0.1298 0.3306 0.3973 | compound_ce_loss_increase 0.6824 | l0s 9.9999 9.9997 9.9999 9.9989 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0045 0.0046 0.0300 0.0518 0.1051 0.2696 0.3800 0.7561 type eval | step 9750 | loss 0.0091 0.0814 0.3744 1.1356 | checkpoint True True False False | ce_loss 1.5683 | sae_losses 0.0045 0.0046 0.0298 0.0515 0.1049 0.2695 0.3799 0.7558 | ce_loss_increases 0.0339 0.1285 0.3309 0.3958 | compound_ce_loss_increase 0.6845 | l0s 9.9999 9.9998 9.9998 9.9989 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0045 0.0046 0.0298 0.0515 0.1049 0.2695 0.3799 0.7558 type eval | step 10000 | loss 0.0090 0.0810 0.3740 1.1350 | checkpoint True True True False | ce_loss 1.5683 | sae_losses 0.0045 0.0046 0.0297 0.0513 0.1046 0.2693 0.3796 0.7554 | ce_loss_increases 0.0315 0.1307 0.3286 0.3918 | compound_ce_loss_increase 0.6691 | l0s 10.0000 9.9998 9.9998 9.9989 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0045 0.0046 0.0297 0.0513 0.1046 0.2693 0.3796 0.7554 type eval | step 10250 | loss 0.0090 0.0807 0.3729 1.1332 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0044 0.0045 0.0295 0.0512 0.1043 0.2687 0.3788 0.7543 | ce_loss_increases 0.0329 0.1310 0.3279 0.3897 | compound_ce_loss_increase 0.6626 | l0s 9.9999 9.9996 9.9998 9.9991 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0044 0.0045 0.0295 0.0512 0.1043 0.2687 0.3788 0.7543 type eval | step 10500 | loss 0.0089 0.0808 0.3725 1.1324 | checkpoint True False True True | ce_loss 1.5683 | sae_losses 0.0044 0.0045 0.0295 0.0512 0.1041 0.2683 0.3784 0.7540 | ce_loss_increases 0.0324 0.1334 0.3301 0.3906 | compound_ce_loss_increase 0.6614 | l0s 9.9999 9.9996 9.9998 9.9990 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0044 0.0045 0.0295 0.0512 0.1041 0.2683 0.3784 0.7540 type eval | step 10750 | loss 0.0090 0.0809 0.3726 1.1325 | checkpoint False False False False | ce_loss 1.5683 | sae_losses 0.0045 0.0046 0.0296 0.0513 0.1042 0.2683 0.3785 0.7540 | ce_loss_increases 0.0318 0.1308 0.3307 0.3907 | compound_ce_loss_increase 0.6649 | l0s 10.0000 9.9997 9.9998 9.9989 9.9999 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0045 0.0046 0.0296 0.0513 0.1042 0.2683 0.3785 0.7540 type eval | step 11000 | loss 0.0089 0.0811 0.3730 1.1327 | checkpoint False False False False | ce_loss 1.5683 | sae_losses 0.0044 0.0045 0.0297 0.0514 0.1044 0.2686 0.3787 0.7540 | ce_loss_increases 0.0314 0.1312 0.3298 0.3871 | compound_ce_loss_increase 0.6675 | l0s 9.9999 9.9998 9.9998 9.9992 9.9998 9.9999 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0044 0.0045 0.0297 0.0514 0.1044 0.2686 0.3787 0.7540 type eval | step 11250 | loss 0.0089 0.0812 0.3735 1.1343 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0044 0.0045 0.0297 0.0515 0.1046 0.2689 0.3793 0.7550 | ce_loss_increases 0.0316 0.1305 0.3317 0.3878 | compound_ce_loss_increase 0.6720 | l0s 9.9999 9.9997 9.9998 9.9993 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0044 0.0045 0.0297 0.0515 0.1046 0.2689 0.3793 0.7550 type eval | step 11500 | loss 0.0088 0.0810 0.3731 1.1341 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0044 0.0045 0.0297 0.0514 0.1044 0.2687 0.3793 0.7547 | ce_loss_increases 0.0306 0.1310 0.3318 0.3874 | compound_ce_loss_increase 0.6738 | l0s 9.9999 9.9998 9.9998 9.9992 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0044 0.0045 0.0297 0.0514 0.1044 0.2687 0.3793 0.7547 type eval | step 11750 | loss 0.0088 0.0810 0.3730 1.1338 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0043 0.0045 0.0296 0.0513 0.1044 0.2686 0.3792 0.7546 | ce_loss_increases 0.0310 0.1310 0.3301 0.3876 | compound_ce_loss_increase 0.6705 | l0s 9.9999 9.9996 9.9998 9.9993 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0043 0.0045 0.0296 0.0513 0.1044 0.2686 0.3792 0.7546 type eval | step 12000 | loss 0.0088 0.0811 0.3731 1.1347 | checkpoint False False False False | ce_loss 1.5683 | sae_losses 0.0043 0.0044 0.0297 0.0514 0.1045 0.2687 0.3794 0.7553 | ce_loss_increases 0.0294 0.1326 0.3294 0.3905 | compound_ce_loss_increase 0.6790 | l0s 9.9999 9.9997 9.9999 9.9993 9.9999 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0043 0.0044 0.0297 0.0514 0.1045 0.2687 0.3794 0.7553 type eval | step 12250 | loss 0.0088 0.0808 0.3730 1.1346 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0043 0.0044 0.0296 0.0513 0.1043 0.2687 0.3794 0.7551 | ce_loss_increases 0.0292 0.1311 0.3308 0.3920 | compound_ce_loss_increase 0.6809 | l0s 9.9999 9.9998 9.9999 9.9993 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0043 0.0044 0.0296 0.0513 0.1043 0.2687 0.3794 0.7551 type eval | step 12500 | loss 0.0087 0.0806 0.3728 1.1342 | checkpoint True True False False | ce_loss 1.5683 | sae_losses 0.0043 0.0044 0.0294 0.0512 0.1042 0.2686 0.3794 0.7548 | ce_loss_increases 0.0307 0.1317 0.3294 0.3919 | compound_ce_loss_increase 0.6757 | l0s 9.9999 9.9999 9.9999 9.9994 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0043 0.0044 0.0294 0.0512 0.1042 0.2686 0.3794 0.7548 type eval | step 12750 | loss 0.0087 0.0803 0.3722 1.1333 | checkpoint True True True False | ce_loss 1.5683 | sae_losses 0.0043 0.0044 0.0293 0.0510 0.1040 0.2682 0.3790 0.7543 | ce_loss_increases 0.0289 0.1323 0.3287 0.3895 | compound_ce_loss_increase 0.6675 | l0s 9.9999 9.9998 9.9999 9.9995 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0043 0.0044 0.0293 0.0510 0.1040 0.2682 0.3790 0.7543 type eval | step 13000 | loss 0.0086 0.0803 0.3716 1.1328 | checkpoint True True True False | ce_loss 1.5683 | sae_losses 0.0043 0.0044 0.0293 0.0510 0.1038 0.2679 0.3787 0.7541 | ce_loss_increases 0.0292 0.1324 0.3311 0.3885 | compound_ce_loss_increase 0.6648 | l0s 9.9999 9.9999 9.9999 9.9995 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0043 0.0044 0.0293 0.0510 0.1038 0.2679 0.3787 0.7541 type eval | step 13250 | loss 0.0087 0.0804 0.3718 1.1328 | checkpoint False False False False | ce_loss 1.5683 | sae_losses 0.0043 0.0044 0.0293 0.0511 0.1038 0.2679 0.3787 0.7541 | ce_loss_increases 0.0303 0.1320 0.3328 0.3895 | compound_ce_loss_increase 0.6667 | l0s 9.9999 9.9999 9.9999 9.9994 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0043 0.0044 0.0293 0.0511 0.1038 0.2679 0.3787 0.7541 type eval | step 13500 | loss 0.0087 0.0807 0.3720 1.1328 | checkpoint False False False False | ce_loss 1.5683 | sae_losses 0.0043 0.0044 0.0295 0.0512 0.1039 0.2680 0.3788 0.7540 | ce_loss_increases 0.0293 0.1330 0.3314 0.3904 | compound_ce_loss_increase 0.6679 | l0s 10.0000 9.9999 9.9999 9.9994 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0043 0.0044 0.0295 0.0512 0.1039 0.2680 0.3788 0.7540 type eval | step 13750 | loss 0.0086 0.0808 0.3725 1.1338 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0042 0.0044 0.0295 0.0513 0.1041 0.2683 0.3793 0.7545 | ce_loss_increases 0.0302 0.1344 0.3334 0.3893 | compound_ce_loss_increase 0.6737 | l0s 10.0000 10.0000 9.9999 9.9994 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0042 0.0044 0.0295 0.0513 0.1041 0.2683 0.3793 0.7545 type eval | step 14000 | loss 0.0086 0.0806 0.3723 1.1340 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0042 0.0043 0.0294 0.0512 0.1041 0.2682 0.3794 0.7545 | ce_loss_increases 0.0291 0.1329 0.3339 0.3898 | compound_ce_loss_increase 0.6738 | l0s 9.9999 9.9998 9.9999 9.9994 9.9997 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0042 0.0043 0.0294 0.0512 0.1041 0.2682 0.3794 0.7545 type eval | step 14250 | loss 0.0085 0.0806 0.3721 1.1337 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0042 0.0043 0.0295 0.0511 0.1040 0.2681 0.3793 0.7544 | ce_loss_increases 0.0295 0.1330 0.3312 0.3907 | compound_ce_loss_increase 0.6754 | l0s 9.9999 9.9999 9.9999 9.9994 9.9997 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0042 0.0043 0.0295 0.0511 0.1040 0.2681 0.3793 0.7544 type eval | step 14500 | loss 0.0086 0.0806 0.3723 1.1340 | checkpoint False False False False | ce_loss 1.5683 | sae_losses 0.0042 0.0043 0.0295 0.0512 0.1040 0.2682 0.3794 0.7546 | ce_loss_increases 0.0289 0.1344 0.3319 0.3910 | compound_ce_loss_increase 0.6742 | l0s 9.9999 9.9999 9.9999 9.9995 9.9997 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0042 0.0043 0.0295 0.0512 0.1040 0.2682 0.3794 0.7546 type eval | step 14750 | loss 0.0085 0.0805 0.3722 1.1343 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0042 0.0043 0.0294 0.0511 0.1040 0.2682 0.3795 0.7548 | ce_loss_increases 0.0279 0.1358 0.3331 0.3939 | compound_ce_loss_increase 0.6804 | l0s 9.9999 9.9997 9.9999 9.9994 9.9997 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0042 0.0043 0.0294 0.0511 0.1040 0.2682 0.3795 0.7548 type eval | step 15000 | loss 0.0085 0.0804 0.3721 1.1344 | checkpoint False False False False | ce_loss 1.5683 | sae_losses 0.0042 0.0043 0.0293 0.0510 0.1039 0.2682 0.3796 0.7548 | ce_loss_increases 0.0295 0.1337 0.3318 0.3943 | compound_ce_loss_increase 0.6768 | l0s 9.9999 9.9986 9.9999 9.9995 9.9997 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0042 0.0043 0.0293 0.0510 0.1039 0.2682 0.3796 0.7548 type eval | step 15250 | loss 0.0085 0.0802 0.3719 1.1340 | checkpoint True True False False | ce_loss 1.5683 | sae_losses 0.0042 0.0043 0.0292 0.0509 0.1038 0.2680 0.3794 0.7546 | ce_loss_increases 0.0284 0.1359 0.3294 0.3932 | compound_ce_loss_increase 0.6741 | l0s 9.9999 9.9999 9.9999 9.9995 9.9997 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0042 0.0043 0.0292 0.0509 0.1038 0.2680 0.3794 0.7546 type eval | step 15500 | loss 0.0084 0.0800 0.3714 1.1334 | checkpoint True True True False | ce_loss 1.5683 | sae_losses 0.0042 0.0043 0.0292 0.0509 0.1037 0.2678 0.3791 0.7543 | ce_loss_increases 0.0285 0.1339 0.3298 0.3928 | compound_ce_loss_increase 0.6667 | l0s 10.0000 9.9998 9.9999 9.9996 9.9997 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0042 0.0043 0.0292 0.0509 0.1037 0.2678 0.3791 0.7543 type eval | step 15750 | loss 0.0084 0.0800 0.3714 1.1334 | checkpoint False False True False | ce_loss 1.5683 | sae_losses 0.0042 0.0043 0.0292 0.0509 0.1037 0.2678 0.3791 0.7543 | ce_loss_increases 0.0291 0.1341 0.3304 0.3942 | compound_ce_loss_increase 0.6669 | l0s 9.9999 9.9986 9.9999 9.9996 9.9997 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0042 0.0043 0.0292 0.0509 0.1037 0.2678 0.3791 0.7543 type eval | step 16000 | loss 0.0085 0.0801 0.3715 1.1332 | checkpoint False False False False | ce_loss 1.5683 | sae_losses 0.0042 0.0043 0.0292 0.0509 0.1037 0.2678 0.3791 0.7541 | ce_loss_increases 0.0285 0.1349 0.3309 0.3966 | compound_ce_loss_increase 0.6707 | l0s 10.0000 9.9998 9.9999 9.9995 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0042 0.0043 0.0292 0.0509 0.1037 0.2678 0.3791 0.7541 type eval | step 16250 | loss 0.0085 0.0803 0.3718 1.1336 | checkpoint False False False False | ce_loss 1.5683 | sae_losses 0.0042 0.0043 0.0293 0.0510 0.1038 0.2680 0.3793 0.7542 | ce_loss_increases 0.0289 0.1368 0.3307 0.3959 | compound_ce_loss_increase 0.6682 | l0s 10.0000 9.9987 9.9999 9.9995 9.9997 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0042 0.0043 0.0293 0.0510 0.1038 0.2680 0.3793 0.7542 type eval | step 16500 | loss 0.0084 0.0803 0.3719 1.1337 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0042 0.0043 0.0293 0.0510 0.1038 0.2681 0.3795 0.7542 | ce_loss_increases 0.0270 0.1356 0.3307 0.3951 | compound_ce_loss_increase 0.6691 | l0s 9.9999 9.9986 9.9999 9.9996 9.9997 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0042 0.0043 0.0293 0.0510 0.1038 0.2681 0.3795 0.7542 type eval | step 16750 | loss 0.0084 0.0802 0.3717 1.1333 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0042 0.0043 0.0293 0.0510 0.1037 0.2679 0.3794 0.7539 | ce_loss_increases 0.0269 0.1349 0.3303 0.3955 | compound_ce_loss_increase 0.6662 | l0s 9.9999 9.9985 9.9999 9.9996 9.9997 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0042 0.0043 0.0293 0.0510 0.1037 0.2679 0.3794 0.7539 type eval | step 17000 | loss 0.0084 0.0803 0.3718 1.1336 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0042 0.0043 0.0293 0.0510 0.1038 0.2680 0.3794 0.7542 | ce_loss_increases 0.0287 0.1356 0.3282 0.3978 | compound_ce_loss_increase 0.6696 | l0s 9.9999 9.9986 9.9999 9.9996 9.9997 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0042 0.0043 0.0293 0.0510 0.1038 0.2680 0.3794 0.7542 type eval | step 17250 | loss 0.0084 0.0802 0.3718 1.1336 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0041 0.0042 0.0293 0.0509 0.1037 0.2681 0.3794 0.7542 | ce_loss_increases 0.0285 0.1354 0.3288 0.3985 | compound_ce_loss_increase 0.6759 | l0s 10.0000 9.9986 9.9999 9.9995 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0041 0.0042 0.0293 0.0509 0.1037 0.2681 0.3794 0.7542 type eval | step 17500 | loss 0.0084 0.0802 0.3717 1.1335 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0041 0.0042 0.0293 0.0509 0.1037 0.2681 0.3794 0.7541 | ce_loss_increases 0.0278 0.1340 0.3304 0.3987 | compound_ce_loss_increase 0.6727 | l0s 9.9999 9.9975 9.9999 9.9995 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0041 0.0042 0.0293 0.0509 0.1037 0.2681 0.3794 0.7541 type eval | step 17750 | loss 0.0084 0.0801 0.3716 1.1334 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0041 0.0042 0.0293 0.0509 0.1037 0.2680 0.3793 0.7540 | ce_loss_increases 0.0284 0.1351 0.3290 0.3986 | compound_ce_loss_increase 0.6737 | l0s 9.9999 9.9975 9.9999 9.9995 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0041 0.0042 0.0293 0.0509 0.1037 0.2680 0.3793 0.7540 type eval | step 18000 | loss 0.0084 0.0800 0.3714 1.1330 | checkpoint True True True False | ce_loss 1.5683 | sae_losses 0.0041 0.0042 0.0292 0.0508 0.1035 0.2678 0.3792 0.7538 | ce_loss_increases 0.0289 0.1336 0.3270 0.3988 | compound_ce_loss_increase 0.6695 | l0s 9.9999 9.9987 9.9999 9.9995 9.9997 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0041 0.0042 0.0292 0.0508 0.1035 0.2678 0.3792 0.7538 type eval | step 18250 | loss 0.0083 0.0800 0.3713 1.1329 | checkpoint True False True False | ce_loss 1.5683 | sae_losses 0.0041 0.0042 0.0292 0.0508 0.1035 0.2677 0.3791 0.7538 | ce_loss_increases 0.0289 0.1348 0.3271 0.3991 | compound_ce_loss_increase 0.6670 | l0s 9.9999 9.9987 9.9999 9.9995 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0041 0.0042 0.0292 0.0508 0.1035 0.2677 0.3791 0.7538 type eval | step 18500 | loss 0.0084 0.0801 0.3713 1.1329 | checkpoint False False False False | ce_loss 1.5683 | sae_losses 0.0041 0.0042 0.0292 0.0509 0.1036 0.2678 0.3791 0.7538 | ce_loss_increases 0.0291 0.1350 0.3267 0.4002 | compound_ce_loss_increase 0.6703 | l0s 9.9999 9.9975 9.9998 9.9994 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0041 0.0042 0.0292 0.0509 0.1036 0.2678 0.3791 0.7538 type eval | step 18750 | loss 0.0084 0.0802 0.3715 1.1331 | checkpoint False False False False | ce_loss 1.5683 | sae_losses 0.0041 0.0042 0.0293 0.0509 0.1036 0.2679 0.3792 0.7539 | ce_loss_increases 0.0284 0.1363 0.3269 0.3999 | compound_ce_loss_increase 0.6658 | l0s 9.9999 9.9987 9.9999 9.9995 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0041 0.0042 0.0293 0.0509 0.1036 0.2679 0.3792 0.7539 type eval | step 19000 | loss 0.0084 0.0802 0.3716 1.1331 | checkpoint False False False False | ce_loss 1.5683 | sae_losses 0.0041 0.0042 0.0293 0.0510 0.1037 0.2680 0.3793 0.7538 | ce_loss_increases 0.0282 0.1354 0.3273 0.4003 | compound_ce_loss_increase 0.6687 | l0s 10.0000 9.9975 9.9998 9.9995 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0041 0.0042 0.0293 0.0510 0.1037 0.2680 0.3793 0.7538 type eval | step 19250 | loss 0.0084 0.0802 0.3716 1.1331 | checkpoint False False False False | ce_loss 1.5683 | sae_losses 0.0041 0.0042 0.0292 0.0510 0.1037 0.2680 0.3793 0.7538 | ce_loss_increases 0.0281 0.1355 0.3279 0.4005 | compound_ce_loss_increase 0.6717 | l0s 10.0000 9.9974 9.9999 9.9995 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0041 0.0042 0.0292 0.0510 0.1037 0.2680 0.3793 0.7538 type eval | step 19500 | loss 0.0083 0.0802 0.3716 1.1331 | checkpoint False False False False | ce_loss 1.5683 | sae_losses 0.0041 0.0042 0.0292 0.0510 0.1036 0.2680 0.3793 0.7538 | ce_loss_increases 0.0284 0.1354 0.3269 0.4025 | compound_ce_loss_increase 0.6703 | l0s 9.9999 9.9974 9.9999 9.9994 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0041 0.0042 0.0292 0.0510 0.1036 0.2680 0.3793 0.7538 type eval | step 19750 | loss 0.0083 0.0802 0.3717 1.1334 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0041 0.0042 0.0292 0.0510 0.1037 0.2680 0.3793 0.7540 | ce_loss_increases 0.0293 0.1355 0.3267 0.4027 | compound_ce_loss_increase 0.6698 | l0s 9.9999 9.9987 9.9999 9.9994 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0041 0.0042 0.0292 0.0510 0.1037 0.2680 0.3793 0.7540 type eval | step 20000 | loss 0.0083 0.0802 0.3716 1.1334 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0041 0.0042 0.0293 0.0509 0.1036 0.2680 0.3793 0.7540 | ce_loss_increases 0.0279 0.1365 0.3266 0.4029 | compound_ce_loss_increase 0.6701 | l0s 10.0000 9.9974 9.9999 9.9994 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0041 0.0042 0.0293 0.0509 0.1036 0.2680 0.3793 0.7540