type eval | step 0 | loss 28.5820 27.7576 29.1771 85.5571 | checkpoint False | ce_loss 1.5683 | sae_losses 13.5712 15.0109 12.9893 14.7683 12.4758 16.7013 15.5737 69.9834 | ce_loss_increases 4.3280 3.6769 2.6645 2.1061 | compound_ce_loss_increase 4.2042 | l0s 10.1514 10.1376 10.1651 10.1471 10.1574 10.1413 10.1445 10.1445 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 13.5712 15.0109 12.9893 14.7683 12.4758 16.7013 15.5737 69.9834 type eval | step 250 | loss 2.9079 4.0987 5.7260 13.6763 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 1.4013 1.5066 1.8548 2.2439 2.2865 3.4395 3.3022 10.3741 | ce_loss_increases 4.4551 3.5862 3.1646 2.5236 | compound_ce_loss_increase 4.4072 | l0s 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 1.4013 1.5066 1.8548 2.2439 2.2865 3.4395 3.3022 10.3741 type eval | step 500 | loss 0.1865 0.6724 1.2753 2.4776 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0929 0.0937 0.2914 0.3810 0.4973 0.7781 0.8284 1.6493 | ce_loss_increases 2.2827 1.9649 1.7322 1.0088 | compound_ce_loss_increase 3.2941 | l0s 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0929 0.0937 0.2914 0.3810 0.4973 0.7781 0.8284 1.6493 type eval | step 750 | loss 0.0410 0.2273 0.6693 1.5969 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0208 0.0202 0.0963 0.1310 0.2272 0.4421 0.5443 1.0527 | ce_loss_increases 0.6853 0.6890 0.8380 0.6055 | compound_ce_loss_increase 1.6438 | l0s 9.9996 9.9998 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0208 0.0202 0.0963 0.1310 0.2272 0.4421 0.5443 1.0527 type eval | step 1000 | loss 0.0289 0.1457 0.5160 1.3807 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0145 0.0144 0.0595 0.0862 0.1616 0.3545 0.4696 0.9111 | ce_loss_increases 0.4200 0.2897 0.5306 0.5112 | compound_ce_loss_increase 1.0812 | l0s 9.9994 9.9990 9.9999 9.9999 10.0000 10.0000 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0145 0.0144 0.0595 0.0862 0.1616 0.3545 0.4696 0.9111 type eval | step 1250 | loss 0.0243 0.1216 0.4642 1.2964 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0121 0.0123 0.0480 0.0736 0.1413 0.3229 0.4401 0.8562 | ce_loss_increases 0.3460 0.1928 0.4435 0.4618 | compound_ce_loss_increase 0.9367 | l0s 9.9994 9.9995 9.9998 9.9998 10.0000 10.0000 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0121 0.0123 0.0480 0.0736 0.1413 0.3229 0.4401 0.8562 type eval | step 1500 | loss 0.0219 0.1099 0.4385 1.2506 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0107 0.0111 0.0422 0.0677 0.1313 0.3072 0.4242 0.8264 | ce_loss_increases 0.2824 0.1554 0.4164 0.4498 | compound_ce_loss_increase 0.8865 | l0s 9.9994 9.9996 9.9997 9.9997 10.0000 10.0000 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0107 0.0111 0.0422 0.0677 0.1313 0.3072 0.4242 0.8264 type eval | step 1750 | loss 0.0202 0.1041 0.4253 1.2268 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0099 0.0103 0.0395 0.0647 0.1259 0.2994 0.4154 0.8114 | ce_loss_increases 0.2496 0.1302 0.4040 0.4528 | compound_ce_loss_increase 0.8550 | l0s 9.9993 9.9991 9.9997 9.9996 10.0000 10.0000 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0099 0.0103 0.0395 0.0647 0.1259 0.2994 0.4154 0.8114 type eval | step 2000 | loss 0.0189 0.0991 0.4160 1.2090 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0092 0.0096 0.0373 0.0619 0.1221 0.2939 0.4100 0.7990 | ce_loss_increases 0.2188 0.1193 0.4048 0.4508 | compound_ce_loss_increase 0.8460 | l0s 9.9992 9.9994 9.9997 9.9996 9.9999 9.9999 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0092 0.0096 0.0373 0.0619 0.1221 0.2939 0.4100 0.7990 type eval | step 2250 | loss 0.0178 0.0960 0.4091 1.1959 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0087 0.0091 0.0360 0.0601 0.1194 0.2897 0.4056 0.7903 | ce_loss_increases 0.1961 0.1100 0.3952 0.4429 | compound_ce_loss_increase 0.8178 | l0s 9.9992 9.9994 9.9998 9.9995 9.9999 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0087 0.0091 0.0360 0.0601 0.1194 0.2897 0.4056 0.7903 type eval | step 2500 | loss 0.0171 0.0939 0.4037 1.1844 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0083 0.0088 0.0352 0.0587 0.1172 0.2865 0.4014 0.7829 | ce_loss_increases 0.1785 0.1034 0.3773 0.4306 | compound_ce_loss_increase 0.7848 | l0s 9.9993 9.9995 9.9999 9.9994 9.9999 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0083 0.0088 0.0352 0.0587 0.1172 0.2865 0.4014 0.7829 type eval | step 2750 | loss 0.0164 0.0920 0.3983 1.1734 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0080 0.0085 0.0345 0.0575 0.1150 0.2834 0.3972 0.7763 | ce_loss_increases 0.1616 0.1019 0.3675 0.4272 | compound_ce_loss_increase 0.7879 | l0s 9.9995 9.9994 9.9998 9.9994 9.9999 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0080 0.0085 0.0345 0.0575 0.1150 0.2834 0.3972 0.7763 type eval | step 3000 | loss 0.0158 0.0911 0.3954 1.1680 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0076 0.0082 0.0342 0.0569 0.1138 0.2815 0.3952 0.7729 | ce_loss_increases 0.1528 0.0990 0.3621 0.4271 | compound_ce_loss_increase 0.7807 | l0s 9.9995 9.9995 9.9999 9.9993 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0076 0.0082 0.0342 0.0569 0.1138 0.2815 0.3952 0.7729 type eval | step 3250 | loss 0.0154 0.0901 0.3936 1.1634 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0074 0.0080 0.0338 0.0563 0.1132 0.2803 0.3935 0.7700 | ce_loss_increases 0.1486 0.0943 0.3607 0.4235 | compound_ce_loss_increase 0.7772 | l0s 9.9994 9.9995 9.9999 9.9995 9.9999 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0074 0.0080 0.0338 0.0563 0.1132 0.2803 0.3935 0.7700 type eval | step 3500 | loss 0.0150 0.0893 0.3925 1.1611 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0072 0.0078 0.0335 0.0559 0.1128 0.2798 0.3929 0.7682 | ce_loss_increases 0.1479 0.0922 0.3606 0.4220 | compound_ce_loss_increase 0.7676 | l0s 9.9995 9.9992 9.9998 9.9993 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0072 0.0078 0.0335 0.0559 0.1128 0.2798 0.3929 0.7682 type eval | step 3750 | loss 0.0145 0.0883 0.3912 1.1586 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0069 0.0075 0.0330 0.0553 0.1122 0.2790 0.3922 0.7665 | ce_loss_increases 0.1293 0.0906 0.3593 0.4294 | compound_ce_loss_increase 0.7668 | l0s 9.9995 9.9995 9.9998 9.9994 9.9999 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0069 0.0075 0.0330 0.0553 0.1122 0.2790 0.3922 0.7665 type eval | step 4000 | loss 0.0141 0.0875 0.3894 1.1544 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0067 0.0074 0.0326 0.0550 0.1116 0.2778 0.3905 0.7639 | ce_loss_increases 0.1225 0.0890 0.3567 0.4367 | compound_ce_loss_increase 0.7634 | l0s 9.9995 9.9996 9.9999 9.9994 9.9999 9.9996 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0067 0.0074 0.0326 0.0550 0.1116 0.2778 0.3905 0.7639 type eval | step 4250 | loss 0.0138 0.0872 0.3883 1.1516 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0066 0.0072 0.0324 0.0548 0.1113 0.2769 0.3892 0.7624 | ce_loss_increases 0.1237 0.0875 0.3541 0.4372 | compound_ce_loss_increase 0.7612 | l0s 9.9996 9.9996 9.9999 9.9993 9.9999 9.9996 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0066 0.0072 0.0324 0.0548 0.1113 0.2769 0.3892 0.7624 type eval | step 4500 | loss 0.0135 0.0860 0.3873 1.1494 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0064 0.0071 0.0318 0.0541 0.1107 0.2766 0.3884 0.7610 | ce_loss_increases 0.1229 0.0845 0.3617 0.4354 | compound_ce_loss_increase 0.7731 | l0s 9.9994 9.9996 9.9998 9.9992 9.9999 9.9996 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0064 0.0071 0.0318 0.0541 0.1107 0.2766 0.3884 0.7610 type eval | step 4750 | loss 0.0132 0.0856 0.3863 1.1457 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0062 0.0069 0.0317 0.0539 0.1103 0.2759 0.3873 0.7583 | ce_loss_increases 0.1177 0.0834 0.3644 0.4310 | compound_ce_loss_increase 0.7603 | l0s 9.9994 9.9996 9.9997 9.9992 9.9999 9.9995 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0062 0.0069 0.0317 0.0539 0.1103 0.2759 0.3873 0.7583 type eval | step 5000 | loss 0.0130 0.0851 0.3848 1.1428 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0061 0.0068 0.0315 0.0535 0.1097 0.2751 0.3862 0.7565 | ce_loss_increases 0.1164 0.0837 0.3609 0.4209 | compound_ce_loss_increase 0.7351 | l0s 9.9996 9.9996 9.9998 9.9990 9.9999 9.9996 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0061 0.0068 0.0315 0.0535 0.1097 0.2751 0.3862 0.7565 type eval | step 5250 | loss 0.0127 0.0842 0.3825 1.1385 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0060 0.0067 0.0312 0.0530 0.1088 0.2737 0.3845 0.7541 | ce_loss_increases 0.1143 0.0821 0.3576 0.4150 | compound_ce_loss_increase 0.7374 | l0s 9.9995 9.9997 9.9998 9.9993 9.9998 9.9996 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0060 0.0067 0.0312 0.0530 0.1088 0.2737 0.3845 0.7541 type eval | step 5500 | loss 0.0125 0.0839 0.3820 1.1363 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0059 0.0066 0.0311 0.0528 0.1086 0.2734 0.3840 0.7523 | ce_loss_increases 0.1106 0.0815 0.3570 0.4114 | compound_ce_loss_increase 0.7399 | l0s 9.9994 9.9996 9.9999 9.9991 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0059 0.0066 0.0311 0.0528 0.1086 0.2734 0.3840 0.7523 type eval | step 5750 | loss 0.0125 0.0840 0.3819 1.1348 | checkpoint False False True True | ce_loss 1.5683 | sae_losses 0.0058 0.0066 0.0311 0.0528 0.1085 0.2734 0.3835 0.7513 | ce_loss_increases 0.1085 0.0813 0.3542 0.4107 | compound_ce_loss_increase 0.7341 | l0s 9.9992 9.9995 9.9999 9.9991 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0058 0.0066 0.0311 0.0528 0.1085 0.2734 0.3835 0.7513 type eval | step 6000 | loss 0.0122 0.0840 0.3823 1.1355 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0058 0.0064 0.0311 0.0529 0.1087 0.2737 0.3838 0.7517 | ce_loss_increases 0.1081 0.0823 0.3532 0.4072 | compound_ce_loss_increase 0.7271 | l0s 9.9996 9.9987 9.9999 9.9992 9.9999 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0058 0.0064 0.0311 0.0529 0.1087 0.2737 0.3838 0.7517 type eval | step 6250 | loss 0.0120 0.0838 0.3825 1.1364 | checkpoint True True False False | ce_loss 1.5683 | sae_losses 0.0057 0.0063 0.0310 0.0528 0.1086 0.2738 0.3843 0.7521 | ce_loss_increases 0.1046 0.0841 0.3523 0.4074 | compound_ce_loss_increase 0.7327 | l0s 9.9995 9.9980 9.9998 9.9992 9.9999 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0057 0.0063 0.0310 0.0528 0.1086 0.2738 0.3843 0.7521 type eval | step 6500 | loss 0.0118 0.0834 0.3814 1.1346 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0056 0.0062 0.0309 0.0525 0.1084 0.2731 0.3836 0.7510 | ce_loss_increases 0.1042 0.0840 0.3514 0.4126 | compound_ce_loss_increase 0.7210 | l0s 9.9996 9.9990 9.9999 9.9991 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0056 0.0062 0.0309 0.0525 0.1084 0.2731 0.3836 0.7510 type eval | step 6750 | loss 0.0117 0.0834 0.3808 1.1342 | checkpoint True False True True | ce_loss 1.5683 | sae_losses 0.0056 0.0061 0.0310 0.0525 0.1081 0.2727 0.3833 0.7509 | ce_loss_increases 0.0988 0.0832 0.3485 0.4134 | compound_ce_loss_increase 0.7220 | l0s 9.9996 9.9992 9.9998 9.9991 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0056 0.0061 0.0310 0.0525 0.1081 0.2727 0.3833 0.7509 type eval | step 7000 | loss 0.0115 0.0829 0.3808 1.1342 | checkpoint True True False False | ce_loss 1.5683 | sae_losses 0.0055 0.0060 0.0307 0.0522 0.1081 0.2727 0.3833 0.7510 | ce_loss_increases 0.0951 0.0815 0.3526 0.4124 | compound_ce_loss_increase 0.7324 | l0s 9.9996 9.9991 9.9999 9.9990 9.9998 9.9996 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0055 0.0060 0.0307 0.0522 0.1081 0.2727 0.3833 0.7510 type eval | step 7250 | loss 0.0113 0.0829 0.3801 1.1330 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0054 0.0059 0.0308 0.0521 0.1079 0.2723 0.3827 0.7502 | ce_loss_increases 0.0956 0.0809 0.3522 0.4126 | compound_ce_loss_increase 0.7284 | l0s 9.9997 9.9995 9.9998 9.9991 9.9998 9.9996 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0054 0.0059 0.0308 0.0521 0.1079 0.2723 0.3827 0.7502 type eval | step 7500 | loss 0.0112 0.0823 0.3793 1.1310 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0053 0.0059 0.0305 0.0518 0.1075 0.2717 0.3819 0.7491 | ce_loss_increases 0.1013 0.0822 0.3508 0.4069 | compound_ce_loss_increase 0.7114 | l0s 9.9998 9.9995 9.9999 9.9992 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0053 0.0059 0.0305 0.0518 0.1075 0.2717 0.3819 0.7491 type eval | step 7750 | loss 0.0110 0.0818 0.3777 1.1281 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0053 0.0058 0.0303 0.0515 0.1070 0.2707 0.3805 0.7477 | ce_loss_increases 0.0945 0.0820 0.3495 0.4058 | compound_ce_loss_increase 0.7113 | l0s 9.9997 9.9997 9.9998 9.9992 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0053 0.0058 0.0303 0.0515 0.1070 0.2707 0.3805 0.7477 type eval | step 8000 | loss 0.0109 0.0819 0.3772 1.1272 | checkpoint True False True True | ce_loss 1.5683 | sae_losses 0.0052 0.0057 0.0304 0.0515 0.1069 0.2703 0.3801 0.7471 | ce_loss_increases 0.0931 0.0817 0.3507 0.4079 | compound_ce_loss_increase 0.7156 | l0s 9.9998 9.9996 9.9999 9.9992 9.9999 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0052 0.0057 0.0304 0.0515 0.1069 0.2703 0.3801 0.7471 type eval | step 8250 | loss 0.0110 0.0821 0.3771 1.1270 | checkpoint False False True True | ce_loss 1.5683 | sae_losses 0.0052 0.0058 0.0305 0.0515 0.1069 0.2702 0.3797 0.7472 | ce_loss_increases 0.0919 0.0790 0.3466 0.4121 | compound_ce_loss_increase 0.7087 | l0s 9.9998 9.9996 9.9999 9.9992 9.9999 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0052 0.0058 0.0305 0.0515 0.1069 0.2702 0.3797 0.7472 type eval | step 8500 | loss 0.0108 0.0822 0.3778 1.1281 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0051 0.0057 0.0305 0.0516 0.1072 0.2706 0.3800 0.7481 | ce_loss_increases 0.0906 0.0796 0.3506 0.4148 | compound_ce_loss_increase 0.7167 | l0s 9.9997 9.9985 9.9999 9.9993 9.9999 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0051 0.0057 0.0305 0.0516 0.1072 0.2706 0.3800 0.7481 type eval | step 8750 | loss 0.0107 0.0821 0.3782 1.1294 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0051 0.0056 0.0304 0.0517 0.1073 0.2709 0.3803 0.7490 | ce_loss_increases 0.0956 0.0803 0.3488 0.4158 | compound_ce_loss_increase 0.7182 | l0s 9.9997 9.9997 10.0000 9.9993 9.9999 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0051 0.0056 0.0304 0.0517 0.1073 0.2709 0.3803 0.7490 type eval | step 9000 | loss 0.0106 0.0818 0.3776 1.1282 | checkpoint True True False False | ce_loss 1.5683 | sae_losses 0.0050 0.0056 0.0304 0.0514 0.1070 0.2706 0.3798 0.7484 | ce_loss_increases 0.0972 0.0808 0.3476 0.4178 | compound_ce_loss_increase 0.7170 | l0s 9.9998 9.9985 9.9999 9.9993 9.9999 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0050 0.0056 0.0304 0.0514 0.1070 0.2706 0.3798 0.7484 type eval | step 9250 | loss 0.0106 0.0817 0.3773 1.1278 | checkpoint True True False False | ce_loss 1.5683 | sae_losses 0.0050 0.0056 0.0304 0.0514 0.1069 0.2704 0.3796 0.7482 | ce_loss_increases 0.0889 0.0804 0.3448 0.4212 | compound_ce_loss_increase 0.7091 | l0s 9.9997 9.9986 9.9999 9.9994 9.9999 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0050 0.0056 0.0304 0.0514 0.1069 0.2704 0.3796 0.7482 type eval | step 9500 | loss 0.0105 0.0817 0.3775 1.1286 | checkpoint True True False False | ce_loss 1.5683 | sae_losses 0.0050 0.0055 0.0303 0.0514 0.1071 0.2704 0.3797 0.7489 | ce_loss_increases 0.0883 0.0795 0.3448 0.4199 | compound_ce_loss_increase 0.7174 | l0s 9.9996 9.9997 9.9999 9.9994 9.9999 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0050 0.0055 0.0303 0.0514 0.1071 0.2704 0.3797 0.7489 type eval | step 9750 | loss 0.0104 0.0815 0.3771 1.1276 | checkpoint True True False False | ce_loss 1.5683 | sae_losses 0.0049 0.0055 0.0303 0.0512 0.1068 0.2703 0.3795 0.7481 | ce_loss_increases 0.0864 0.0771 0.3473 0.4209 | compound_ce_loss_increase 0.7188 | l0s 9.9996 9.9998 9.9999 9.9993 9.9999 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0049 0.0055 0.0303 0.0512 0.1068 0.2703 0.3795 0.7481 type eval | step 10000 | loss 0.0103 0.0812 0.3766 1.1265 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0049 0.0054 0.0302 0.0510 0.1066 0.2700 0.3791 0.7474 | ce_loss_increases 0.0851 0.0790 0.3443 0.4184 | compound_ce_loss_increase 0.7128 | l0s 9.9996 9.9998 9.9999 9.9993 9.9999 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0049 0.0054 0.0302 0.0510 0.1066 0.2700 0.3791 0.7474 type eval | step 10250 | loss 0.0102 0.0808 0.3754 1.1245 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0048 0.0054 0.0300 0.0508 0.1062 0.2692 0.3784 0.7460 | ce_loss_increases 0.0821 0.0782 0.3419 0.4153 | compound_ce_loss_increase 0.7007 | l0s 9.9996 9.9998 10.0000 9.9994 9.9999 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0048 0.0054 0.0300 0.0508 0.1062 0.2692 0.3784 0.7460 type eval | step 10500 | loss 0.0101 0.0807 0.3750 1.1233 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0048 0.0053 0.0300 0.0508 0.1061 0.2689 0.3782 0.7451 | ce_loss_increases 0.0810 0.0782 0.3406 0.4138 | compound_ce_loss_increase 0.7070 | l0s 9.9997 9.9997 9.9999 9.9994 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0048 0.0053 0.0300 0.0508 0.1061 0.2689 0.3782 0.7451 type eval | step 10750 | loss 0.0102 0.0809 0.3750 1.1232 | checkpoint False False False True | ce_loss 1.5683 | sae_losses 0.0048 0.0054 0.0301 0.0508 0.1062 0.2688 0.3783 0.7449 | ce_loss_increases 0.0786 0.0782 0.3401 0.4148 | compound_ce_loss_increase 0.7049 | l0s 9.9997 9.9998 9.9999 9.9993 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0048 0.0054 0.0301 0.0508 0.1062 0.2688 0.3783 0.7449 type eval | step 11000 | loss 0.0101 0.0812 0.3755 1.1235 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0048 0.0053 0.0303 0.0509 0.1064 0.2691 0.3784 0.7450 | ce_loss_increases 0.0783 0.0785 0.3402 0.4170 | compound_ce_loss_increase 0.7095 | l0s 9.9994 9.9998 10.0000 9.9994 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0048 0.0053 0.0303 0.0509 0.1064 0.2691 0.3784 0.7450 type eval | step 11250 | loss 0.0100 0.0812 0.3760 1.1247 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0047 0.0053 0.0302 0.0510 0.1065 0.2694 0.3790 0.7457 | ce_loss_increases 0.0798 0.0773 0.3416 0.4184 | compound_ce_loss_increase 0.7159 | l0s 9.9996 9.9998 9.9999 9.9992 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0047 0.0053 0.0302 0.0510 0.1065 0.2694 0.3790 0.7457 type eval | step 11500 | loss 0.0100 0.0810 0.3756 1.1238 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0047 0.0052 0.0302 0.0508 0.1063 0.2692 0.3788 0.7450 | ce_loss_increases 0.0778 0.0780 0.3411 0.4215 | compound_ce_loss_increase 0.7153 | l0s 9.9995 9.9985 9.9999 9.9992 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0047 0.0052 0.0302 0.0508 0.1063 0.2692 0.3788 0.7450 type eval | step 11750 | loss 0.0099 0.0809 0.3753 1.1228 | checkpoint True False False True | ce_loss 1.5683 | sae_losses 0.0047 0.0052 0.0302 0.0507 0.1062 0.2690 0.3786 0.7442 | ce_loss_increases 0.0766 0.0800 0.3402 0.4239 | compound_ce_loss_increase 0.7095 | l0s 9.9996 9.9982 10.0000 9.9991 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0047 0.0052 0.0302 0.0507 0.1062 0.2690 0.3786 0.7442 type eval | step 12000 | loss 0.0099 0.0810 0.3753 1.1228 | checkpoint True False False True | ce_loss 1.5683 | sae_losses 0.0047 0.0052 0.0302 0.0507 0.1063 0.2690 0.3787 0.7441 | ce_loss_increases 0.0778 0.0792 0.3396 0.4216 | compound_ce_loss_increase 0.7081 | l0s 9.9996 9.9994 9.9999 9.9991 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0047 0.0052 0.0302 0.0507 0.1063 0.2690 0.3787 0.7441 type eval | step 12250 | loss 0.0099 0.0806 0.3752 1.1218 | checkpoint True True False True | ce_loss 1.5683 | sae_losses 0.0047 0.0052 0.0301 0.0506 0.1062 0.2690 0.3786 0.7432 | ce_loss_increases 0.0776 0.0782 0.3424 0.4224 | compound_ce_loss_increase 0.7155 | l0s 9.9996 9.9998 9.9999 9.9991 9.9999 9.9996 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0047 0.0052 0.0301 0.0506 0.1062 0.2690 0.3786 0.7432 type eval | step 12500 | loss 0.0098 0.0805 0.3748 1.1207 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0046 0.0051 0.0301 0.0505 0.1060 0.2688 0.3784 0.7423 | ce_loss_increases 0.0743 0.0798 0.3405 0.4206 | compound_ce_loss_increase 0.7072 | l0s 9.9996 9.9998 9.9999 9.9992 9.9998 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0046 0.0051 0.0301 0.0505 0.1060 0.2688 0.3784 0.7423 type eval | step 12750 | loss 0.0097 0.0803 0.3741 1.1194 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0046 0.0051 0.0300 0.0503 0.1058 0.2684 0.3779 0.7415 | ce_loss_increases 0.0778 0.0782 0.3376 0.4167 | compound_ce_loss_increase 0.6993 | l0s 9.9997 9.9998 9.9999 9.9992 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0046 0.0051 0.0300 0.0503 0.1058 0.2684 0.3779 0.7415 type eval | step 13000 | loss 0.0097 0.0802 0.3736 1.1183 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0046 0.0051 0.0300 0.0502 0.1056 0.2680 0.3775 0.7408 | ce_loss_increases 0.0756 0.0779 0.3375 0.4150 | compound_ce_loss_increase 0.6996 | l0s 9.9998 9.9999 9.9999 9.9992 9.9999 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0046 0.0051 0.0300 0.0502 0.1056 0.2680 0.3775 0.7408 type eval | step 13250 | loss 0.0097 0.0803 0.3736 1.1180 | checkpoint False False True True | ce_loss 1.5683 | sae_losses 0.0046 0.0051 0.0300 0.0503 0.1056 0.2680 0.3773 0.7407 | ce_loss_increases 0.0767 0.0787 0.3371 0.4147 | compound_ce_loss_increase 0.7071 | l0s 9.9998 9.9998 9.9999 9.9992 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0046 0.0051 0.0300 0.0503 0.1056 0.2680 0.3773 0.7407 type eval | step 13500 | loss 0.0097 0.0805 0.3739 1.1181 | checkpoint False False False False | ce_loss 1.5683 | sae_losses 0.0046 0.0051 0.0302 0.0504 0.1058 0.2681 0.3775 0.7406 | ce_loss_increases 0.0736 0.0787 0.3370 0.4144 | compound_ce_loss_increase 0.7066 | l0s 9.9996 9.9999 9.9999 9.9993 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0046 0.0051 0.0302 0.0504 0.1058 0.2681 0.3775 0.7406 type eval | step 13750 | loss 0.0097 0.0807 0.3744 1.1188 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0045 0.0051 0.0303 0.0505 0.1060 0.2684 0.3778 0.7411 | ce_loss_increases 0.0741 0.0789 0.3385 0.4128 | compound_ce_loss_increase 0.7093 | l0s 9.9997 9.9999 9.9999 9.9993 9.9998 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0045 0.0051 0.0303 0.0505 0.1060 0.2684 0.3778 0.7411 type eval | step 14000 | loss 0.0096 0.0805 0.3743 1.1188 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0045 0.0051 0.0301 0.0503 0.1059 0.2684 0.3777 0.7410 | ce_loss_increases 0.0728 0.0792 0.3388 0.4145 | compound_ce_loss_increase 0.7073 | l0s 9.9997 10.0000 9.9999 9.9993 9.9997 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0045 0.0051 0.0301 0.0503 0.1059 0.2684 0.3777 0.7410 type eval | step 14250 | loss 0.0096 0.0805 0.3740 1.1181 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0045 0.0051 0.0302 0.0503 0.1058 0.2682 0.3775 0.7405 | ce_loss_increases 0.0746 0.0802 0.3369 0.4167 | compound_ce_loss_increase 0.7096 | l0s 9.9996 9.9999 9.9999 9.9993 9.9997 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0045 0.0051 0.0302 0.0503 0.1058 0.2682 0.3775 0.7405 type eval | step 14500 | loss 0.0096 0.0805 0.3740 1.1181 | checkpoint False False False False | ce_loss 1.5683 | sae_losses 0.0045 0.0051 0.0302 0.0503 0.1058 0.2682 0.3775 0.7406 | ce_loss_increases 0.0750 0.0806 0.3378 0.4166 | compound_ce_loss_increase 0.7062 | l0s 9.9996 9.9996 9.9999 9.9993 9.9996 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0045 0.0051 0.0302 0.0503 0.1058 0.2682 0.3775 0.7406 type eval | step 14750 | loss 0.0095 0.0804 0.3740 1.1182 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0045 0.0050 0.0302 0.0502 0.1058 0.2682 0.3776 0.7406 | ce_loss_increases 0.0722 0.0789 0.3389 0.4169 | compound_ce_loss_increase 0.7063 | l0s 9.9996 9.9999 9.9999 9.9994 9.9997 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0045 0.0050 0.0302 0.0502 0.1058 0.2682 0.3776 0.7406 type eval | step 15000 | loss 0.0095 0.0803 0.3739 1.1177 | checkpoint False False False True | ce_loss 1.5683 | sae_losses 0.0045 0.0050 0.0302 0.0502 0.1057 0.2682 0.3775 0.7402 | ce_loss_increases 0.0747 0.0781 0.3399 0.4165 | compound_ce_loss_increase 0.7038 | l0s 9.9997 9.9999 9.9999 9.9993 9.9996 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0045 0.0050 0.0302 0.0502 0.1057 0.2682 0.3775 0.7402 type eval | step 15250 | loss 0.0095 0.0802 0.3736 1.1171 | checkpoint True False True True | ce_loss 1.5683 | sae_losses 0.0045 0.0050 0.0301 0.0501 0.1056 0.2680 0.3773 0.7398 | ce_loss_increases 0.0703 0.0788 0.3370 0.4168 | compound_ce_loss_increase 0.7016 | l0s 9.9997 9.9999 9.9999 9.9993 9.9997 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0045 0.0050 0.0301 0.0501 0.1056 0.2680 0.3773 0.7398 type eval | step 15500 | loss 0.0094 0.0801 0.3731 1.1165 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0044 0.0050 0.0301 0.0500 0.1055 0.2676 0.3770 0.7394 | ce_loss_increases 0.0745 0.0792 0.3361 0.4139 | compound_ce_loss_increase 0.7029 | l0s 9.9997 9.9999 9.9999 9.9994 9.9997 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0044 0.0050 0.0301 0.0500 0.1055 0.2676 0.3770 0.7394 type eval | step 15750 | loss 0.0094 0.0801 0.3731 1.1163 | checkpoint True False False True | ce_loss 1.5683 | sae_losses 0.0044 0.0050 0.0301 0.0500 0.1055 0.2676 0.3769 0.7394 | ce_loss_increases 0.0743 0.0786 0.3368 0.4145 | compound_ce_loss_increase 0.7012 | l0s 9.9998 9.9999 9.9999 9.9994 9.9996 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0044 0.0050 0.0301 0.0500 0.1055 0.2676 0.3769 0.7394 type eval | step 16000 | loss 0.0094 0.0803 0.3733 1.1163 | checkpoint False False False True | ce_loss 1.5683 | sae_losses 0.0045 0.0050 0.0302 0.0501 0.1056 0.2677 0.3769 0.7394 | ce_loss_increases 0.0708 0.0796 0.3388 0.4159 | compound_ce_loss_increase 0.6972 | l0s 9.9997 9.9999 10.0000 9.9994 9.9997 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0045 0.0050 0.0302 0.0501 0.1056 0.2677 0.3769 0.7394 type eval | step 16250 | loss 0.0094 0.0805 0.3737 1.1167 | checkpoint False False False False | ce_loss 1.5683 | sae_losses 0.0044 0.0050 0.0303 0.0502 0.1057 0.2679 0.3770 0.7398 | ce_loss_increases 0.0701 0.0785 0.3391 0.4154 | compound_ce_loss_increase 0.7029 | l0s 9.9996 9.9999 9.9999 9.9994 9.9997 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0044 0.0050 0.0303 0.0502 0.1057 0.2679 0.3770 0.7398 type eval | step 16500 | loss 0.0094 0.0805 0.3738 1.1169 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0044 0.0050 0.0303 0.0502 0.1058 0.2680 0.3771 0.7398 | ce_loss_increases 0.0708 0.0788 0.3403 0.4165 | compound_ce_loss_increase 0.6989 | l0s 9.9997 9.9999 9.9999 9.9993 9.9997 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0044 0.0050 0.0303 0.0502 0.1058 0.2680 0.3771 0.7398 type eval | step 16750 | loss 0.0093 0.0804 0.3736 1.1164 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0044 0.0049 0.0303 0.0501 0.1057 0.2680 0.3769 0.7395 | ce_loss_increases 0.0715 0.0791 0.3395 0.4176 | compound_ce_loss_increase 0.7034 | l0s 9.9997 10.0000 10.0000 9.9994 9.9997 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0044 0.0049 0.0303 0.0501 0.1057 0.2680 0.3769 0.7395 type eval | step 17000 | loss 0.0093 0.0804 0.3736 1.1165 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0044 0.0049 0.0304 0.0501 0.1057 0.2679 0.3770 0.7395 | ce_loss_increases 0.0706 0.0800 0.3392 0.4184 | compound_ce_loss_increase 0.6983 | l0s 9.9997 9.9999 9.9999 9.9994 9.9996 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0044 0.0049 0.0304 0.0501 0.1057 0.2679 0.3770 0.7395 type eval | step 17250 | loss 0.0093 0.0804 0.3736 1.1164 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0044 0.0049 0.0303 0.0501 0.1057 0.2679 0.3770 0.7394 | ce_loss_increases 0.0685 0.0795 0.3395 0.4173 | compound_ce_loss_increase 0.6961 | l0s 9.9997 9.9996 9.9999 9.9994 9.9997 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0044 0.0049 0.0303 0.0501 0.1057 0.2679 0.3770 0.7394 type eval | step 17500 | loss 0.0093 0.0803 0.3735 1.1163 | checkpoint False False False False | ce_loss 1.5683 | sae_losses 0.0044 0.0049 0.0303 0.0500 0.1057 0.2679 0.3770 0.7393 | ce_loss_increases 0.0696 0.0781 0.3400 0.4173 | compound_ce_loss_increase 0.7009 | l0s 9.9997 9.9999 9.9999 9.9993 9.9996 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0044 0.0049 0.0303 0.0500 0.1057 0.2679 0.3770 0.7393 type eval | step 17750 | loss 0.0093 0.0803 0.3734 1.1160 | checkpoint False False False True | ce_loss 1.5683 | sae_losses 0.0044 0.0049 0.0303 0.0500 0.1056 0.2678 0.3769 0.7391 | ce_loss_increases 0.0687 0.0784 0.3392 0.4176 | compound_ce_loss_increase 0.6954 | l0s 9.9997 9.9999 9.9999 9.9993 9.9997 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0044 0.0049 0.0303 0.0500 0.1056 0.2678 0.3769 0.7391 type eval | step 18000 | loss 0.0093 0.0802 0.3731 1.1156 | checkpoint True False True True | ce_loss 1.5683 | sae_losses 0.0044 0.0049 0.0302 0.0499 0.1055 0.2676 0.3766 0.7390 | ce_loss_increases 0.0717 0.0780 0.3380 0.4165 | compound_ce_loss_increase 0.6973 | l0s 9.9997 9.9998 9.9999 9.9994 9.9997 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0044 0.0049 0.0302 0.0499 0.1055 0.2676 0.3766 0.7390 type eval | step 18250 | loss 0.0092 0.0800 0.3731 1.1156 | checkpoint True True True True | ce_loss 1.5683 | sae_losses 0.0044 0.0049 0.0301 0.0499 0.1055 0.2676 0.3766 0.7390 | ce_loss_increases 0.0707 0.0780 0.3378 0.4165 | compound_ce_loss_increase 0.6965 | l0s 9.9997 9.9999 10.0000 9.9994 9.9997 9.9997 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0044 0.0049 0.0301 0.0499 0.1055 0.2676 0.3766 0.7390 type eval | step 18500 | loss 0.0093 0.0797 0.3731 1.1157 | checkpoint False True True False | ce_loss 1.5683 | sae_losses 0.0044 0.0049 0.0298 0.0499 0.1056 0.2675 0.3766 0.7391 | ce_loss_increases 0.0685 0.0772 0.3381 0.4164 | compound_ce_loss_increase 0.6892 | l0s 9.9997 9.9998 10.0000 9.9993 9.9997 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0044 0.0049 0.0298 0.0499 0.1056 0.2675 0.3766 0.7391 type eval | step 18750 | loss 0.0093 0.0796 0.3733 1.1160 | checkpoint False True False False | ce_loss 1.5683 | sae_losses 0.0044 0.0049 0.0296 0.0500 0.1057 0.2676 0.3766 0.7393 | ce_loss_increases 0.0676 0.0772 0.3396 0.4152 | compound_ce_loss_increase 0.6946 | l0s 9.9997 9.9998 10.0000 9.9994 9.9997 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0044 0.0049 0.0296 0.0500 0.1057 0.2676 0.3766 0.7393 type eval | step 19000 | loss 0.0093 0.0796 0.3735 1.1163 | checkpoint False False False False | ce_loss 1.5683 | sae_losses 0.0044 0.0049 0.0296 0.0500 0.1057 0.2677 0.3767 0.7396 | ce_loss_increases 0.0696 0.0768 0.3404 0.4158 | compound_ce_loss_increase 0.6924 | l0s 9.9997 9.9999 10.0000 9.9994 9.9997 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0044 0.0049 0.0296 0.0500 0.1057 0.2677 0.3767 0.7396 type eval | step 19250 | loss 0.0092 0.0795 0.3734 1.1161 | checkpoint True True False False | ce_loss 1.5683 | sae_losses 0.0044 0.0049 0.0295 0.0500 0.1057 0.2677 0.3766 0.7394 | ce_loss_increases 0.0684 0.0769 0.3403 0.4167 | compound_ce_loss_increase 0.6952 | l0s 9.9996 9.9999 10.0000 9.9993 9.9997 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0044 0.0049 0.0295 0.0500 0.1057 0.2677 0.3766 0.7394 type eval | step 19500 | loss 0.0092 0.0795 0.3733 1.1162 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0044 0.0049 0.0295 0.0500 0.1057 0.2676 0.3766 0.7395 | ce_loss_increases 0.0681 0.0771 0.3383 0.4170 | compound_ce_loss_increase 0.6966 | l0s 9.9997 9.9998 10.0000 9.9993 9.9997 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0044 0.0049 0.0295 0.0500 0.1057 0.2676 0.3766 0.7395 type eval | step 19750 | loss 0.0092 0.0795 0.3734 1.1163 | checkpoint True False False False | ce_loss 1.5683 | sae_losses 0.0044 0.0049 0.0296 0.0500 0.1057 0.2677 0.3767 0.7396 | ce_loss_increases 0.0683 0.0773 0.3388 0.4174 | compound_ce_loss_increase 0.6955 | l0s 9.9996 9.9998 10.0000 9.9994 9.9997 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0044 0.0049 0.0296 0.0500 0.1057 0.2677 0.3767 0.7396 type eval | step 20000 | loss 0.0092 0.0795 0.3734 1.1161 | checkpoint True True False False | ce_loss 1.5683 | sae_losses 0.0044 0.0049 0.0295 0.0499 0.1057 0.2677 0.3766 0.7395 | ce_loss_increases 0.0687 0.0766 0.3408 0.4176 | compound_ce_loss_increase 0.7008 | l0s 9.9996 9.9998 10.0000 9.9993 9.9997 9.9998 10.0000 10.0000 | stream_l1s 10.6278 11.3458 10.7689 11.5477 11.0628 12.7100 12.2906 23.1259 | ∇_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0044 0.0049 0.0295 0.0499 0.1057 0.2677 0.3766 0.7395