| type eval | step 0 | loss 64.0641 272.8978 264.8856 263.4692 1229.2462 | checkpoint False | ce_loss 1.5683 | sae_losses 64.0641 272.8978 264.8856 263.4692 1229.2462 | ce_loss_increases 9.3565 6.4024 3.2766 1.3070 0.2241 | compound_ce_loss_increase 8.2896 | l0s 252.6291 262.8740 262.4859 254.4701 259.6426 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259 | |
| type eval | step 250 | loss 2.6416 6.6722 13.6019 22.2585 93.6708 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 2.6416 6.6722 13.6019 22.2585 93.6708 | ce_loss_increases 2.6025 3.1723 1.5807 0.8336 0.5518 | compound_ce_loss_increase 18.0163 | l0s 106.7540 105.8370 106.4442 116.7654 155.2323 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259 | |
| type eval | step 500 | loss 0.0668 0.3052 1.0588 1.5440 5.2114 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0668 0.3052 1.0588 1.5440 5.2114 | ce_loss_increases 0.1331 0.8541 1.1558 0.8797 0.4108 | compound_ce_loss_increase 3.5442 | l0s 33.1732 35.2169 30.7330 36.1500 27.0954 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259 | |
| type eval | step 750 | loss 0.0336 0.1494 0.6341 1.0358 3.5250 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0336 0.1494 0.6341 1.0358 3.5250 | ce_loss_increases 0.0359 0.4239 0.5744 0.4458 0.1941 | compound_ce_loss_increase 1.8407 | l0s 20.5884 19.4394 20.0141 29.5440 25.6748 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259 | |
| type eval | step 1000 | loss 0.0293 0.1241 0.5632 0.9239 3.2393 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0293 0.1241 0.5632 0.9239 3.2393 | ce_loss_increases 0.0176 0.3725 0.4869 0.3800 0.1933 | compound_ce_loss_increase 1.8699 | l0s 17.0147 15.3931 16.2486 23.8874 21.7000 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259 | |
| type eval | step 1250 | loss 0.0279 0.1173 0.5410 0.8889 3.1563 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0279 0.1173 0.5410 0.8889 3.1563 | ce_loss_increases 0.0156 0.3497 0.4426 0.3646 0.2015 | compound_ce_loss_increase 2.0009 | l0s 14.2403 13.7108 14.5937 21.1870 18.9757 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259 | |
| type eval | step 1500 | loss 0.0271 0.1145 0.5298 0.8717 3.1149 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0271 0.1145 0.5298 0.8717 3.1149 | ce_loss_increases 0.0141 0.3341 0.4346 0.3560 0.2119 | compound_ce_loss_increase 2.0923 | l0s 12.4782 12.8043 13.5077 19.6646 17.3125 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259 | |
| type eval | step 1750 | loss 0.0266 0.1130 0.5238 0.8619 3.0923 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0266 0.1130 0.5238 0.8619 3.0923 | ce_loss_increases 0.0140 0.3154 0.4421 0.3580 0.2055 | compound_ce_loss_increase 2.0910 | l0s 11.2796 12.3539 12.7721 18.6663 16.3837 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259 | |
| type eval | step 2000 | loss 0.0262 0.1115 0.5196 0.8548 3.0748 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0262 0.1115 0.5196 0.8548 3.0748 | ce_loss_increases 0.0120 0.2961 0.4342 0.3650 0.2025 | compound_ce_loss_increase 2.1045 | l0s 10.6916 11.6689 12.3249 17.8666 15.6473 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259 | |
| type eval | step 2250 | loss 0.0259 0.1104 0.5165 0.8494 3.0638 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0259 0.1104 0.5165 0.8494 3.0638 | ce_loss_increases 0.0136 0.2897 0.4297 0.3568 0.2032 | compound_ce_loss_increase 2.1810 | l0s 9.9876 11.2369 11.9721 17.3936 15.1637 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259 | |
| type eval | step 2500 | loss 0.0257 0.1096 0.5144 0.8453 3.0537 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0257 0.1096 0.5144 0.8453 3.0537 | ce_loss_increases 0.0123 0.2820 0.4309 0.3499 0.2062 | compound_ce_loss_increase 2.2317 | l0s 9.4950 11.0295 11.6426 17.0251 14.6649 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259 | |
| type eval | step 2750 | loss 0.0255 0.1090 0.5126 0.8415 3.0458 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0255 0.1090 0.5126 0.8415 3.0458 | ce_loss_increases 0.0123 0.2690 0.4209 0.3534 0.2080 | compound_ce_loss_increase 2.1499 | l0s 9.0474 10.8297 11.4997 16.6150 14.3556 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259 | |
| type eval | step 3000 | loss 0.0253 0.1085 0.5115 0.8396 3.0411 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0253 0.1085 0.5115 0.8396 3.0411 | ce_loss_increases 0.0113 0.2624 0.4174 0.3513 0.2145 | compound_ce_loss_increase 2.1972 | l0s 8.7768 10.7487 11.3267 16.4441 14.0886 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259 | |
| type eval | step 3250 | loss 0.0252 0.1082 0.5109 0.8381 3.0376 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0252 0.1082 0.5109 0.8381 3.0376 | ce_loss_increases 0.0117 0.2588 0.4174 0.3534 0.2185 | compound_ce_loss_increase 2.2745 | l0s 8.5106 10.4775 11.2317 16.1927 13.8961 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259 | |
| type eval | step 3500 | loss 0.0251 0.1080 0.5106 0.8373 3.0353 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0251 0.1080 0.5106 0.8373 3.0353 | ce_loss_increases 0.0110 0.2529 0.4243 0.3533 0.2131 | compound_ce_loss_increase 2.2143 | l0s 8.3096 10.2714 11.0281 16.1179 13.8867 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259 | |
| type eval | step 3750 | loss 0.0250 0.1077 0.5104 0.8366 3.0329 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0250 0.1077 0.5104 0.8366 3.0329 | ce_loss_increases 0.0123 0.2586 0.4263 0.3554 0.2145 | compound_ce_loss_increase 2.3299 | l0s 8.1371 10.2017 10.9193 16.0039 13.6339 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259 | |
| type eval | step 4000 | loss 0.0249 0.1073 0.5095 0.8351 3.0304 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0249 0.1073 0.5095 0.8351 3.0304 | ce_loss_increases 0.0111 0.2561 0.4219 0.3556 0.2211 | compound_ce_loss_increase 2.3266 | l0s 8.0245 10.1079 10.9354 15.8640 13.4941 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259 | |
| type eval | step 4250 | loss 0.0249 0.1071 0.5091 0.8345 3.0284 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0249 0.1071 0.5091 0.8345 3.0284 | ce_loss_increases 0.0106 0.2538 0.4187 0.3522 0.2256 | compound_ce_loss_increase 2.2943 | l0s 7.9015 9.9580 10.8735 15.8071 13.3946 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259 | |
| type eval | step 4500 | loss 0.0248 0.1070 0.5088 0.8342 3.0264 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0248 0.1070 0.5088 0.8342 3.0264 | ce_loss_increases 0.0105 0.2469 0.4232 0.3546 0.2241 | compound_ce_loss_increase 2.3489 | l0s 7.8516 9.7947 10.7963 15.6840 13.2750 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259 | |
| type eval | step 4750 | loss 0.0248 0.1068 0.5084 0.8337 3.0250 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0248 0.1068 0.5084 0.8337 3.0250 | ce_loss_increases 0.0098 0.2423 0.4107 0.3534 0.2214 | compound_ce_loss_increase 2.2510 | l0s 7.8189 9.7722 10.8501 15.5976 13.2033 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259 | |
| type eval | step 5000 | loss 0.0248 0.1067 0.5080 0.8330 3.0229 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0248 0.1067 0.5080 0.8330 3.0229 | ce_loss_increases 0.0106 0.2319 0.4117 0.3487 0.2189 | compound_ce_loss_increase 2.2530 | l0s 7.7031 9.7445 10.8204 15.6048 13.2103 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259 | |
| type eval | step 5250 | loss 0.0247 0.1065 0.5077 0.8323 3.0208 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0247 0.1065 0.5077 0.8323 3.0208 | ce_loss_increases 0.0101 0.2328 0.4078 0.3486 0.2185 | compound_ce_loss_increase 2.2264 | l0s 7.6755 9.6824 10.8341 15.5545 13.1102 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259 | |
| type eval | step 5500 | loss 0.0247 0.1063 0.5076 0.8319 3.0200 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0247 0.1063 0.5076 0.8319 3.0200 | ce_loss_increases 0.0095 0.2332 0.4118 0.3500 0.2192 | compound_ce_loss_increase 2.3027 | l0s 7.6366 9.5776 10.7525 15.4963 13.0877 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259 | |
| type eval | step 5750 | loss 0.0247 0.1062 0.5075 0.8317 3.0198 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0247 0.1062 0.5075 0.8317 3.0198 | ce_loss_increases 0.0097 0.2318 0.4132 0.3519 0.2214 | compound_ce_loss_increase 2.3230 | l0s 7.6158 9.5474 10.6916 15.4239 13.0533 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259 | |
| type eval | step 6000 | loss 0.0246 0.1062 0.5075 0.8317 3.0193 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0246 0.1062 0.5075 0.8317 3.0193 | ce_loss_increases 0.0096 0.2312 0.4104 0.3513 0.2209 | compound_ce_loss_increase 2.2758 | l0s 7.5521 9.5131 10.7281 15.4576 13.0368 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259 | |
| type eval | step 6250 | loss 0.0246 0.1062 0.5074 0.8317 3.0190 | checkpoint True False True False True | ce_loss 1.5683 | sae_losses 0.0246 0.1062 0.5074 0.8317 3.0190 | ce_loss_increases 0.0092 0.2283 0.4151 0.3508 0.2188 | compound_ce_loss_increase 2.2877 | l0s 7.5672 9.4448 10.6702 15.4602 13.0483 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259 | |
| type eval | step 6500 | loss 0.0246 0.1061 0.5073 0.8315 3.0181 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0246 0.1061 0.5073 0.8315 3.0181 | ce_loss_increases 0.0092 0.2285 0.4162 0.3513 0.2192 | compound_ce_loss_increase 2.3195 | l0s 7.5568 9.4555 10.6570 15.4084 13.0375 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259 | |
| type eval | step 6750 | loss 0.0246 0.1061 0.5072 0.8313 3.0178 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0246 0.1061 0.5072 0.8313 3.0178 | ce_loss_increases 0.0084 0.2277 0.4126 0.3507 0.2208 | compound_ce_loss_increase 2.3011 | l0s 7.5372 9.4778 10.6755 15.4102 12.9660 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259 | |
| type eval | step 7000 | loss 0.0246 0.1061 0.5072 0.8313 3.0173 | checkpoint True True False True True | ce_loss 1.5683 | sae_losses 0.0246 0.1061 0.5072 0.8313 3.0173 | ce_loss_increases 0.0092 0.2295 0.4151 0.3507 0.2209 | compound_ce_loss_increase 2.3161 | l0s 7.5063 9.3930 10.6682 15.4000 12.9483 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259 | |
| type eval | step 7250 | loss 0.0246 0.1060 0.5071 0.8313 3.0168 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0246 0.1060 0.5071 0.8313 3.0168 | ce_loss_increases 0.0088 0.2226 0.4157 0.3509 0.2207 | compound_ce_loss_increase 2.2939 | l0s 7.4882 9.4179 10.6721 15.3764 12.9460 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259 | |
| type eval | step 7500 | loss 0.0246 0.1060 0.5071 0.8311 3.0166 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0246 0.1060 0.5071 0.8311 3.0166 | ce_loss_increases 0.0090 0.2257 0.4117 0.3508 0.2201 | compound_ce_loss_increase 2.2751 | l0s 7.5031 9.3500 10.6773 15.3755 12.9196 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259 | |