davidquarel's picture
Upload folder using huggingface_hub
5eaa98a verified
type eval | step 0 | loss 69.9351 227.3715 251.4663 262.0019 1158.8053 | checkpoint False | ce_loss 1.5683 | sae_losses 69.9351 227.3715 251.4663 262.0019 1158.8053 | ce_loss_increases 9.5531 6.2093 3.0940 1.4725 0.2427 | compound_ce_loss_increase 7.8380 | l0s 255.7843 246.1082 261.4560 257.6235 253.3834 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
type eval | step 250 | loss 2.7938 4.6736 9.2002 19.7001 65.0648 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 2.7938 4.6736 9.2002 19.7001 65.0648 | ce_loss_increases 2.4624 2.5027 1.4391 0.8839 0.5587 | compound_ce_loss_increase 15.9712 | l0s 109.0341 98.4950 101.4176 119.2250 157.3766 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
type eval | step 500 | loss 0.0672 0.2048 0.6036 0.8582 1.7478 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0672 0.2048 0.6036 0.8582 1.7478 | ce_loss_increases 0.1324 0.6147 0.6956 0.5069 0.1164 | compound_ce_loss_increase 2.1451 | l0s 33.4035 41.5548 42.0688 54.1596 55.7794 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
type eval | step 750 | loss 0.0335 0.1020 0.3495 0.5447 1.0117 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0335 0.1020 0.3495 0.5447 1.0117 | ce_loss_increases 0.0313 0.3622 0.3207 0.1837 0.0290 | compound_ce_loss_increase 1.0494 | l0s 20.5222 24.9453 34.0170 69.1310 94.0110 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
type eval | step 1000 | loss 0.0291 0.0836 0.3035 0.4791 0.8924 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0291 0.0836 0.3035 0.4791 0.8924 | ce_loss_increases 0.0188 0.3401 0.2667 0.1451 0.0183 | compound_ce_loss_increase 1.0156 | l0s 16.9708 21.7072 29.8823 67.8966 112.0945 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
type eval | step 1250 | loss 0.0278 0.0777 0.2887 0.4584 0.8595 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0278 0.0777 0.2887 0.4584 0.8595 | ce_loss_increases 0.0136 0.3101 0.2397 0.1342 0.0155 | compound_ce_loss_increase 1.0264 | l0s 14.0052 20.0144 27.8973 65.0998 115.1745 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
type eval | step 1500 | loss 0.0270 0.0749 0.2814 0.4484 0.8443 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0270 0.0749 0.2814 0.4484 0.8443 | ce_loss_increases 0.0125 0.2840 0.2297 0.1284 0.0145 | compound_ce_loss_increase 1.0387 | l0s 12.3140 19.0326 26.4638 63.0203 114.9442 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
type eval | step 1750 | loss 0.0265 0.0733 0.2776 0.4432 0.8359 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0265 0.0733 0.2776 0.4432 0.8359 | ce_loss_increases 0.0123 0.2665 0.2284 0.1277 0.0140 | compound_ce_loss_increase 1.0452 | l0s 11.0394 18.3555 25.7191 61.4684 113.9065 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
type eval | step 2000 | loss 0.0261 0.0721 0.2746 0.4398 0.8302 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0261 0.0721 0.2746 0.4398 0.8302 | ce_loss_increases 0.0120 0.2507 0.2298 0.1308 0.0139 | compound_ce_loss_increase 1.0642 | l0s 10.2797 17.5832 25.1559 60.1170 113.0572 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
type eval | step 2250 | loss 0.0258 0.0712 0.2725 0.4370 0.8258 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0258 0.0712 0.2725 0.4370 0.8258 | ce_loss_increases 0.0125 0.2354 0.2288 0.1284 0.0134 | compound_ce_loss_increase 1.0967 | l0s 9.5582 17.2766 24.6476 59.3353 112.1152 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
type eval | step 2500 | loss 0.0255 0.0704 0.2709 0.4348 0.8225 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0255 0.0704 0.2709 0.4348 0.8225 | ce_loss_increases 0.0127 0.2308 0.2220 0.1267 0.0130 | compound_ce_loss_increase 1.0747 | l0s 9.0886 16.7897 24.2651 58.6772 111.6607 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
type eval | step 2750 | loss 0.0253 0.0698 0.2695 0.4327 0.8196 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0253 0.0698 0.2695 0.4327 0.8196 | ce_loss_increases 0.0112 0.2156 0.2130 0.1292 0.0135 | compound_ce_loss_increase 1.0413 | l0s 8.6659 16.6079 24.0919 57.5640 110.8334 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
type eval | step 3000 | loss 0.0252 0.0693 0.2686 0.4317 0.8183 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0252 0.0693 0.2686 0.4317 0.8183 | ce_loss_increases 0.0124 0.2080 0.2110 0.1272 0.0142 | compound_ce_loss_increase 1.0826 | l0s 8.4159 16.3360 23.7989 57.3684 110.3676 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
type eval | step 3250 | loss 0.0251 0.0690 0.2681 0.4309 0.8167 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0251 0.0690 0.2681 0.4309 0.8167 | ce_loss_increases 0.0108 0.2129 0.2096 0.1288 0.0142 | compound_ce_loss_increase 1.0807 | l0s 8.2436 16.0343 23.4633 57.0171 110.1526 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
type eval | step 3500 | loss 0.0250 0.0688 0.2678 0.4304 0.8164 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0250 0.0688 0.2678 0.4304 0.8164 | ce_loss_increases 0.0113 0.2076 0.2122 0.1260 0.0147 | compound_ce_loss_increase 1.0646 | l0s 8.1357 15.6935 23.2728 56.9503 109.8648 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
type eval | step 3750 | loss 0.0249 0.0684 0.2672 0.4299 0.8151 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0249 0.0684 0.2672 0.4299 0.8151 | ce_loss_increases 0.0107 0.2024 0.2087 0.1245 0.0148 | compound_ce_loss_increase 1.0792 | l0s 7.9486 15.5286 23.0477 56.5439 109.6589 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
type eval | step 4000 | loss 0.0248 0.0681 0.2667 0.4289 0.8137 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0248 0.0681 0.2667 0.4289 0.8137 | ce_loss_increases 0.0101 0.1978 0.2083 0.1243 0.0150 | compound_ce_loss_increase 1.0657 | l0s 7.8210 15.4135 22.9380 56.2839 109.3188 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
type eval | step 4250 | loss 0.0248 0.0679 0.2663 0.4285 0.8130 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0248 0.0679 0.2663 0.4285 0.8130 | ce_loss_increases 0.0107 0.1952 0.2055 0.1236 0.0144 | compound_ce_loss_increase 1.0770 | l0s 7.7570 15.3227 22.8371 56.1585 109.3503 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
type eval | step 4500 | loss 0.0247 0.0677 0.2660 0.4280 0.8121 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0247 0.0677 0.2660 0.4280 0.8121 | ce_loss_increases 0.0098 0.1911 0.2064 0.1243 0.0144 | compound_ce_loss_increase 1.0759 | l0s 7.6724 15.1929 22.7772 55.8330 109.2679 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
type eval | step 4750 | loss 0.0247 0.0675 0.2657 0.4275 0.8112 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0247 0.0675 0.2657 0.4275 0.8112 | ce_loss_increases 0.0099 0.1907 0.2019 0.1237 0.0143 | compound_ce_loss_increase 1.0667 | l0s 7.6310 15.1614 22.8068 55.7218 108.9904 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
type eval | step 5000 | loss 0.0247 0.0673 0.2653 0.4270 0.8105 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0247 0.0673 0.2653 0.4270 0.8105 | ce_loss_increases 0.0099 0.1891 0.2036 0.1217 0.0141 | compound_ce_loss_increase 1.0497 | l0s 7.6042 15.0103 22.7263 55.7531 108.8672 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
type eval | step 5250 | loss 0.0246 0.0672 0.2650 0.4264 0.8097 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0246 0.0672 0.2650 0.4264 0.8097 | ce_loss_increases 0.0093 0.1852 0.2020 0.1222 0.0148 | compound_ce_loss_increase 1.0509 | l0s 7.5453 14.9759 22.7354 55.5485 108.6608 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
type eval | step 5500 | loss 0.0246 0.0671 0.2648 0.4262 0.8094 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0246 0.0671 0.2648 0.4262 0.8094 | ce_loss_increases 0.0094 0.1837 0.2016 0.1229 0.0146 | compound_ce_loss_increase 1.0697 | l0s 7.5389 14.8933 22.6347 55.4255 108.4745 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
type eval | step 5750 | loss 0.0246 0.0670 0.2648 0.4261 0.8090 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0246 0.0670 0.2648 0.4261 0.8090 | ce_loss_increases 0.0093 0.1828 0.2045 0.1233 0.0149 | compound_ce_loss_increase 1.0857 | l0s 7.4760 14.8114 22.4662 55.3199 108.4992 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
type eval | step 6000 | loss 0.0246 0.0670 0.2648 0.4260 0.8088 | checkpoint True True False True True | ce_loss 1.5683 | sae_losses 0.0246 0.0670 0.2648 0.4260 0.8088 | ce_loss_increases 0.0095 0.1846 0.2058 0.1227 0.0150 | compound_ce_loss_increase 1.0818 | l0s 7.4760 14.7987 22.5016 55.3805 108.5362 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
type eval | step 6250 | loss 0.0245 0.0670 0.2648 0.4261 0.8088 | checkpoint True True False False True | ce_loss 1.5683 | sae_losses 0.0245 0.0670 0.2648 0.4261 0.8088 | ce_loss_increases 0.0089 0.1797 0.2100 0.1226 0.0150 | compound_ce_loss_increase 1.0657 | l0s 7.4739 14.7209 22.4140 55.3752 108.6182 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
type eval | step 6500 | loss 0.0245 0.0669 0.2647 0.4258 0.8084 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0245 0.0669 0.2647 0.4258 0.8084 | ce_loss_increases 0.0095 0.1806 0.2065 0.1222 0.0151 | compound_ce_loss_increase 1.0924 | l0s 7.4278 14.6543 22.3796 55.2960 108.6694 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
type eval | step 6750 | loss 0.0245 0.0668 0.2646 0.4257 0.8082 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0245 0.0668 0.2646 0.4257 0.8082 | ce_loss_increases 0.0088 0.1803 0.2037 0.1218 0.0150 | compound_ce_loss_increase 1.0682 | l0s 7.4555 14.6546 22.4456 55.3179 108.4709 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
type eval | step 7000 | loss 0.0245 0.0668 0.2645 0.4257 0.8082 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0245 0.0668 0.2645 0.4257 0.8082 | ce_loss_increases 0.0089 0.1797 0.2041 0.1220 0.0149 | compound_ce_loss_increase 1.0774 | l0s 7.3995 14.5176 22.3972 55.2673 108.3490 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
type eval | step 7250 | loss 0.0245 0.0667 0.2644 0.4256 0.8080 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0245 0.0667 0.2644 0.4256 0.8080 | ce_loss_increases 0.0084 0.1774 0.2050 0.1220 0.0149 | compound_ce_loss_increase 1.0624 | l0s 7.3754 14.5630 22.3671 55.1606 108.3959 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259
type eval | step 7500 | loss 0.0245 0.0667 0.2643 0.4255 0.8078 | checkpoint True True True True True | ce_loss 1.5683 | sae_losses 0.0245 0.0667 0.2643 0.4255 0.8078 | ce_loss_increases 0.0085 0.1788 0.2038 0.1217 0.0146 | compound_ce_loss_increase 1.0660 | l0s 7.4011 14.4701 22.3721 55.1387 108.2986 | stream_l1s 6.7379 11.3458 11.5477 12.7100 23.1259