File size: 51,127 Bytes
a79c31e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
type eval | step 0 | loss 62.1168 38.5482 32.7109 16.2987 23.8339 | checkpoint False | ce_loss 2.7305 | sae_losses 62.1168 38.5482 32.7109 16.2987 23.8339 | ce_loss_increases 13.6339 4.8078 3.4410 2.5197 0.4850 | compound_ce_loss_increase 11.0687 | l0s 126.5022 130.4285 132.0811 124.6985 128.5097 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 250 | loss 0.0976 0.1029 0.1535 0.1265 0.1991 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0976 0.1029 0.1535 0.1265 0.1991 | ce_loss_increases 0.9425 1.0777 2.5968 2.3437 6.3653 | compound_ce_loss_increase 7.3942 | l0s 32.3041 19.5469 8.0821 6.7983 0.8884 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 500 | loss 0.0504 0.0698 0.1209 0.1059 0.1879 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0504 0.0698 0.1209 0.1059 0.1879 | ce_loss_increases 0.2568 0.4954 1.5351 1.3955 4.0608 | compound_ce_loss_increase 7.3846 | l0s 32.5219 15.3129 5.9977 5.6847 1.6278 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 750 | loss 0.0443 0.0640 0.1156 0.1027 0.1852 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0443 0.0640 0.1156 0.1027 0.1852 | ce_loss_increases 0.1325 0.4469 1.3722 1.2836 3.5578 | compound_ce_loss_increase 7.4394 | l0s 27.5626 12.8291 4.8814 4.8781 1.6247 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 1000 | loss 0.0425 0.0619 0.1140 0.1019 0.1843 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0425 0.0619 0.1140 0.1019 0.1843 | ce_loss_increases 0.1250 0.4374 1.3264 1.2442 3.3534 | compound_ce_loss_increase 7.3618 | l0s 25.6762 11.5295 4.3647 4.4795 1.5408 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 1250 | loss 0.0416 0.0609 0.1135 0.1015 0.1840 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0416 0.0609 0.1135 0.1015 0.1840 | ce_loss_increases 0.1196 0.4261 1.3144 1.2372 3.3324 | compound_ce_loss_increase 7.3926 | l0s 24.4143 10.8949 4.1687 4.3115 1.5073 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 1500 | loss 0.0410 0.0603 0.1131 0.1013 0.1838 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0410 0.0603 0.1131 0.1013 0.1838 | ce_loss_increases 0.1199 0.4295 1.3123 1.2241 3.2959 | compound_ce_loss_increase 7.3557 | l0s 23.5010 10.4786 4.0506 4.2185 1.4877 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 1750 | loss 0.0406 0.0600 0.1130 0.1011 0.1837 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0406 0.0600 0.1130 0.1011 0.1837 | ce_loss_increases 0.1155 0.4251 1.3100 1.2176 3.2816 | compound_ce_loss_increase 7.3658 | l0s 23.1034 10.1729 3.9743 4.1718 1.4905 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 2000 | loss 0.0402 0.0598 0.1128 0.1011 0.1837 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0402 0.0598 0.1128 0.1011 0.1837 | ce_loss_increases 0.1156 0.4269 1.3052 1.2221 3.2495 | compound_ce_loss_increase 7.3287 | l0s 22.6448 9.9752 3.9285 4.1169 1.4746 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 2250 | loss 0.0400 0.0596 0.1128 0.1010 0.1836 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0400 0.0596 0.1128 0.1010 0.1836 | ce_loss_increases 0.1145 0.4273 1.2986 1.2156 3.2534 | compound_ce_loss_increase 7.3384 | l0s 22.2548 9.8874 3.8997 4.0863 1.4748 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 2500 | loss 0.0398 0.0595 0.1127 0.1010 0.1836 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0398 0.0595 0.1127 0.1010 0.1836 | ce_loss_increases 0.1132 0.4292 1.2953 1.2142 3.2785 | compound_ce_loss_increase 7.3939 | l0s 21.9071 9.7915 3.8888 4.0860 1.4736 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 2750 | loss 0.0397 0.0594 0.1126 0.1009 0.1836 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0397 0.0594 0.1126 0.1009 0.1836 | ce_loss_increases 0.1130 0.4271 1.2966 1.2176 3.2556 | compound_ce_loss_increase 7.3690 | l0s 21.6792 9.7545 3.8633 4.0458 1.4745 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 3000 | loss 0.0396 0.0593 0.1126 0.1009 0.1835 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0396 0.0593 0.1126 0.1009 0.1835 | ce_loss_increases 0.1138 0.4268 1.2927 1.2154 3.2783 | compound_ce_loss_increase 7.4031 | l0s 21.5254 9.6603 3.8601 4.0322 1.4683 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 3250 | loss 0.0395 0.0593 0.1126 0.1009 0.1835 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0395 0.0593 0.1126 0.1009 0.1835 | ce_loss_increases 0.1125 0.4230 1.2961 1.2188 3.2590 | compound_ce_loss_increase 7.3798 | l0s 21.5168 9.6156 3.8545 4.0206 1.4718 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 3500 | loss 0.0395 0.0593 0.1126 0.1009 0.1835 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0395 0.0593 0.1126 0.1009 0.1835 | ce_loss_increases 0.1131 0.4261 1.2974 1.2128 3.2615 | compound_ce_loss_increase 7.3961 | l0s 21.3164 9.5854 3.8460 4.0333 1.4747 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 3750 | loss 0.0394 0.0592 0.1126 0.1009 0.1835 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0394 0.0592 0.1126 0.1009 0.1835 | ce_loss_increases 0.1131 0.4242 1.2988 1.2187 3.2306 | compound_ce_loss_increase 7.3487 | l0s 21.3501 9.5534 3.8388 4.0077 1.4743 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 4000 | loss 0.0394 0.0592 0.1125 0.1008 0.1835 | checkpoint False True True True True | ce_loss 2.7305 | sae_losses 0.0394 0.0592 0.1125 0.1008 0.1835 | ce_loss_increases 0.1126 0.4206 1.2931 1.2120 3.2413 | compound_ce_loss_increase 7.3541 | l0s 21.3689 9.5213 3.8385 4.0210 1.4727 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 4250 | loss 0.0394 0.0591 0.1125 0.1008 0.1835 | checkpoint True True True False True | ce_loss 2.7305 | sae_losses 0.0394 0.0591 0.1125 0.1008 0.1835 | ce_loss_increases 0.1133 0.4202 1.2919 1.2104 3.2444 | compound_ce_loss_increase 7.3592 | l0s 21.2880 9.5207 3.8392 4.0146 1.4707 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 4500 | loss 0.0394 0.0591 0.1125 0.1008 0.1835 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0394 0.0591 0.1125 0.1008 0.1835 | ce_loss_increases 0.1124 0.4227 1.2965 1.2138 3.2618 | compound_ce_loss_increase 7.3984 | l0s 21.2795 9.4830 3.8287 4.0055 1.4733 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 4750 | loss 0.0393 0.0591 0.1125 0.1008 0.1835 | checkpoint True True True False False | ce_loss 2.7305 | sae_losses 0.0393 0.0591 0.1125 0.1008 0.1835 | ce_loss_increases 0.1125 0.4232 1.2934 1.2091 3.2448 | compound_ce_loss_increase 7.3566 | l0s 21.2658 9.4830 3.8251 4.0167 1.4686 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 5000 | loss 0.0393 0.0591 0.1125 0.1008 0.1835 | checkpoint True True True False False | ce_loss 2.7305 | sae_losses 0.0393 0.0591 0.1125 0.1008 0.1835 | ce_loss_increases 0.1135 0.4253 1.2955 1.2099 3.2526 | compound_ce_loss_increase 7.3795 | l0s 21.1398 9.4597 3.8206 4.0128 1.4737 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 0 | loss 66.9152 33.4942 26.4795 16.6743 18.6534 | checkpoint False | ce_loss 2.7305 | sae_losses 66.9152 33.4942 26.4795 16.6743 18.6534 | ce_loss_increases 13.2048 4.9146 3.7548 2.4958 0.5724 | compound_ce_loss_increase 9.4137 | l0s 127.4290 126.8114 128.7220 123.8109 129.8279 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 250 | loss 0.2804 0.1630 0.1527 0.1262 0.1479 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.2804 0.1630 0.1527 0.1262 0.1479 | ce_loss_increases 1.9628 1.8229 2.5734 2.3130 1.7294 | compound_ce_loss_increase 7.7162 | l0s 20.0368 14.2932 7.8862 6.6719 8.0205 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 500 | loss 0.1865 0.1146 0.1203 0.1058 0.1271 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.1865 0.1146 0.1203 0.1058 0.1271 | ce_loss_increases 1.0174 0.9088 1.5520 1.3950 0.9918 | compound_ce_loss_increase 6.7608 | l0s 12.4116 9.4445 5.8227 5.6889 6.5869 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 750 | loss 0.1711 0.1065 0.1152 0.1025 0.1238 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.1711 0.1065 0.1152 0.1025 0.1238 | ce_loss_increases 0.9512 0.7821 1.3442 1.2620 0.9081 | compound_ce_loss_increase 6.7964 | l0s 9.8073 7.4841 4.6591 4.7849 5.6608 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 1000 | loss 0.1652 0.1039 0.1138 0.1017 0.1229 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.1652 0.1039 0.1138 0.1017 0.1229 | ce_loss_increases 0.9266 0.7399 1.3031 1.2388 0.8830 | compound_ce_loss_increase 6.6203 | l0s 8.5332 6.7643 4.2466 4.3820 5.2310 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 1250 | loss 0.1621 0.1029 0.1132 0.1014 0.1226 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.1621 0.1029 0.1132 0.1014 0.1226 | ce_loss_increases 0.8971 0.7356 1.2786 1.2448 0.8790 | compound_ce_loss_increase 6.8698 | l0s 7.8725 6.4386 4.0380 4.2134 5.0607 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 1500 | loss 0.1606 0.1024 0.1129 0.1012 0.1224 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.1606 0.1024 0.1129 0.1012 0.1224 | ce_loss_increases 0.8810 0.7355 1.2634 1.2390 0.8704 | compound_ce_loss_increase 6.8387 | l0s 7.4109 6.2097 3.9608 4.1258 4.9957 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 1750 | loss 0.1596 0.1021 0.1128 0.1011 0.1223 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.1596 0.1021 0.1128 0.1011 0.1223 | ce_loss_increases 0.8683 0.7521 1.2589 1.2291 0.8736 | compound_ce_loss_increase 6.9167 | l0s 7.2302 6.0494 3.8978 4.0824 4.9417 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 2000 | loss 0.1590 0.1018 0.1127 0.1010 0.1223 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.1590 0.1018 0.1127 0.1010 0.1223 | ce_loss_increases 0.8714 0.7581 1.2711 1.2244 0.8660 | compound_ce_loss_increase 6.8720 | l0s 7.0511 5.9623 3.8312 4.0291 4.8978 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 2250 | loss 0.1587 0.1017 0.1126 0.1009 0.1223 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.1587 0.1017 0.1126 0.1009 0.1223 | ce_loss_increases 0.8661 0.7624 1.2665 1.2182 0.8689 | compound_ce_loss_increase 6.9136 | l0s 6.9251 5.9107 3.8052 3.9985 4.8895 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 2500 | loss 0.1583 0.1015 0.1125 0.1009 0.1222 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.1583 0.1015 0.1125 0.1009 0.1222 | ce_loss_increases 0.8709 0.7650 1.2679 1.2168 0.8686 | compound_ce_loss_increase 6.9736 | l0s 6.7599 5.8630 3.7801 4.0060 4.8948 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 2750 | loss 0.1581 0.1014 0.1125 0.1009 0.1222 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.1581 0.1014 0.1125 0.1009 0.1222 | ce_loss_increases 0.8603 0.7564 1.2647 1.2209 0.8651 | compound_ce_loss_increase 6.8967 | l0s 6.7602 5.8303 3.7468 3.9566 4.8615 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 3000 | loss 0.1581 0.1014 0.1124 0.1009 0.1222 | checkpoint True True True True False | ce_loss 2.7305 | sae_losses 0.1581 0.1014 0.1124 0.1009 0.1222 | ce_loss_increases 0.8598 0.7541 1.2635 1.2179 0.8669 | compound_ce_loss_increase 6.8723 | l0s 6.6806 5.8107 3.7328 3.9475 4.8505 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 3250 | loss 0.1578 0.1013 0.1124 0.1009 0.1222 | checkpoint True True False True True | ce_loss 2.7305 | sae_losses 0.1578 0.1013 0.1124 0.1009 0.1222 | ce_loss_increases 0.8580 0.7560 1.2695 1.2225 0.8612 | compound_ce_loss_increase 6.8579 | l0s 6.6533 5.7855 3.7133 3.9447 4.8459 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 3500 | loss 0.1578 0.1012 0.1124 0.1009 0.1221 | checkpoint True True True False True | ce_loss 2.7305 | sae_losses 0.1578 0.1012 0.1124 0.1009 0.1221 | ce_loss_increases 0.8523 0.7575 1.2686 1.2177 0.8621 | compound_ce_loss_increase 6.8672 | l0s 6.6260 5.7635 3.7126 3.9477 4.8487 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 3750 | loss 0.1577 0.1012 0.1124 0.1008 0.1221 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.1577 0.1012 0.1124 0.1008 0.1221 | ce_loss_increases 0.8589 0.7598 1.2722 1.2188 0.8550 | compound_ce_loss_increase 6.8171 | l0s 6.5838 5.7624 3.6943 3.9342 4.8471 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 4000 | loss 0.1577 0.1012 0.1123 0.1008 0.1221 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.1577 0.1012 0.1123 0.1008 0.1221 | ce_loss_increases 0.8520 0.7570 1.2643 1.2169 0.8570 | compound_ce_loss_increase 6.8115 | l0s 6.6477 5.7549 3.7006 3.9389 4.8362 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 4250 | loss 0.1577 0.1012 0.1123 0.1008 0.1221 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.1577 0.1012 0.1123 0.1008 0.1221 | ce_loss_increases 0.8538 0.7558 1.2662 1.2145 0.8560 | compound_ce_loss_increase 6.7904 | l0s 6.5565 5.7537 3.6962 3.9375 4.8354 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 4500 | loss 0.1576 0.1012 0.1123 0.1008 0.1221 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.1576 0.1012 0.1123 0.1008 0.1221 | ce_loss_increases 0.8506 0.7604 1.2711 1.2178 0.8616 | compound_ce_loss_increase 6.8377 | l0s 6.5711 5.7536 3.6839 3.9281 4.8309 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 4750 | loss 0.1576 0.1011 0.1123 0.1008 0.1221 | checkpoint False True False False False | ce_loss 2.7305 | sae_losses 0.1576 0.1011 0.1123 0.1008 0.1221 | ce_loss_increases 0.8500 0.7600 1.2688 1.2154 0.8585 | compound_ce_loss_increase 6.8023 | l0s 6.5627 5.7389 3.6860 3.9306 4.8321 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 5000 | loss 0.1575 0.1011 0.1123 0.1008 0.1221 | checkpoint True True True False True | ce_loss 2.7305 | sae_losses 0.1575 0.1011 0.1123 0.1008 0.1221 | ce_loss_increases 0.8521 0.7618 1.2702 1.2140 0.8612 | compound_ce_loss_increase 6.8242 | l0s 6.5298 5.7303 3.6797 3.9339 4.8407 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 0 | loss 217.0749 133.9181 84.7643 65.7416 65.9811 | checkpoint False | ce_loss 2.7305 | sae_losses 217.0749 133.9181 84.7643 65.7416 65.9811 | ce_loss_increases 14.6022 5.6782 3.5488 2.4542 0.2608 | compound_ce_loss_increase 11.3118 | l0s 251.6037 254.1876 249.7975 260.2430 258.3506 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 250 | loss 0.2878 0.1750 0.1556 0.1295 0.1481 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.2878 0.1750 0.1556 0.1295 0.1481 | ce_loss_increases 1.7752 1.9067 2.5840 2.4058 1.6351 | compound_ce_loss_increase 7.2510 | l0s 22.5019 15.2678 8.6838 7.7088 8.7541 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 500 | loss 0.1716 0.1093 0.1177 0.1041 0.1242 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.1716 0.1093 0.1177 0.1041 0.1242 | ce_loss_increases 0.8736 0.8163 1.3476 1.2489 0.8938 | compound_ce_loss_increase 7.2575 | l0s 13.7032 9.8405 6.7115 6.5038 8.1346 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 750 | loss 0.1553 0.0997 0.1118 0.1003 0.1203 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.1553 0.0997 0.1118 0.1003 0.1203 | ce_loss_increases 0.7452 0.6947 1.1749 1.1083 0.7912 | compound_ce_loss_increase 7.0751 | l0s 10.3311 8.0425 5.6991 5.7201 7.3129 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 1000 | loss 0.1498 0.0966 0.1099 0.0991 0.1190 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.1498 0.0966 0.1099 0.0991 0.1190 | ce_loss_increases 0.6984 0.6138 1.1302 1.0606 0.7539 | compound_ce_loss_increase 7.0669 | l0s 8.9906 7.1512 5.1559 5.2188 6.7310 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 1250 | loss 0.1471 0.0951 0.1089 0.0985 0.1184 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.1471 0.0951 0.1089 0.0985 0.1184 | ce_loss_increases 0.6834 0.6096 1.1350 1.0475 0.7373 | compound_ce_loss_increase 7.0362 | l0s 8.2262 6.4852 4.7994 4.9698 6.4235 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 1500 | loss 0.1458 0.0942 0.1084 0.0982 0.1181 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.1458 0.0942 0.1084 0.0982 0.1181 | ce_loss_increases 0.6743 0.5788 1.1246 1.0355 0.7329 | compound_ce_loss_increase 7.1809 | l0s 7.8444 6.1932 4.6230 4.8385 6.2194 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 1750 | loss 0.1450 0.0937 0.1081 0.0979 0.1179 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.1450 0.0937 0.1081 0.0979 0.1179 | ce_loss_increases 0.6672 0.5681 1.0965 1.0226 0.7210 | compound_ce_loss_increase 7.0754 | l0s 7.5208 5.9453 4.5342 4.7276 6.0683 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 2000 | loss 0.1443 0.0933 0.1079 0.0977 0.1178 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.1443 0.0933 0.1079 0.0977 0.1178 | ce_loss_increases 0.6705 0.5585 1.1108 1.0202 0.7172 | compound_ce_loss_increase 6.9490 | l0s 7.2990 5.8250 4.4249 4.6440 5.9563 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 2250 | loss 0.1440 0.0930 0.1077 0.0976 0.1177 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.1440 0.0930 0.1077 0.0976 0.1177 | ce_loss_increases 0.6719 0.5592 1.1002 1.0160 0.7204 | compound_ce_loss_increase 7.2182 | l0s 7.1619 5.7361 4.3753 4.6062 5.8781 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 2500 | loss 0.1435 0.0928 0.1075 0.0975 0.1176 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.1435 0.0928 0.1075 0.0975 0.1176 | ce_loss_increases 0.6727 0.5640 1.0968 1.0176 0.7186 | compound_ce_loss_increase 7.0815 | l0s 7.0032 5.6546 4.3245 4.5460 5.8050 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 2750 | loss 0.1433 0.0927 0.1074 0.0975 0.1175 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.1433 0.0927 0.1074 0.0975 0.1175 | ce_loss_increases 0.6677 0.5617 1.0977 1.0186 0.7141 | compound_ce_loss_increase 7.0705 | l0s 6.9426 5.6079 4.2750 4.5103 5.8034 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 3000 | loss 0.1433 0.0925 0.1074 0.0974 0.1175 | checkpoint False True True True True | ce_loss 2.7305 | sae_losses 0.1433 0.0925 0.1074 0.0974 0.1175 | ce_loss_increases 0.6656 0.5727 1.0942 1.0094 0.7123 | compound_ce_loss_increase 6.9809 | l0s 6.8317 5.5753 4.2554 4.5177 5.7522 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 3250 | loss 0.1431 0.0925 0.1073 0.0974 0.1175 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.1431 0.0925 0.1073 0.0974 0.1175 | ce_loss_increases 0.6694 0.5671 1.0873 1.0081 0.7138 | compound_ce_loss_increase 7.0122 | l0s 6.7233 5.5430 4.2379 4.4940 5.7304 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 3500 | loss 0.1428 0.0924 0.1072 0.0974 0.1174 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.1428 0.0924 0.1072 0.0974 0.1174 | ce_loss_increases 0.6625 0.5712 1.0841 1.0136 0.7139 | compound_ce_loss_increase 6.9587 | l0s 6.7020 5.5053 4.2323 4.4818 5.7102 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 3750 | loss 0.1428 0.0923 0.1072 0.0973 0.1174 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.1428 0.0923 0.1072 0.0973 0.1174 | ce_loss_increases 0.6631 0.5700 1.0859 1.0158 0.7088 | compound_ce_loss_increase 6.9256 | l0s 6.6508 5.4996 4.2133 4.4553 5.6942 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 4000 | loss 0.1428 0.0923 0.1072 0.0973 0.1174 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.1428 0.0923 0.1072 0.0973 0.1174 | ce_loss_increases 0.6582 0.5752 1.0874 1.0116 0.7106 | compound_ce_loss_increase 6.8789 | l0s 6.6271 5.4772 4.1988 4.4602 5.6748 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 4250 | loss 0.1428 0.0922 0.1071 0.0973 0.1174 | checkpoint False True True True True | ce_loss 2.7305 | sae_losses 0.1428 0.0922 0.1071 0.0973 0.1174 | ce_loss_increases 0.6597 0.5722 1.0824 1.0057 0.7125 | compound_ce_loss_increase 6.9523 | l0s 6.5979 5.4733 4.1992 4.4758 5.6949 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 4500 | loss 0.1427 0.0922 0.1071 0.0973 0.1174 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.1427 0.0922 0.1071 0.0973 0.1174 | ce_loss_increases 0.6576 0.5748 1.0876 1.0074 0.7102 | compound_ce_loss_increase 6.9241 | l0s 6.5901 5.4573 4.1765 4.4566 5.6876 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 4750 | loss 0.1426 0.0922 0.1071 0.0973 0.1174 | checkpoint True True True True False | ce_loss 2.7305 | sae_losses 0.1426 0.0922 0.1071 0.0973 0.1174 | ce_loss_increases 0.6594 0.5710 1.0879 1.0077 0.7079 | compound_ce_loss_increase 6.8838 | l0s 6.5463 5.4541 4.1734 4.4478 5.6742 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 5000 | loss 0.1426 0.0922 0.1071 0.0973 0.1174 | checkpoint False True True True True | ce_loss 2.7305 | sae_losses 0.1426 0.0922 0.1071 0.0973 0.1174 | ce_loss_increases 0.6584 0.5763 1.0851 1.0094 0.7111 | compound_ce_loss_increase 6.9385 | l0s 6.5548 5.4330 4.1765 4.4545 5.6792 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 0 | loss 263.4649 165.1268 115.5145 77.1456 67.5597 | checkpoint False | ce_loss 2.7305 | sae_losses 263.4649 165.1268 115.5145 77.1456 67.5597 | ce_loss_increases 14.3452 5.1352 3.6494 2.6231 0.3475 | compound_ce_loss_increase 11.4295 | l0s 256.9329 261.0746 265.2831 254.1063 251.9931 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 250 | loss 0.3948 0.3291 0.2209 0.1679 0.1925 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.3948 0.3291 0.2209 0.1679 0.1925 | ce_loss_increases 2.3141 4.6114 5.0827 5.6332 4.5915 | compound_ce_loss_increase 6.9377 | l0s 18.3972 5.8177 1.4787 0.6369 2.5792 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 500 | loss 0.2491 0.2517 0.1960 0.1588 0.1720 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.2491 0.2517 0.1960 0.1588 0.1720 | ce_loss_increases 1.3340 2.6353 3.7622 4.4979 2.4034 | compound_ce_loss_increase 7.4810 | l0s 9.9013 4.1823 2.0910 1.3741 3.2784 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 750 | loss 0.2289 0.2402 0.1914 0.1565 0.1686 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.2289 0.2402 0.1914 0.1565 0.1686 | ce_loss_increases 1.1594 2.3567 3.4387 4.1572 2.0690 | compound_ce_loss_increase 7.4601 | l0s 7.2982 3.3578 1.8534 1.4147 2.9780 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 1000 | loss 0.2230 0.2364 0.1902 0.1559 0.1675 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.2230 0.2364 0.1902 0.1559 0.1675 | ce_loss_increases 1.1209 2.2832 3.3411 4.0981 1.9336 | compound_ce_loss_increase 7.3901 | l0s 6.3587 3.0018 1.7482 1.3543 2.7505 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 1250 | loss 0.2202 0.2347 0.1896 0.1557 0.1671 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.2202 0.2347 0.1896 0.1557 0.1671 | ce_loss_increases 1.0986 2.2781 3.2989 4.1061 1.8902 | compound_ce_loss_increase 7.4188 | l0s 5.7783 2.8146 1.6990 1.3446 2.6485 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 1500 | loss 0.2186 0.2338 0.1892 0.1555 0.1669 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.2186 0.2338 0.1892 0.1555 0.1669 | ce_loss_increases 1.0694 2.2246 3.2769 4.0663 1.8950 | compound_ce_loss_increase 7.5581 | l0s 5.4925 2.7274 1.6666 1.3474 2.5990 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 1750 | loss 0.2173 0.2333 0.1889 0.1554 0.1667 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.2173 0.2333 0.1889 0.1554 0.1667 | ce_loss_increases 1.0707 2.2039 3.2298 4.0152 1.8515 | compound_ce_loss_increase 7.4483 | l0s 5.2419 2.6551 1.6385 1.3464 2.5465 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 2000 | loss 0.2159 0.2329 0.1887 0.1553 0.1667 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.2159 0.2329 0.1887 0.1553 0.1667 | ce_loss_increases 1.0461 2.2019 3.2239 3.9884 1.8376 | compound_ce_loss_increase 7.3885 | l0s 5.0655 2.6094 1.6175 1.3412 2.5087 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 2250 | loss 0.2152 0.2326 0.1886 0.1552 0.1666 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.2152 0.2326 0.1886 0.1552 0.1666 | ce_loss_increases 1.0414 2.2190 3.2385 4.0051 1.8528 | compound_ce_loss_increase 7.4748 | l0s 4.9608 2.5686 1.6124 1.3425 2.4927 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 2500 | loss 0.2146 0.2325 0.1885 0.1552 0.1666 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.2146 0.2325 0.1885 0.1552 0.1666 | ce_loss_increases 1.0426 2.2212 3.2347 4.0195 1.8499 | compound_ce_loss_increase 7.4506 | l0s 4.8939 2.5480 1.6032 1.3433 2.4756 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 2750 | loss 0.2142 0.2322 0.1884 0.1551 0.1665 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.2142 0.2322 0.1884 0.1551 0.1665 | ce_loss_increases 1.0330 2.2153 3.2293 3.9931 1.8476 | compound_ce_loss_increase 7.4449 | l0s 4.8319 2.5295 1.6004 1.3462 2.4582 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 3000 | loss 0.2141 0.2321 0.1884 0.1551 0.1665 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.2141 0.2321 0.1884 0.1551 0.1665 | ce_loss_increases 1.0230 2.2129 3.2289 4.0050 1.8319 | compound_ce_loss_increase 7.4335 | l0s 4.7789 2.5160 1.6048 1.3485 2.4520 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 3250 | loss 0.2138 0.2320 0.1883 0.1551 0.1665 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.2138 0.2320 0.1883 0.1551 0.1665 | ce_loss_increases 1.0254 2.1953 3.2162 3.9982 1.8419 | compound_ce_loss_increase 7.4614 | l0s 4.7259 2.5057 1.6035 1.3508 2.4493 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 3500 | loss 0.2136 0.2319 0.1882 0.1551 0.1664 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.2136 0.2319 0.1882 0.1551 0.1664 | ce_loss_increases 1.0232 2.2092 3.2196 3.9811 1.8303 | compound_ce_loss_increase 7.4233 | l0s 4.7036 2.4918 1.6071 1.3576 2.4442 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 3750 | loss 0.2136 0.2318 0.1882 0.1550 0.1664 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.2136 0.2318 0.1882 0.1550 0.1664 | ce_loss_increases 1.0217 2.2007 3.2149 3.9803 1.8254 | compound_ce_loss_increase 7.4245 | l0s 4.7003 2.4876 1.6005 1.3527 2.4318 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 4000 | loss 0.2135 0.2318 0.1882 0.1550 0.1664 | checkpoint True True True True False | ce_loss 2.7305 | sae_losses 0.2135 0.2318 0.1882 0.1550 0.1664 | ce_loss_increases 1.0203 2.2034 3.2106 3.9924 1.8233 | compound_ce_loss_increase 7.4014 | l0s 4.6617 2.4762 1.5992 1.3568 2.4272 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 4250 | loss 0.2135 0.2317 0.1882 0.1550 0.1664 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.2135 0.2317 0.1882 0.1550 0.1664 | ce_loss_increases 1.0200 2.1908 3.2215 3.9936 1.8321 | compound_ce_loss_increase 7.4514 | l0s 4.6718 2.4765 1.5965 1.3583 2.4392 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 4500 | loss 0.2134 0.2317 0.1881 0.1550 0.1664 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.2134 0.2317 0.1881 0.1550 0.1664 | ce_loss_increases 1.0242 2.1997 3.2088 3.9763 1.8304 | compound_ce_loss_increase 7.4451 | l0s 4.6689 2.4651 1.5947 1.3562 2.4301 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 4750 | loss 0.2134 0.2316 0.1881 0.1550 0.1664 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.2134 0.2316 0.1881 0.1550 0.1664 | ce_loss_increases 1.0200 2.1879 3.2081 3.9688 1.8213 | compound_ce_loss_increase 7.4252 | l0s 4.6524 2.4669 1.5942 1.3542 2.4242 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 5000 | loss 0.2134 0.2316 0.1881 0.1550 0.1664 | checkpoint True True False True True | ce_loss 2.7305 | sae_losses 0.2134 0.2316 0.1881 0.1550 0.1664 | ce_loss_increases 1.0247 2.1943 3.2218 3.9952 1.8311 | compound_ce_loss_increase 7.4506 | l0s 4.6467 2.4590 1.5956 1.3551 2.4286 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 0 | loss 228.0831 151.7672 101.7318 65.4508 59.5769 | checkpoint False | ce_loss 2.7305 | sae_losses 228.0831 151.7672 101.7318 65.4508 59.5769 | ce_loss_increases 15.1737 5.6176 3.8103 2.4554 0.3263 | compound_ce_loss_increase 13.1763 | l0s 250.7765 262.3879 262.0721 260.5365 253.3260 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 250 | loss 0.2006 0.1266 0.1046 0.0885 0.1042 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.2006 0.1266 0.1046 0.0885 0.1042 | ce_loss_increases 1.3214 1.1681 1.4280 1.1088 0.6645 | compound_ce_loss_increase 6.3830 | l0s 27.5051 20.5368 14.1571 13.6512 15.6874 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 500 | loss 0.1043 0.0667 0.0744 0.0691 0.0841 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.1043 0.0667 0.0744 0.0691 0.0841 | ce_loss_increases 0.4811 0.4678 0.7304 0.6437 0.4012 | compound_ce_loss_increase 6.8437 | l0s 20.7711 15.4090 11.3696 12.6329 17.1463 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 750 | loss 0.0916 0.0592 0.0696 0.0658 0.0809 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0916 0.0592 0.0696 0.0658 0.0809 | ce_loss_increases 0.3851 0.3842 0.6208 0.5575 0.3530 | compound_ce_loss_increase 6.4559 | l0s 16.2395 12.8906 9.9856 11.9131 16.7249 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 1000 | loss 0.0873 0.0568 0.0680 0.0646 0.0798 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0873 0.0568 0.0680 0.0646 0.0798 | ce_loss_increases 0.3689 0.3561 0.5955 0.5365 0.3418 | compound_ce_loss_increase 6.2818 | l0s 14.1681 11.6093 9.3029 11.3358 16.0011 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 1250 | loss 0.0852 0.0555 0.0672 0.0640 0.0793 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0852 0.0555 0.0672 0.0640 0.0793 | ce_loss_increases 0.3696 0.3270 0.5892 0.5309 0.3402 | compound_ce_loss_increase 6.1845 | l0s 12.9116 10.8693 8.8363 10.8791 15.4473 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 1500 | loss 0.0839 0.0548 0.0667 0.0637 0.0790 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0839 0.0548 0.0667 0.0637 0.0790 | ce_loss_increases 0.3700 0.3225 0.5748 0.5263 0.3356 | compound_ce_loss_increase 5.9956 | l0s 12.3710 10.4399 8.5786 10.6410 15.1459 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 1750 | loss 0.0829 0.0543 0.0664 0.0635 0.0788 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0829 0.0543 0.0664 0.0635 0.0788 | ce_loss_increases 0.3675 0.3078 0.5599 0.5164 0.3304 | compound_ce_loss_increase 5.9405 | l0s 11.9054 10.1585 8.4260 10.4111 14.8904 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 2000 | loss 0.0822 0.0539 0.0662 0.0633 0.0786 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0822 0.0539 0.0662 0.0633 0.0786 | ce_loss_increases 0.3682 0.3031 0.5641 0.5186 0.3290 | compound_ce_loss_increase 5.9497 | l0s 11.5908 9.9693 8.2726 10.2526 14.6543 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 2250 | loss 0.0818 0.0537 0.0660 0.0632 0.0785 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0818 0.0537 0.0660 0.0632 0.0785 | ce_loss_increases 0.3712 0.3040 0.5615 0.5102 0.3319 | compound_ce_loss_increase 6.0919 | l0s 11.2982 9.7756 8.1782 10.2105 14.5247 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 2500 | loss 0.0814 0.0535 0.0659 0.0631 0.0784 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0814 0.0535 0.0659 0.0631 0.0784 | ce_loss_increases 0.3715 0.3044 0.5616 0.5157 0.3301 | compound_ce_loss_increase 6.0922 | l0s 10.9955 9.6578 8.0721 10.0574 14.3246 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 2750 | loss 0.0811 0.0533 0.0659 0.0631 0.0783 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0811 0.0533 0.0659 0.0631 0.0783 | ce_loss_increases 0.3675 0.3070 0.5609 0.5101 0.3268 | compound_ce_loss_increase 6.1650 | l0s 10.8388 9.5277 7.9950 10.0474 14.3193 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 3000 | loss 0.0809 0.0532 0.0658 0.0631 0.0783 | checkpoint True True True False True | ce_loss 2.7305 | sae_losses 0.0809 0.0532 0.0658 0.0631 0.0783 | ce_loss_increases 0.3645 0.3041 0.5579 0.5121 0.3279 | compound_ce_loss_increase 6.1266 | l0s 10.7384 9.4567 7.9844 10.0067 14.2022 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 3250 | loss 0.0807 0.0532 0.0658 0.0630 0.0783 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0807 0.0532 0.0658 0.0630 0.0783 | ce_loss_increases 0.3666 0.3051 0.5585 0.5084 0.3285 | compound_ce_loss_increase 6.1916 | l0s 10.4893 9.3989 7.9178 9.9738 14.1134 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 3500 | loss 0.0806 0.0531 0.0657 0.0630 0.0782 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0806 0.0531 0.0657 0.0630 0.0782 | ce_loss_increases 0.3631 0.3016 0.5585 0.5094 0.3295 | compound_ce_loss_increase 6.2096 | l0s 10.4911 9.3348 7.8809 9.9223 14.0264 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 3750 | loss 0.0804 0.0530 0.0657 0.0630 0.0782 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0804 0.0530 0.0657 0.0630 0.0782 | ce_loss_increases 0.3644 0.3040 0.5568 0.5113 0.3273 | compound_ce_loss_increase 6.1797 | l0s 10.3669 9.3380 7.8633 9.8749 14.0105 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 4000 | loss 0.0804 0.0530 0.0657 0.0630 0.0782 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0804 0.0530 0.0657 0.0630 0.0782 | ce_loss_increases 0.3640 0.3065 0.5587 0.5099 0.3284 | compound_ce_loss_increase 6.1848 | l0s 10.3247 9.2994 7.8337 9.8558 13.9896 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 4250 | loss 0.0803 0.0530 0.0657 0.0630 0.0782 | checkpoint True True True False True | ce_loss 2.7305 | sae_losses 0.0803 0.0530 0.0657 0.0630 0.0782 | ce_loss_increases 0.3649 0.3064 0.5589 0.5063 0.3287 | compound_ce_loss_increase 6.2226 | l0s 10.2953 9.2847 7.8302 9.8859 14.0009 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 4500 | loss 0.0803 0.0529 0.0656 0.0630 0.0782 | checkpoint True True True False True | ce_loss 2.7305 | sae_losses 0.0803 0.0529 0.0656 0.0630 0.0782 | ce_loss_increases 0.3646 0.3049 0.5590 0.5067 0.3280 | compound_ce_loss_increase 6.2095 | l0s 10.2967 9.2850 7.8124 9.8427 13.9828 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 4750 | loss 0.0802 0.0529 0.0656 0.0630 0.0781 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0802 0.0529 0.0656 0.0630 0.0781 | ce_loss_increases 0.3654 0.3063 0.5609 0.5073 0.3266 | compound_ce_loss_increase 6.1940 | l0s 10.2605 9.2456 7.7938 9.8366 13.9347 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 5000 | loss 0.0802 0.0529 0.0656 0.0630 0.0781 | checkpoint True True True False False | ce_loss 2.7305 | sae_losses 0.0802 0.0529 0.0656 0.0630 0.0781 | ce_loss_increases 0.3670 0.3075 0.5580 0.5048 0.3279 | compound_ce_loss_increase 6.2255 | l0s 10.2341 9.2333 7.8070 9.8524 13.9733 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 0 | loss 987.3041 524.5605 337.6251 245.5708 230.6556 | checkpoint False | ce_loss 2.7305 | sae_losses 987.3041 524.5605 337.6251 245.5708 230.6556 | ce_loss_increases 14.7336 5.2044 3.5371 2.4544 0.1771 | compound_ce_loss_increase 13.5463 | l0s 522.5518 514.7168 503.2405 509.2917 504.4014 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 250 | loss 0.1972 0.1396 0.1151 0.1012 0.1138 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.1972 0.1396 0.1151 0.1012 0.1138 | ce_loss_increases 1.1611 1.1100 1.4332 1.2219 0.6630 | compound_ce_loss_increase 7.0352 | l0s 29.7715 21.1023 15.4014 14.3351 16.4334 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 500 | loss 0.0966 0.0649 0.0737 0.0696 0.0838 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0966 0.0649 0.0737 0.0696 0.0838 | ce_loss_increases 0.4124 0.4584 0.7189 0.6472 0.4036 | compound_ce_loss_increase 6.0999 | l0s 20.9468 15.4493 11.7687 13.0468 17.5238 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 750 | loss 0.0835 0.0559 0.0677 0.0653 0.0796 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0835 0.0559 0.0677 0.0653 0.0796 | ce_loss_increases 0.3161 0.3527 0.5919 0.5314 0.3422 | compound_ce_loss_increase 5.9040 | l0s 15.7637 12.6997 10.4810 12.6682 17.9152 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 1000 | loss 0.0790 0.0530 0.0658 0.0638 0.0781 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0790 0.0530 0.0658 0.0638 0.0781 | ce_loss_increases 0.2998 0.3038 0.5386 0.4943 0.3254 | compound_ce_loss_increase 5.7929 | l0s 13.7214 11.4431 9.8056 12.3201 17.6313 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 1250 | loss 0.0767 0.0516 0.0648 0.0630 0.0774 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0767 0.0516 0.0648 0.0630 0.0774 | ce_loss_increases 0.3089 0.3005 0.5312 0.4809 0.3200 | compound_ce_loss_increase 5.9202 | l0s 12.4707 10.5642 9.2647 11.9764 17.2184 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 1500 | loss 0.0755 0.0508 0.0642 0.0627 0.0770 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0755 0.0508 0.0642 0.0627 0.0770 | ce_loss_increases 0.2961 0.2934 0.5204 0.4739 0.3158 | compound_ce_loss_increase 6.0876 | l0s 11.8025 10.0521 8.9742 11.7736 16.9861 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 1750 | loss 0.0746 0.0503 0.0638 0.0624 0.0767 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0746 0.0503 0.0638 0.0624 0.0767 | ce_loss_increases 0.2933 0.2842 0.4986 0.4606 0.3109 | compound_ce_loss_increase 6.0643 | l0s 11.2739 9.6912 8.8505 11.6013 16.7478 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 2000 | loss 0.0739 0.0498 0.0635 0.0621 0.0765 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0739 0.0498 0.0635 0.0621 0.0765 | ce_loss_increases 0.2833 0.2776 0.4990 0.4610 0.3072 | compound_ce_loss_increase 6.0324 | l0s 11.0272 9.4591 8.6675 11.4740 16.5603 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 2250 | loss 0.0735 0.0495 0.0633 0.0619 0.0764 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0735 0.0495 0.0633 0.0619 0.0764 | ce_loss_increases 0.2818 0.2716 0.4927 0.4488 0.3115 | compound_ce_loss_increase 6.1538 | l0s 10.6639 9.2630 8.5550 11.4843 16.3902 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 2500 | loss 0.0730 0.0493 0.0632 0.0617 0.0762 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0730 0.0493 0.0632 0.0617 0.0762 | ce_loss_increases 0.2805 0.2770 0.5012 0.4559 0.3091 | compound_ce_loss_increase 6.1324 | l0s 10.3822 9.0572 8.3856 11.2489 16.1901 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 2750 | loss 0.0727 0.0491 0.0630 0.0616 0.0761 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0727 0.0491 0.0630 0.0616 0.0761 | ce_loss_increases 0.2714 0.2675 0.4957 0.4524 0.3047 | compound_ce_loss_increase 6.1868 | l0s 10.3068 8.9261 8.3173 11.2356 16.2338 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 3000 | loss 0.0726 0.0489 0.0629 0.0615 0.0761 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0726 0.0489 0.0629 0.0615 0.0761 | ce_loss_increases 0.2772 0.2625 0.4912 0.4523 0.3058 | compound_ce_loss_increase 6.1439 | l0s 10.1582 8.8404 8.3173 11.2087 16.1265 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 3250 | loss 0.0724 0.0488 0.0629 0.0615 0.0761 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0724 0.0488 0.0629 0.0615 0.0761 | ce_loss_increases 0.2754 0.2600 0.4926 0.4478 0.3061 | compound_ce_loss_increase 6.2101 | l0s 9.9763 8.7575 8.2296 11.1575 16.0378 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 3500 | loss 0.0723 0.0488 0.0628 0.0614 0.0760 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0723 0.0488 0.0628 0.0614 0.0760 | ce_loss_increases 0.2719 0.2580 0.4897 0.4493 0.3071 | compound_ce_loss_increase 6.2306 | l0s 9.9122 8.6689 8.2061 11.1092 15.9663 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 3750 | loss 0.0722 0.0487 0.0628 0.0614 0.0760 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0722 0.0487 0.0628 0.0614 0.0760 | ce_loss_increases 0.2735 0.2576 0.4878 0.4506 0.3045 | compound_ce_loss_increase 6.2379 | l0s 9.8453 8.6378 8.1789 11.0515 15.9512 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 4000 | loss 0.0721 0.0487 0.0627 0.0613 0.0760 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0721 0.0487 0.0627 0.0613 0.0760 | ce_loss_increases 0.2728 0.2576 0.4893 0.4474 0.3055 | compound_ce_loss_increase 6.2510 | l0s 9.8062 8.6063 8.1541 11.0512 15.8944 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 4250 | loss 0.0721 0.0486 0.0627 0.0613 0.0759 | checkpoint True True True False True | ce_loss 2.7305 | sae_losses 0.0721 0.0486 0.0627 0.0613 0.0759 | ce_loss_increases 0.2714 0.2580 0.4873 0.4456 0.3057 | compound_ce_loss_increase 6.3179 | l0s 9.7692 8.5767 8.1503 11.0892 15.9306 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 4500 | loss 0.0720 0.0486 0.0627 0.0613 0.0759 | checkpoint True True True False True | ce_loss 2.7305 | sae_losses 0.0720 0.0486 0.0627 0.0613 0.0759 | ce_loss_increases 0.2696 0.2574 0.4886 0.4450 0.3048 | compound_ce_loss_increase 6.2795 | l0s 9.7471 8.5406 8.1320 11.0344 15.9094 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 4750 | loss 0.0720 0.0486 0.0627 0.0613 0.0759 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0720 0.0486 0.0627 0.0613 0.0759 | ce_loss_increases 0.2706 0.2559 0.4882 0.4456 0.3033 | compound_ce_loss_increase 6.2783 | l0s 9.7352 8.5310 8.1163 11.0260 15.8737 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 5000 | loss 0.0720 0.0486 0.0626 0.0613 0.0759 | checkpoint True True True True False | ce_loss 2.7305 | sae_losses 0.0720 0.0486 0.0626 0.0613 0.0759 | ce_loss_increases 0.2727 0.2579 0.4847 0.4434 0.3050 | compound_ce_loss_increase 6.3318 | l0s 9.6803 8.5014 8.1259 11.0453 15.9217 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 0 | loss 227.3096 125.0853 84.5692 65.1142 67.1791 | checkpoint False | ce_loss 2.7305 | sae_losses 227.3096 125.0853 84.5692 65.1142 67.1791 | ce_loss_increases 14.3188 5.3126 3.5227 2.4438 0.3794 | compound_ce_loss_increase 12.2888 | l0s 256.3822 252.1598 257.6640 252.1761 258.2471 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 250 | loss 0.1946 0.1165 0.1058 0.0912 0.1051 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.1946 0.1165 0.1058 0.0912 0.1051 | ce_loss_increases 1.3221 1.1455 1.3966 1.1405 0.7271 | compound_ce_loss_increase 6.4669 | l0s 28.2331 20.1459 14.4353 13.4622 15.7307 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 500 | loss 0.1033 0.0671 0.0744 0.0693 0.0842 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.1033 0.0671 0.0744 0.0693 0.0842 | ce_loss_increases 0.5019 0.4648 0.7391 0.6445 0.4026 | compound_ce_loss_increase 6.7294 | l0s 20.9785 15.4434 11.4092 12.7625 17.4459 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 750 | loss 0.0911 0.0592 0.0695 0.0659 0.0809 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0911 0.0592 0.0695 0.0659 0.0809 | ce_loss_increases 0.3855 0.3779 0.6303 0.5564 0.3562 | compound_ce_loss_increase 6.0386 | l0s 16.4252 12.9995 10.0678 12.0255 16.8596 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 1000 | loss 0.0873 0.0566 0.0679 0.0647 0.0798 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0873 0.0566 0.0679 0.0647 0.0798 | ce_loss_increases 0.3577 0.3515 0.5893 0.5349 0.3448 | compound_ce_loss_increase 6.0544 | l0s 14.4741 11.8798 9.3197 11.4210 16.0594 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 1250 | loss 0.0853 0.0553 0.0672 0.0641 0.0792 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0853 0.0553 0.0672 0.0641 0.0792 | ce_loss_increases 0.3470 0.3268 0.5839 0.5280 0.3418 | compound_ce_loss_increase 6.0430 | l0s 13.4529 11.0981 8.7687 10.9694 15.4237 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 1500 | loss 0.0839 0.0546 0.0667 0.0638 0.0790 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0839 0.0546 0.0667 0.0638 0.0790 | ce_loss_increases 0.3545 0.3203 0.5773 0.5221 0.3391 | compound_ce_loss_increase 6.0592 | l0s 12.7778 10.5788 8.5156 10.7386 15.1051 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 1750 | loss 0.0830 0.0542 0.0664 0.0636 0.0787 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0830 0.0542 0.0664 0.0636 0.0787 | ce_loss_increases 0.3419 0.3094 0.5633 0.5101 0.3342 | compound_ce_loss_increase 5.9900 | l0s 12.3649 10.2321 8.3374 10.5805 14.8649 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 2000 | loss 0.0822 0.0539 0.0662 0.0634 0.0786 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0822 0.0539 0.0662 0.0634 0.0786 | ce_loss_increases 0.3442 0.3079 0.5649 0.5127 0.3326 | compound_ce_loss_increase 5.9945 | l0s 12.0825 9.9626 8.1846 10.4395 14.6080 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 2250 | loss 0.0817 0.0536 0.0660 0.0633 0.0785 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0817 0.0536 0.0660 0.0633 0.0785 | ce_loss_increases 0.3458 0.3092 0.5585 0.5023 0.3349 | compound_ce_loss_increase 6.0645 | l0s 11.7261 9.7345 8.0765 10.4374 14.4803 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 2500 | loss 0.0813 0.0535 0.0659 0.0632 0.0784 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0813 0.0535 0.0659 0.0632 0.0784 | ce_loss_increases 0.3526 0.3144 0.5659 0.5087 0.3342 | compound_ce_loss_increase 6.0342 | l0s 11.4629 9.5473 7.9557 10.2523 14.2952 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 2750 | loss 0.0809 0.0533 0.0658 0.0631 0.0783 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0809 0.0533 0.0658 0.0631 0.0783 | ce_loss_increases 0.3478 0.3188 0.5623 0.5037 0.3301 | compound_ce_loss_increase 6.0714 | l0s 11.2984 9.4367 7.8699 10.2199 14.3144 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 3000 | loss 0.0808 0.0533 0.0657 0.0631 0.0783 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0808 0.0533 0.0657 0.0631 0.0783 | ce_loss_increases 0.3467 0.3207 0.5589 0.5054 0.3311 | compound_ce_loss_increase 5.9939 | l0s 11.1300 9.3763 7.8633 10.1942 14.1981 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 3250 | loss 0.0806 0.0532 0.0657 0.0631 0.0783 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0806 0.0532 0.0657 0.0631 0.0783 | ce_loss_increases 0.3462 0.3220 0.5614 0.5026 0.3320 | compound_ce_loss_increase 6.0097 | l0s 10.9300 9.2972 7.8040 10.1428 14.1252 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 3500 | loss 0.0804 0.0531 0.0656 0.0631 0.0782 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0804 0.0531 0.0656 0.0631 0.0782 | ce_loss_increases 0.3419 0.3236 0.5580 0.5025 0.3329 | compound_ce_loss_increase 5.9775 | l0s 10.8840 9.2297 7.7851 10.0913 14.0759 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 3750 | loss 0.0803 0.0531 0.0656 0.0630 0.0782 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0803 0.0531 0.0656 0.0630 0.0782 | ce_loss_increases 0.3430 0.3211 0.5571 0.5061 0.3299 | compound_ce_loss_increase 5.9679 | l0s 10.8471 9.2199 7.7683 10.0314 14.0377 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 4000 | loss 0.0803 0.0530 0.0656 0.0630 0.0782 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0803 0.0530 0.0656 0.0630 0.0782 | ce_loss_increases 0.3416 0.3237 0.5580 0.5034 0.3310 | compound_ce_loss_increase 5.9416 | l0s 10.8269 9.1926 7.7378 10.0352 14.0272 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 4250 | loss 0.0802 0.0530 0.0655 0.0630 0.0782 | checkpoint True True True False True | ce_loss 2.7305 | sae_losses 0.0802 0.0530 0.0655 0.0630 0.0782 | ce_loss_increases 0.3424 0.3236 0.5598 0.5018 0.3312 | compound_ce_loss_increase 5.9847 | l0s 10.7967 9.1708 7.7368 10.0487 14.0572 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 4500 | loss 0.0802 0.0530 0.0655 0.0630 0.0782 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0802 0.0530 0.0655 0.0630 0.0782 | ce_loss_increases 0.3437 0.3254 0.5597 0.5011 0.3304 | compound_ce_loss_increase 5.9399 | l0s 10.8001 9.1407 7.7135 10.0123 14.0348 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 4750 | loss 0.0801 0.0529 0.0655 0.0630 0.0782 | checkpoint True True True True True | ce_loss 2.7305 | sae_losses 0.0801 0.0529 0.0655 0.0630 0.0782 | ce_loss_increases 0.3436 0.3239 0.5609 0.5025 0.3287 | compound_ce_loss_increase 5.9415 | l0s 10.7744 9.1199 7.7017 10.0002 14.0069 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
type eval | step 5000 | loss 0.0801 0.0529 0.0655 0.0630 0.0782 | checkpoint True True True True False | ce_loss 2.7305 | sae_losses 0.0801 0.0529 0.0655 0.0630 0.0782 | ce_loss_increases 0.3456 0.3268 0.5580 0.5014 0.3306 | compound_ce_loss_increase 5.9570 | l0s 10.6992 9.0957 7.7161 10.0124 14.0342 | stream_l1s 4.4510 3.2427 2.6543 2.1799 2.1391
|