Upload folder using huggingface_hub
Browse files
qwen3_06B.blocks.12.hook_resid_post.sae.sparsity200.mse0.001.kl0.01.physics.exp4/config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"d_in": 1024, "d_sae": 4096, "l1_coefficient": 200, "dtype": "float32", "device": "cuda", "use_error_term": false, "hook_layer": "12", "hook_name": "hook_resid_post", "hook_spec": "blocks.12.hook_resid_post"}
|
qwen3_06B.blocks.12.hook_resid_post.sae.sparsity200.mse0.001.kl0.01.physics.exp4/history.json
ADDED
|
@@ -0,0 +1,461 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"loss": [
|
| 3 |
+
95.37120020803386,
|
| 4 |
+
25.203312384187843,
|
| 5 |
+
23.404503992129815,
|
| 6 |
+
23.316297451566076,
|
| 7 |
+
22.940359946995272,
|
| 8 |
+
23.0670559761372,
|
| 9 |
+
22.838491125843692,
|
| 10 |
+
23.130171774749925,
|
| 11 |
+
22.47384882026664,
|
| 12 |
+
22.9065746096988,
|
| 13 |
+
22.374569103672417,
|
| 14 |
+
22.49056330666964,
|
| 15 |
+
22.823488721922292,
|
| 16 |
+
22.667603141383122,
|
| 17 |
+
22.280506846352093
|
| 18 |
+
],
|
| 19 |
+
"recon_loss": [
|
| 20 |
+
17.733967696432305,
|
| 21 |
+
1.287696025806739,
|
| 22 |
+
1.164006659592119,
|
| 23 |
+
1.0520702965582605,
|
| 24 |
+
1.0177635225443398,
|
| 25 |
+
1.0063369028512201,
|
| 26 |
+
0.9689001952280256,
|
| 27 |
+
0.9734559753306903,
|
| 28 |
+
0.9568076972731818,
|
| 29 |
+
0.9497803951564588,
|
| 30 |
+
0.9651574131355841,
|
| 31 |
+
0.942727035330765,
|
| 32 |
+
0.9513793967472346,
|
| 33 |
+
0.949914007544651,
|
| 34 |
+
0.9312706448196163
|
| 35 |
+
],
|
| 36 |
+
"l1_loss": [
|
| 37 |
+
0.37458431986885404,
|
| 38 |
+
0.11425890362626494,
|
| 39 |
+
0.106774271099775,
|
| 40 |
+
0.10699238857590204,
|
| 41 |
+
0.10557731177369999,
|
| 42 |
+
0.10622444285407312,
|
| 43 |
+
0.10556697633121609,
|
| 44 |
+
0.10663951857251189,
|
| 45 |
+
0.10376199847973741,
|
| 46 |
+
0.10588242213918607,
|
| 47 |
+
0.10324284239293546,
|
| 48 |
+
0.10425651792649593,
|
| 49 |
+
0.10566235766490589,
|
| 50 |
+
0.10502050888765299,
|
| 51 |
+
0.10316120198023612
|
| 52 |
+
],
|
| 53 |
+
"sparsity": [
|
| 54 |
+
82.30947092585974,
|
| 55 |
+
8.604004205274421,
|
| 56 |
+
7.334823580097912,
|
| 57 |
+
6.577015404204639,
|
| 58 |
+
6.114957301106725,
|
| 59 |
+
5.718333419733592,
|
| 60 |
+
5.370022752650774,
|
| 61 |
+
5.148180454602301,
|
| 62 |
+
4.9376493677713835,
|
| 63 |
+
4.730704898663357,
|
| 64 |
+
4.556725888343034,
|
| 65 |
+
4.43207251665173,
|
| 66 |
+
4.309558187593671,
|
| 67 |
+
4.295170889456088,
|
| 68 |
+
4.139950065837061
|
| 69 |
+
],
|
| 70 |
+
"recon_contribution": [
|
| 71 |
+
17.733967696432305,
|
| 72 |
+
1.287696025806739,
|
| 73 |
+
1.164006659592119,
|
| 74 |
+
1.0520702965582605,
|
| 75 |
+
1.0177635225443398,
|
| 76 |
+
1.0063369028512201,
|
| 77 |
+
0.9689001952280256,
|
| 78 |
+
0.9734559753306903,
|
| 79 |
+
0.9568076972731818,
|
| 80 |
+
0.9497803951564588,
|
| 81 |
+
0.9651574131355841,
|
| 82 |
+
0.942727035330765,
|
| 83 |
+
0.9513793967472346,
|
| 84 |
+
0.949914007544651,
|
| 85 |
+
0.9312706448196163
|
| 86 |
+
],
|
| 87 |
+
"l1_contribution": [
|
| 88 |
+
74.91686405767118,
|
| 89 |
+
22.851780724819136,
|
| 90 |
+
21.354854221707097,
|
| 91 |
+
21.398477707037333,
|
| 92 |
+
21.115462330928175,
|
| 93 |
+
21.244888645537355,
|
| 94 |
+
21.113395268545972,
|
| 95 |
+
21.32790372726765,
|
| 96 |
+
20.752399676857976,
|
| 97 |
+
21.1764844283545,
|
| 98 |
+
20.648568453473715,
|
| 99 |
+
20.85130361533512,
|
| 100 |
+
21.13247155288833,
|
| 101 |
+
21.00410176711814,
|
| 102 |
+
20.632240428903202
|
| 103 |
+
],
|
| 104 |
+
"dead_features": [
|
| 105 |
+
0,
|
| 106 |
+
22,
|
| 107 |
+
48,
|
| 108 |
+
74,
|
| 109 |
+
92,
|
| 110 |
+
113,
|
| 111 |
+
115,
|
| 112 |
+
151,
|
| 113 |
+
164,
|
| 114 |
+
173,
|
| 115 |
+
199,
|
| 116 |
+
225,
|
| 117 |
+
258,
|
| 118 |
+
274,
|
| 119 |
+
325
|
| 120 |
+
],
|
| 121 |
+
"dead_feature_percentage": [
|
| 122 |
+
0.0,
|
| 123 |
+
0.537109375,
|
| 124 |
+
1.171875,
|
| 125 |
+
1.806640625,
|
| 126 |
+
2.24609375,
|
| 127 |
+
2.7587890625,
|
| 128 |
+
2.8076171875,
|
| 129 |
+
3.6865234375,
|
| 130 |
+
4.00390625,
|
| 131 |
+
4.2236328125,
|
| 132 |
+
4.8583984375,
|
| 133 |
+
5.4931640625,
|
| 134 |
+
6.298828125,
|
| 135 |
+
6.689453125,
|
| 136 |
+
7.9345703125
|
| 137 |
+
],
|
| 138 |
+
"total_post_layer_mse": [
|
| 139 |
+
1111.5424853509912,
|
| 140 |
+
248.71285880666565,
|
| 141 |
+
196.22695813333868,
|
| 142 |
+
196.91515778053514,
|
| 143 |
+
180.8655782813855,
|
| 144 |
+
186.82532502615706,
|
| 145 |
+
171.53248438509422,
|
| 146 |
+
208.44581314003614,
|
| 147 |
+
182.599455973803,
|
| 148 |
+
193.3109523559204,
|
| 149 |
+
191.7345152891096,
|
| 150 |
+
154.9794025496566,
|
| 151 |
+
172.68364177592525,
|
| 152 |
+
168.31251805239802,
|
| 153 |
+
169.58007575229462
|
| 154 |
+
],
|
| 155 |
+
"block_mse_contribution": [
|
| 156 |
+
1.1115425389218598,
|
| 157 |
+
0.2487128706383524,
|
| 158 |
+
0.19622696745238077,
|
| 159 |
+
0.196915167260525,
|
| 160 |
+
0.18086558714673792,
|
| 161 |
+
0.18682533390282927,
|
| 162 |
+
0.17153249251445565,
|
| 163 |
+
0.20844582306704257,
|
| 164 |
+
0.18259946472759275,
|
| 165 |
+
0.1933109615176571,
|
| 166 |
+
0.19173452440300676,
|
| 167 |
+
0.1549794099904145,
|
| 168 |
+
0.1726836499667201,
|
| 169 |
+
0.16831252594298005,
|
| 170 |
+
0.16958008380020131
|
| 171 |
+
],
|
| 172 |
+
"13_mse": [
|
| 173 |
+
18.661081303273978,
|
| 174 |
+
2.002416369095361,
|
| 175 |
+
1.7138062014691906,
|
| 176 |
+
1.5083244294095013,
|
| 177 |
+
1.4026643563758618,
|
| 178 |
+
1.3386388503156879,
|
| 179 |
+
1.2438268249005513,
|
| 180 |
+
1.2085478361216264,
|
| 181 |
+
1.1533302654744797,
|
| 182 |
+
1.1183669866132575,
|
| 183 |
+
1.1144696561513263,
|
| 184 |
+
1.0745743109958303,
|
| 185 |
+
1.072560211712409,
|
| 186 |
+
1.061816253226922,
|
| 187 |
+
1.0388495157840938
|
| 188 |
+
],
|
| 189 |
+
"14_mse": [
|
| 190 |
+
18.302532173751583,
|
| 191 |
+
1.8986592003105736,
|
| 192 |
+
1.6234010012125781,
|
| 193 |
+
1.4314168225744404,
|
| 194 |
+
1.3319182058197654,
|
| 195 |
+
1.2755143317369972,
|
| 196 |
+
1.1884145142650284,
|
| 197 |
+
1.1625168878847914,
|
| 198 |
+
1.1153332789293997,
|
| 199 |
+
1.0918789268340936,
|
| 200 |
+
1.0969929690457139,
|
| 201 |
+
1.0663652822525478,
|
| 202 |
+
1.0708746233285475,
|
| 203 |
+
1.0668892778314374,
|
| 204 |
+
1.047598732011716
|
| 205 |
+
],
|
| 206 |
+
"15_mse": [
|
| 207 |
+
18.391610906879684,
|
| 208 |
+
1.9728379508549263,
|
| 209 |
+
1.6997918648789203,
|
| 210 |
+
1.5047503011624463,
|
| 211 |
+
1.4100395167394555,
|
| 212 |
+
1.3559201603373647,
|
| 213 |
+
1.2724171184078443,
|
| 214 |
+
1.2498970470262936,
|
| 215 |
+
1.203856077399793,
|
| 216 |
+
1.1800921793614096,
|
| 217 |
+
1.1874531667016255,
|
| 218 |
+
1.1556608069517045,
|
| 219 |
+
1.1622643295220805,
|
| 220 |
+
1.1601135399680655,
|
| 221 |
+
1.1392539218318423
|
| 222 |
+
],
|
| 223 |
+
"16_mse": [
|
| 224 |
+
19.574237594145277,
|
| 225 |
+
2.6638696299154576,
|
| 226 |
+
2.263284824329688,
|
| 227 |
+
2.0023190853710537,
|
| 228 |
+
1.8659341616966956,
|
| 229 |
+
1.791330366625898,
|
| 230 |
+
1.6798815595075771,
|
| 231 |
+
1.6547221978594449,
|
| 232 |
+
1.5991116652696957,
|
| 233 |
+
1.562635813421525,
|
| 234 |
+
1.5690893759786215,
|
| 235 |
+
1.5273816645612641,
|
| 236 |
+
1.533372419812244,
|
| 237 |
+
1.5266077040558033,
|
| 238 |
+
1.5073032017384238
|
| 239 |
+
],
|
| 240 |
+
"17_mse": [
|
| 241 |
+
20.491394418351195,
|
| 242 |
+
3.1846455187439786,
|
| 243 |
+
2.6837111732860963,
|
| 244 |
+
2.3874807469922246,
|
| 245 |
+
2.2376821709907335,
|
| 246 |
+
2.165555802998858,
|
| 247 |
+
2.0421018740631567,
|
| 248 |
+
2.0242153778588787,
|
| 249 |
+
1.963567138652652,
|
| 250 |
+
1.927553829794368,
|
| 251 |
+
1.9390520062985905,
|
| 252 |
+
1.8871012266646041,
|
| 253 |
+
1.8918677727292392,
|
| 254 |
+
1.8832454102036666,
|
| 255 |
+
1.8650007396102084
|
| 256 |
+
],
|
| 257 |
+
"18_mse": [
|
| 258 |
+
22.12528245473095,
|
| 259 |
+
4.177902003285065,
|
| 260 |
+
3.529689018296501,
|
| 261 |
+
3.157850452126207,
|
| 262 |
+
2.9565435294207907,
|
| 263 |
+
2.8584321284748007,
|
| 264 |
+
2.678900777959877,
|
| 265 |
+
2.6531224332224217,
|
| 266 |
+
2.5638079595192016,
|
| 267 |
+
2.5129647977141225,
|
| 268 |
+
2.5139857708139624,
|
| 269 |
+
2.4399853042365454,
|
| 270 |
+
2.4388263331014928,
|
| 271 |
+
2.4200054795584514,
|
| 272 |
+
2.3997637267075356
|
| 273 |
+
],
|
| 274 |
+
"19_mse": [
|
| 275 |
+
23.956308580058952,
|
| 276 |
+
5.201611179787528,
|
| 277 |
+
4.324807997646952,
|
| 278 |
+
3.8575319867385063,
|
| 279 |
+
3.6179239568357904,
|
| 280 |
+
3.517508328962166,
|
| 281 |
+
3.321478823665008,
|
| 282 |
+
3.314407896488271,
|
| 283 |
+
3.2148120435328127,
|
| 284 |
+
3.1632965924625034,
|
| 285 |
+
3.173855092197089,
|
| 286 |
+
3.0793181405489283,
|
| 287 |
+
3.083961148812015,
|
| 288 |
+
3.0620916370314952,
|
| 289 |
+
3.045943768534388
|
| 290 |
+
],
|
| 291 |
+
"20_mse": [
|
| 292 |
+
26.27528652445333,
|
| 293 |
+
6.531867835182625,
|
| 294 |
+
5.461665230395412,
|
| 295 |
+
4.909374874623332,
|
| 296 |
+
4.621029420769362,
|
| 297 |
+
4.5044462571352355,
|
| 298 |
+
4.268259471367882,
|
| 299 |
+
4.26421758602608,
|
| 300 |
+
4.151626748707634,
|
| 301 |
+
4.0874135133800955,
|
| 302 |
+
4.11113303879741,
|
| 303 |
+
3.9917887685547218,
|
| 304 |
+
3.9920658074728186,
|
| 305 |
+
3.969586313104576,
|
| 306 |
+
3.953901756250311
|
| 307 |
+
],
|
| 308 |
+
"21_mse": [
|
| 309 |
+
29.572611079904828,
|
| 310 |
+
8.39827348563866,
|
| 311 |
+
7.011669287622842,
|
| 312 |
+
6.342176180003338,
|
| 313 |
+
5.98508731995825,
|
| 314 |
+
5.883178412447051,
|
| 315 |
+
5.574055451161383,
|
| 316 |
+
5.685209044683954,
|
| 317 |
+
5.505207563434443,
|
| 318 |
+
5.437553548599277,
|
| 319 |
+
5.448056922113775,
|
| 320 |
+
5.251831044008056,
|
| 321 |
+
5.265223188870164,
|
| 322 |
+
5.234549940500121,
|
| 323 |
+
5.215668146180413
|
| 324 |
+
],
|
| 325 |
+
"22_mse": [
|
| 326 |
+
33.2700747589248,
|
| 327 |
+
10.352992856622677,
|
| 328 |
+
8.58893249870548,
|
| 329 |
+
7.737568897469447,
|
| 330 |
+
7.271923473140293,
|
| 331 |
+
7.123960362036044,
|
| 332 |
+
6.7335114997017955,
|
| 333 |
+
6.835193461026217,
|
| 334 |
+
6.624957394359627,
|
| 335 |
+
6.52961840795109,
|
| 336 |
+
6.5440516226235586,
|
| 337 |
+
6.310462373902256,
|
| 338 |
+
6.317037944964572,
|
| 339 |
+
6.280506420242292,
|
| 340 |
+
6.263656544124693
|
| 341 |
+
],
|
| 342 |
+
"23_mse": [
|
| 343 |
+
37.418139438479635,
|
| 344 |
+
12.6129090425549,
|
| 345 |
+
10.496148921607723,
|
| 346 |
+
9.540199258426268,
|
| 347 |
+
8.995596846701183,
|
| 348 |
+
8.844507082312264,
|
| 349 |
+
8.371718425900244,
|
| 350 |
+
8.52638149101283,
|
| 351 |
+
8.259419735441817,
|
| 352 |
+
8.171994307006461,
|
| 353 |
+
8.185560900847294,
|
| 354 |
+
7.875357104575914,
|
| 355 |
+
7.892840274370798,
|
| 356 |
+
7.84286354477168,
|
| 357 |
+
7.843173813045626
|
| 358 |
+
],
|
| 359 |
+
"24_mse": [
|
| 360 |
+
43.76929685757115,
|
| 361 |
+
15.829630092437164,
|
| 362 |
+
13.14439078648173,
|
| 363 |
+
11.915909244392113,
|
| 364 |
+
11.177546448296422,
|
| 365 |
+
10.975457204816589,
|
| 366 |
+
10.34098312542393,
|
| 367 |
+
10.517081979407708,
|
| 368 |
+
10.184919732682248,
|
| 369 |
+
10.073991210036155,
|
| 370 |
+
10.096826815525068,
|
| 371 |
+
9.722224641887498,
|
| 372 |
+
9.767288950229146,
|
| 373 |
+
9.709985274350123,
|
| 374 |
+
9.710594187925537
|
| 375 |
+
],
|
| 376 |
+
"25_mse": [
|
| 377 |
+
53.45814998793308,
|
| 378 |
+
19.946207450191757,
|
| 379 |
+
16.520528214509117,
|
| 380 |
+
14.965904693987975,
|
| 381 |
+
14.032096869200634,
|
| 382 |
+
13.820303389366638,
|
| 383 |
+
12.986208576104142,
|
| 384 |
+
13.282625274188307,
|
| 385 |
+
12.819915474595842,
|
| 386 |
+
12.706837190625915,
|
| 387 |
+
12.741521098445538,
|
| 388 |
+
12.245600036117024,
|
| 389 |
+
12.33595658096194,
|
| 390 |
+
12.247775551987122,
|
| 391 |
+
12.282700847271174
|
| 392 |
+
],
|
| 393 |
+
"26_mse": [
|
| 394 |
+
60.02048654972773,
|
| 395 |
+
22.25097655075261,
|
| 396 |
+
18.26236175716524,
|
| 397 |
+
16.579325396162933,
|
| 398 |
+
15.455498348291616,
|
| 399 |
+
15.318212559348659,
|
| 400 |
+
14.297038620823843,
|
| 401 |
+
14.830009205210516,
|
| 402 |
+
14.201571175125652,
|
| 403 |
+
14.127840438321616,
|
| 404 |
+
14.114407935575034,
|
| 405 |
+
13.43558377247775,
|
| 406 |
+
13.576521784834739,
|
| 407 |
+
13.439644940622976,
|
| 408 |
+
13.497160401082652
|
| 409 |
+
],
|
| 410 |
+
"27_mse": [
|
| 411 |
+
686.255992722805,
|
| 412 |
+
131.68805964129234,
|
| 413 |
+
98.90276935573121,
|
| 414 |
+
109.07502541109535,
|
| 415 |
+
98.50409365714864,
|
| 416 |
+
106.0523597892428,
|
| 417 |
+
95.53368772184194,
|
| 418 |
+
131.2376654220188,
|
| 419 |
+
108.0380197206777,
|
| 420 |
+
119.61891461379851,
|
| 421 |
+
117.898058917995,
|
| 422 |
+
83.91616807192197,
|
| 423 |
+
101.28298040520303,
|
| 424 |
+
97.4068367649433,
|
| 425 |
+
98.76950645019599
|
| 426 |
+
],
|
| 427 |
+
"logit_kl": [
|
| 428 |
+
160.88268443638373,
|
| 429 |
+
81.51228823987528,
|
| 430 |
+
68.94162952085625,
|
| 431 |
+
66.88342972378447,
|
| 432 |
+
62.6268643329018,
|
| 433 |
+
62.90049621479508,
|
| 434 |
+
58.466316821193374,
|
| 435 |
+
62.03663040195307,
|
| 436 |
+
58.20419102740315,
|
| 437 |
+
58.699884051571765,
|
| 438 |
+
56.910884758926855,
|
| 439 |
+
54.15531890416332,
|
| 440 |
+
56.695407448797454,
|
| 441 |
+
54.52748057289593,
|
| 442 |
+
54.741569292638765
|
| 443 |
+
],
|
| 444 |
+
"kl_contribution": [
|
| 445 |
+
1.6088268095408416,
|
| 446 |
+
0.8151228632298189,
|
| 447 |
+
0.6894162805356745,
|
| 448 |
+
0.6688342822959682,
|
| 449 |
+
0.6262686306491811,
|
| 450 |
+
0.6290049474990915,
|
| 451 |
+
0.5846631551810101,
|
| 452 |
+
0.6203662907122764,
|
| 453 |
+
0.5820418970048228,
|
| 454 |
+
0.5869988267125368,
|
| 455 |
+
0.5691088341919065,
|
| 456 |
+
0.5415531765974115,
|
| 457 |
+
0.5669540614390293,
|
| 458 |
+
0.5452747932506969,
|
| 459 |
+
0.5474156798734376
|
| 460 |
+
]
|
| 461 |
+
}
|
qwen3_06B.blocks.12.hook_resid_post.sae.sparsity200.mse0.001.kl0.01.physics.exp4/weights.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3f921d3bd28378d7124fc8a6e915d50ff161099f5f010f97074e80e1bbf48caf
|
| 3 |
+
size 33577309
|