Upload Models
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- pythia14m_layer0_exp4/config.json +1 -0
- pythia14m_layer0_exp4/history.json +242 -0
- pythia14m_layer0_exp4/weights.pt +3 -0
- pythia14m_layer2_exp4/config.json +1 -0
- pythia14m_layer2_exp4/history.json +242 -0
- pythia14m_layer2_exp4/weights.pt +3 -0
- pythia14m_layer5_exp4/config.json +1 -0
- pythia14m_layer5_exp4/history.json +102 -0
- pythia14m_layer5_exp4/weights.pt +3 -0
- pythia160m_layer0_exp4/config.json +1 -0
- pythia160m_layer0_exp4/history.json +102 -0
- pythia160m_layer0_exp4/weights.pt +3 -0
- pythia160m_layer11_exp4/config.json +1 -0
- pythia160m_layer11_exp4/history.json +102 -0
- pythia160m_layer11_exp4/weights.pt +3 -0
- pythia160m_layer2_exp4/config.json +1 -0
- pythia160m_layer2_exp4/history.json +102 -0
- pythia160m_layer2_exp4/weights.pt +3 -0
- pythia160m_layer5_exp4/config.json +1 -0
- pythia160m_layer5_exp4/history.json +202 -0
- pythia160m_layer5_exp4/weights.pt +3 -0
- pythia160m_layer8_exp4/config.json +1 -0
- pythia160m_layer8_exp4/history.json +62 -0
- pythia160m_layer8_exp4/weights.pt +3 -0
- pythia31m_layer0_exp4/config.json +1 -0
- pythia31m_layer0_exp4/history.json +242 -0
- pythia31m_layer0_exp4/weights.pt +3 -0
- pythia31m_layer2_exp4/config.json +1 -0
- pythia31m_layer2_exp4/history.json +242 -0
- pythia31m_layer2_exp4/weights.pt +3 -0
- pythia31m_layer5_exp4/config.json +1 -0
- pythia31m_layer5_exp4/history.json +242 -0
- pythia31m_layer5_exp4/weights.pt +3 -0
- pythia410m_layer0_exp4/config.json +1 -0
- pythia410m_layer0_exp4/history.json +162 -0
- pythia410m_layer0_exp4/weights.pt +3 -0
- pythia410m_layer12_exp4/config.json +1 -0
- pythia410m_layer12_exp4/history.json +222 -0
- pythia410m_layer12_exp4/weights.pt +3 -0
- pythia410m_layer16_exp4/config.json +1 -0
- pythia410m_layer16_exp4/history.json +242 -0
- pythia410m_layer16_exp4/weights.pt +3 -0
- pythia410m_layer20_exp4/config.json +1 -0
- pythia410m_layer20_exp4/history.json +222 -0
- pythia410m_layer20_exp4/weights.pt +3 -0
- pythia410m_layer23_exp4/config.json +1 -0
- pythia410m_layer23_exp4/history.json +222 -0
- pythia410m_layer23_exp4/weights.pt +3 -0
- pythia410m_layer4_exp4/config.json +1 -0
- pythia410m_layer4_exp4/history.json +142 -0
pythia14m_layer0_exp4/config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"d_in": 128, "d_sae": 512, "l1_coefficient": 0.0, "dtype": "float32", "device": "cuda", "hook_layer": "0", "hook_name": "hook_resid_post", "hook_spec": "blocks.0.hook_resid_post", "top_k": 128, "top_k_aux": 64, "n_batches_to_dead": 200, "aux_penalty": 0.03125}
|
pythia14m_layer0_exp4/history.json
ADDED
|
@@ -0,0 +1,242 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"loss": [
|
| 3 |
+
0.002350835961956504,
|
| 4 |
+
0.0009348777620043241,
|
| 5 |
+
0.0007179698496110426,
|
| 6 |
+
0.0006119242823945135,
|
| 7 |
+
0.000554729806316954,
|
| 8 |
+
0.00047719662552893826,
|
| 9 |
+
0.000514314113381934,
|
| 10 |
+
0.0004725856062888122,
|
| 11 |
+
0.00040279469793117747,
|
| 12 |
+
0.00041239336272976283
|
| 13 |
+
],
|
| 14 |
+
"recon_loss": [
|
| 15 |
+
0.0004893488696288907,
|
| 16 |
+
0.0001704155632658433,
|
| 17 |
+
0.00011458642346045627,
|
| 18 |
+
8.463381353384154e-05,
|
| 19 |
+
7.192777099248197e-05,
|
| 20 |
+
5.474265194796964e-05,
|
| 21 |
+
6.11373730984543e-05,
|
| 22 |
+
4.977752262604518e-05,
|
| 23 |
+
4.123608410743381e-05,
|
| 24 |
+
4.071823093516339e-05
|
| 25 |
+
],
|
| 26 |
+
"l1_loss": [
|
| 27 |
+
0.012764840436670016,
|
| 28 |
+
0.013959127097997388,
|
| 29 |
+
0.0152542766178544,
|
| 30 |
+
0.01662405031123038,
|
| 31 |
+
0.017305464441541536,
|
| 32 |
+
0.018180008804401935,
|
| 33 |
+
0.01870120782405138,
|
| 34 |
+
0.019081004576659517,
|
| 35 |
+
0.01934059735509261,
|
| 36 |
+
0.0194797490202837
|
| 37 |
+
],
|
| 38 |
+
"sparsity": [
|
| 39 |
+
127.99999884382133,
|
| 40 |
+
127.99906652765569,
|
| 41 |
+
127.85375288393882,
|
| 42 |
+
127.12581307052918,
|
| 43 |
+
126.41104376597326,
|
| 44 |
+
125.64602078197223,
|
| 45 |
+
124.57536479998414,
|
| 46 |
+
123.95846863623755,
|
| 47 |
+
123.62613914752829,
|
| 48 |
+
122.99680158750115
|
| 49 |
+
],
|
| 50 |
+
"recon_contribution": [
|
| 51 |
+
0.0004893488696288907,
|
| 52 |
+
0.0001704155632658433,
|
| 53 |
+
0.00011458642346045627,
|
| 54 |
+
8.463381353384154e-05,
|
| 55 |
+
7.192777099248197e-05,
|
| 56 |
+
5.474265194796964e-05,
|
| 57 |
+
6.11373730984543e-05,
|
| 58 |
+
4.977752262604518e-05,
|
| 59 |
+
4.123608410743381e-05,
|
| 60 |
+
4.071823093516339e-05
|
| 61 |
+
],
|
| 62 |
+
"l1_contribution": [
|
| 63 |
+
0.0,
|
| 64 |
+
0.0,
|
| 65 |
+
0.0,
|
| 66 |
+
0.0,
|
| 67 |
+
0.0,
|
| 68 |
+
0.0,
|
| 69 |
+
0.0,
|
| 70 |
+
0.0,
|
| 71 |
+
0.0,
|
| 72 |
+
0.0
|
| 73 |
+
],
|
| 74 |
+
"aux_loss": [
|
| 75 |
+
0.0,
|
| 76 |
+
0.0,
|
| 77 |
+
0.0,
|
| 78 |
+
0.0,
|
| 79 |
+
0.0,
|
| 80 |
+
0.0,
|
| 81 |
+
0.0,
|
| 82 |
+
0.0,
|
| 83 |
+
0.0,
|
| 84 |
+
0.0
|
| 85 |
+
],
|
| 86 |
+
"logit_kl": [
|
| 87 |
+
0.18614871376371536,
|
| 88 |
+
0.07644622179995647,
|
| 89 |
+
0.060338344049729584,
|
| 90 |
+
0.05272904806136544,
|
| 91 |
+
0.04828020458036997,
|
| 92 |
+
0.04224539829414683,
|
| 93 |
+
0.04531767499546931,
|
| 94 |
+
0.04228080938588794,
|
| 95 |
+
0.03615586221650811,
|
| 96 |
+
0.037167514025313125
|
| 97 |
+
],
|
| 98 |
+
"kl_contribution": [
|
| 99 |
+
0.0018614870966192395,
|
| 100 |
+
0.0007644622000523787,
|
| 101 |
+
0.0006033834267778241,
|
| 102 |
+
0.0005272904691907971,
|
| 103 |
+
0.0004828020352914595,
|
| 104 |
+
0.00042245397342911107,
|
| 105 |
+
0.0004531767398840283,
|
| 106 |
+
0.0004228080840919297,
|
| 107 |
+
0.00036155861419348383,
|
| 108 |
+
0.0003716751317797438
|
| 109 |
+
],
|
| 110 |
+
"dead_features": [
|
| 111 |
+
0,
|
| 112 |
+
0,
|
| 113 |
+
0,
|
| 114 |
+
0,
|
| 115 |
+
0,
|
| 116 |
+
0,
|
| 117 |
+
0,
|
| 118 |
+
0,
|
| 119 |
+
0,
|
| 120 |
+
0
|
| 121 |
+
],
|
| 122 |
+
"dead_feature_percentage": [
|
| 123 |
+
0.0,
|
| 124 |
+
0.0,
|
| 125 |
+
0.0,
|
| 126 |
+
0.0,
|
| 127 |
+
0.0,
|
| 128 |
+
0.0,
|
| 129 |
+
0.0,
|
| 130 |
+
0.0,
|
| 131 |
+
0.0,
|
| 132 |
+
0.0
|
| 133 |
+
],
|
| 134 |
+
"val_loss": [
|
| 135 |
+
0.0021225624777195633,
|
| 136 |
+
0.00148126540563241,
|
| 137 |
+
0.0012875209773106535,
|
| 138 |
+
0.0009513918270647093,
|
| 139 |
+
0.0007787892757978402,
|
| 140 |
+
0.0007308362603310638,
|
| 141 |
+
0.0006435117276692484,
|
| 142 |
+
0.0006993927681857938,
|
| 143 |
+
0.0006074122727421839,
|
| 144 |
+
0.0005432340632307831
|
| 145 |
+
],
|
| 146 |
+
"val_recon_loss": [
|
| 147 |
+
0.0003286703012402141,
|
| 148 |
+
0.0002131836933951819,
|
| 149 |
+
0.0001612513693184152,
|
| 150 |
+
0.00011700480393018065,
|
| 151 |
+
9.122331468029993e-05,
|
| 152 |
+
8.419306326683333e-05,
|
| 153 |
+
7.290565689098805e-05,
|
| 154 |
+
7.57794010392498e-05,
|
| 155 |
+
6.403275831101008e-05,
|
| 156 |
+
5.917200356142233e-05
|
| 157 |
+
],
|
| 158 |
+
"val_l1_loss": [
|
| 159 |
+
0.014152863184726043,
|
| 160 |
+
0.015889077301016186,
|
| 161 |
+
0.01765171336187735,
|
| 162 |
+
0.019233293285750727,
|
| 163 |
+
0.02040852682341861,
|
| 164 |
+
0.020747396700781223,
|
| 165 |
+
0.02172859367761392,
|
| 166 |
+
0.021750612349353465,
|
| 167 |
+
0.022186011620725317,
|
| 168 |
+
0.022411953607958137
|
| 169 |
+
],
|
| 170 |
+
"val_sparsity": [
|
| 171 |
+
127.99999901600906,
|
| 172 |
+
127.99962793362346,
|
| 173 |
+
127.97277647013236,
|
| 174 |
+
127.87401864920606,
|
| 175 |
+
127.84238952858463,
|
| 176 |
+
127.67606054404776,
|
| 177 |
+
127.59440749304387,
|
| 178 |
+
127.40734653316986,
|
| 179 |
+
127.17185840381481,
|
| 180 |
+
127.06388444060309
|
| 181 |
+
],
|
| 182 |
+
"val_recon_contribution": [
|
| 183 |
+
0.0003286703012402141,
|
| 184 |
+
0.0002131836933951819,
|
| 185 |
+
0.0001612513693184152,
|
| 186 |
+
0.00011700480393018065,
|
| 187 |
+
9.122331468029993e-05,
|
| 188 |
+
8.419306326683333e-05,
|
| 189 |
+
7.290565689098805e-05,
|
| 190 |
+
7.57794010392498e-05,
|
| 191 |
+
6.403275831101008e-05,
|
| 192 |
+
5.917200356142233e-05
|
| 193 |
+
],
|
| 194 |
+
"val_l1_contribution": [
|
| 195 |
+
0.0,
|
| 196 |
+
0.0,
|
| 197 |
+
0.0,
|
| 198 |
+
0.0,
|
| 199 |
+
0.0,
|
| 200 |
+
0.0,
|
| 201 |
+
0.0,
|
| 202 |
+
0.0,
|
| 203 |
+
0.0,
|
| 204 |
+
0.0
|
| 205 |
+
],
|
| 206 |
+
"val_aux_loss": [
|
| 207 |
+
0.0,
|
| 208 |
+
0.0,
|
| 209 |
+
0.0,
|
| 210 |
+
0.0,
|
| 211 |
+
0.0,
|
| 212 |
+
0.0,
|
| 213 |
+
0.0,
|
| 214 |
+
0.0,
|
| 215 |
+
0.0,
|
| 216 |
+
0.0
|
| 217 |
+
],
|
| 218 |
+
"val_logit_kl": [
|
| 219 |
+
0.17938922173077143,
|
| 220 |
+
0.12680817412704798,
|
| 221 |
+
0.11262696319945718,
|
| 222 |
+
0.08343870436381903,
|
| 223 |
+
0.06875659769179332,
|
| 224 |
+
0.06466432112682742,
|
| 225 |
+
0.057060608445758935,
|
| 226 |
+
0.06236133815130247,
|
| 227 |
+
0.05433795269541476,
|
| 228 |
+
0.04840620696632619
|
| 229 |
+
],
|
| 230 |
+
"val_kl_contribution": [
|
| 231 |
+
0.0017938921780521718,
|
| 232 |
+
0.0012680817127725165,
|
| 233 |
+
0.0011262696085275268,
|
| 234 |
+
0.0008343870243604052,
|
| 235 |
+
0.0006875659614942247,
|
| 236 |
+
0.0006466431969981455,
|
| 237 |
+
0.0005706060710227748,
|
| 238 |
+
0.0006236133675463582,
|
| 239 |
+
0.0005433795140776191,
|
| 240 |
+
0.00048406205939841235
|
| 241 |
+
]
|
| 242 |
+
}
|
pythia14m_layer0_exp4/weights.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1a1254789e341da7e869979a8e0771f43c8018bc11f4e2705878665efc0b5134
|
| 3 |
+
size 532117
|
pythia14m_layer2_exp4/config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"d_in": 128, "d_sae": 512, "l1_coefficient": 0.0, "dtype": "float32", "device": "cuda", "hook_layer": "2", "hook_name": "hook_resid_post", "hook_spec": "blocks.2.hook_resid_post", "top_k": 128, "top_k_aux": 64, "n_batches_to_dead": 200, "aux_penalty": 0.03125}
|
pythia14m_layer2_exp4/history.json
ADDED
|
@@ -0,0 +1,242 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"loss": [
|
| 3 |
+
0.0024974587431907257,
|
| 4 |
+
0.0009357552292083598,
|
| 5 |
+
0.0006441175975286851,
|
| 6 |
+
0.0005209526913583354,
|
| 7 |
+
0.00044933348599013816,
|
| 8 |
+
0.0004692463088232264,
|
| 9 |
+
0.00043302303495935837,
|
| 10 |
+
0.00042616698084151485,
|
| 11 |
+
0.00041562574543536896,
|
| 12 |
+
0.0003904228272349527
|
| 13 |
+
],
|
| 14 |
+
"recon_loss": [
|
| 15 |
+
0.0006204204316159179,
|
| 16 |
+
0.00017772805845800058,
|
| 17 |
+
9.919215879236302e-05,
|
| 18 |
+
6.937097038302745e-05,
|
| 19 |
+
5.5784350717061e-05,
|
| 20 |
+
5.436880682316871e-05,
|
| 21 |
+
4.954367982125358e-05,
|
| 22 |
+
4.753450422270373e-05,
|
| 23 |
+
4.500997007287882e-05,
|
| 24 |
+
4.096635901725161e-05
|
| 25 |
+
],
|
| 26 |
+
"l1_loss": [
|
| 27 |
+
0.017284850549787796,
|
| 28 |
+
0.022396092205063292,
|
| 29 |
+
0.026255464814299086,
|
| 30 |
+
0.02822229413555668,
|
| 31 |
+
0.02915380093794715,
|
| 32 |
+
0.029126742367079978,
|
| 33 |
+
0.029253377585913547,
|
| 34 |
+
0.029100136687732328,
|
| 35 |
+
0.029255775574209362,
|
| 36 |
+
0.02931088240759721
|
| 37 |
+
],
|
| 38 |
+
"sparsity": [
|
| 39 |
+
127.99999898574745,
|
| 40 |
+
127.99999470027086,
|
| 41 |
+
127.9999664500491,
|
| 42 |
+
127.99537200027716,
|
| 43 |
+
127.98949656685554,
|
| 44 |
+
127.90479178870005,
|
| 45 |
+
127.9096765639345,
|
| 46 |
+
127.88598192841518,
|
| 47 |
+
127.86500706473626,
|
| 48 |
+
127.8692474261386
|
| 49 |
+
],
|
| 50 |
+
"recon_contribution": [
|
| 51 |
+
0.0006204204316159179,
|
| 52 |
+
0.00017772805845800058,
|
| 53 |
+
9.919215879236302e-05,
|
| 54 |
+
6.937097038302745e-05,
|
| 55 |
+
5.5784350717061e-05,
|
| 56 |
+
5.436880682316871e-05,
|
| 57 |
+
4.954367982125358e-05,
|
| 58 |
+
4.753450422270373e-05,
|
| 59 |
+
4.500997007287882e-05,
|
| 60 |
+
4.096635901725161e-05
|
| 61 |
+
],
|
| 62 |
+
"l1_contribution": [
|
| 63 |
+
0.0,
|
| 64 |
+
0.0,
|
| 65 |
+
0.0,
|
| 66 |
+
0.0,
|
| 67 |
+
0.0,
|
| 68 |
+
0.0,
|
| 69 |
+
0.0,
|
| 70 |
+
0.0,
|
| 71 |
+
0.0,
|
| 72 |
+
0.0
|
| 73 |
+
],
|
| 74 |
+
"aux_loss": [
|
| 75 |
+
0.0,
|
| 76 |
+
0.0,
|
| 77 |
+
0.0,
|
| 78 |
+
0.0,
|
| 79 |
+
3.6292955756156594e-06,
|
| 80 |
+
1.2509694921059426e-05,
|
| 81 |
+
5.29175094170288e-06,
|
| 82 |
+
4.256246537688854e-06,
|
| 83 |
+
3.4688251966006235e-06,
|
| 84 |
+
2.9007877803691433e-06
|
| 85 |
+
],
|
| 86 |
+
"logit_kl": [
|
| 87 |
+
0.18770383551933156,
|
| 88 |
+
0.07580271870837617,
|
| 89 |
+
0.054492545079612285,
|
| 90 |
+
0.04515817307253552,
|
| 91 |
+
0.03899198488759962,
|
| 92 |
+
0.040236781585366616,
|
| 93 |
+
0.037818761271578405,
|
| 94 |
+
0.03743762382645897,
|
| 95 |
+
0.03671469576082136,
|
| 96 |
+
0.03465556892517366
|
| 97 |
+
],
|
| 98 |
+
"kl_contribution": [
|
| 99 |
+
0.0018770383104457801,
|
| 100 |
+
0.0007580271702056527,
|
| 101 |
+
0.0005449254384903789,
|
| 102 |
+
0.00045158172075742537,
|
| 103 |
+
0.0003899198399879716,
|
| 104 |
+
0.00040236780693394956,
|
| 105 |
+
0.00037818760431462795,
|
| 106 |
+
0.000374376229761623,
|
| 107 |
+
0.00036714694947551535,
|
| 108 |
+
0.0003465556808660304
|
| 109 |
+
],
|
| 110 |
+
"dead_features": [
|
| 111 |
+
0,
|
| 112 |
+
0,
|
| 113 |
+
0,
|
| 114 |
+
0,
|
| 115 |
+
0,
|
| 116 |
+
0,
|
| 117 |
+
0,
|
| 118 |
+
0,
|
| 119 |
+
0,
|
| 120 |
+
0
|
| 121 |
+
],
|
| 122 |
+
"dead_feature_percentage": [
|
| 123 |
+
0.0,
|
| 124 |
+
0.0,
|
| 125 |
+
0.0,
|
| 126 |
+
0.0,
|
| 127 |
+
0.0,
|
| 128 |
+
0.0,
|
| 129 |
+
0.0,
|
| 130 |
+
0.0,
|
| 131 |
+
0.0,
|
| 132 |
+
0.0
|
| 133 |
+
],
|
| 134 |
+
"val_loss": [
|
| 135 |
+
0.002235761433576396,
|
| 136 |
+
0.001568728206663992,
|
| 137 |
+
0.0010093971984428158,
|
| 138 |
+
0.0009670981014213411,
|
| 139 |
+
0.0006443741063848443,
|
| 140 |
+
0.0004960434937388739,
|
| 141 |
+
0.0005021103056955249,
|
| 142 |
+
0.0004938424404291415,
|
| 143 |
+
0.0006279017958622502,
|
| 144 |
+
0.00042139393407342635
|
| 145 |
+
],
|
| 146 |
+
"val_recon_loss": [
|
| 147 |
+
0.00045685922408287964,
|
| 148 |
+
0.00026093841685184335,
|
| 149 |
+
0.00016569173608809034,
|
| 150 |
+
0.00016257147393598754,
|
| 151 |
+
0.00010730084467948007,
|
| 152 |
+
8.607745689966389e-05,
|
| 153 |
+
8.699815164563527e-05,
|
| 154 |
+
9.082359091392712e-05,
|
| 155 |
+
0.00011087799491068555,
|
| 156 |
+
7.251064786013649e-05
|
| 157 |
+
],
|
| 158 |
+
"val_l1_loss": [
|
| 159 |
+
0.02721808145228081,
|
| 160 |
+
0.03167134426239673,
|
| 161 |
+
0.034052184780806585,
|
| 162 |
+
0.033693422752515884,
|
| 163 |
+
0.03525968035450311,
|
| 164 |
+
0.035505264750370816,
|
| 165 |
+
0.0349859887228831,
|
| 166 |
+
0.035129971949486014,
|
| 167 |
+
0.03404857037840297,
|
| 168 |
+
0.03474399006025513
|
| 169 |
+
],
|
| 170 |
+
"val_sparsity": [
|
| 171 |
+
127.99999902596099,
|
| 172 |
+
127.99998419699698,
|
| 173 |
+
127.9996505279749,
|
| 174 |
+
127.93096095721648,
|
| 175 |
+
127.89768401323151,
|
| 176 |
+
127.86675165047271,
|
| 177 |
+
127.76218301997517,
|
| 178 |
+
127.68681801670738,
|
| 179 |
+
127.40518527483734,
|
| 180 |
+
127.4482454978461
|
| 181 |
+
],
|
| 182 |
+
"val_recon_contribution": [
|
| 183 |
+
0.00045685922408287964,
|
| 184 |
+
0.00026093841685184335,
|
| 185 |
+
0.00016569173608809034,
|
| 186 |
+
0.00016257147393598754,
|
| 187 |
+
0.00010730084467948007,
|
| 188 |
+
8.607745689966389e-05,
|
| 189 |
+
8.699815164563527e-05,
|
| 190 |
+
9.082359091392712e-05,
|
| 191 |
+
0.00011087799491068555,
|
| 192 |
+
7.251064786013649e-05
|
| 193 |
+
],
|
| 194 |
+
"val_l1_contribution": [
|
| 195 |
+
0.0,
|
| 196 |
+
0.0,
|
| 197 |
+
0.0,
|
| 198 |
+
0.0,
|
| 199 |
+
0.0,
|
| 200 |
+
0.0,
|
| 201 |
+
0.0,
|
| 202 |
+
0.0,
|
| 203 |
+
0.0,
|
| 204 |
+
0.0
|
| 205 |
+
],
|
| 206 |
+
"val_aux_loss": [
|
| 207 |
+
0.0,
|
| 208 |
+
0.0,
|
| 209 |
+
0.0,
|
| 210 |
+
0.0,
|
| 211 |
+
3.1570998731114016e-05,
|
| 212 |
+
8.380522075942391e-06,
|
| 213 |
+
6.037880930807433e-06,
|
| 214 |
+
4.613538658862451e-06,
|
| 215 |
+
4.749575914215015e-06,
|
| 216 |
+
3.4152656186549417e-06
|
| 217 |
+
],
|
| 218 |
+
"val_logit_kl": [
|
| 219 |
+
0.17789022520062653,
|
| 220 |
+
0.1307789815478924,
|
| 221 |
+
0.08437054813490408,
|
| 222 |
+
0.0804526646101491,
|
| 223 |
+
0.05055022746686744,
|
| 224 |
+
0.040158552400723795,
|
| 225 |
+
0.04090742824572913,
|
| 226 |
+
0.03984053197254624,
|
| 227 |
+
0.05122742362520143,
|
| 228 |
+
0.03454680280757392
|
| 229 |
+
],
|
| 230 |
+
"val_kl_contribution": [
|
| 231 |
+
0.001778902211086888,
|
| 232 |
+
0.0013077897880237376,
|
| 233 |
+
0.0008437054621365458,
|
| 234 |
+
0.000804526627584526,
|
| 235 |
+
0.0005055022634635012,
|
| 236 |
+
0.00040158551510128073,
|
| 237 |
+
0.000409074273100694,
|
| 238 |
+
0.0003984053108930251,
|
| 239 |
+
0.0005122742249415861,
|
| 240 |
+
0.0003454680201977382
|
| 241 |
+
]
|
| 242 |
+
}
|
pythia14m_layer2_exp4/weights.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cf5e564c6c2e904bdc7faa6dba5ddaa41164c62248003dacfb828b06d59c8cff
|
| 3 |
+
size 532117
|
pythia14m_layer5_exp4/config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"d_in": 128, "d_sae": 512, "l1_coefficient": 0.0, "dtype": "float32", "device": "cuda", "hook_layer": "5", "hook_name": "hook_resid_post", "hook_spec": "blocks.5.hook_resid_post", "top_k": 128, "top_k_aux": 64, "n_batches_to_dead": 200, "aux_penalty": 0.03125}
|
pythia14m_layer5_exp4/history.json
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"loss": [
|
| 3 |
+
0.002171335807189509,
|
| 4 |
+
0.0004760217304321582,
|
| 5 |
+
0.00029963853915857203
|
| 6 |
+
],
|
| 7 |
+
"recon_loss": [
|
| 8 |
+
0.0013176010684409186,
|
| 9 |
+
0.0002397540533449184,
|
| 10 |
+
0.00011932067944890448
|
| 11 |
+
],
|
| 12 |
+
"l1_loss": [
|
| 13 |
+
0.04776091468748645,
|
| 14 |
+
0.06832094815054303,
|
| 15 |
+
0.07560518625684204
|
| 16 |
+
],
|
| 17 |
+
"sparsity": [
|
| 18 |
+
127.99990193597202,
|
| 19 |
+
127.99837555859352,
|
| 20 |
+
127.99220852583592
|
| 21 |
+
],
|
| 22 |
+
"recon_contribution": [
|
| 23 |
+
0.0013176010684409186,
|
| 24 |
+
0.0002397540533449184,
|
| 25 |
+
0.00011932067944890448
|
| 26 |
+
],
|
| 27 |
+
"l1_contribution": [
|
| 28 |
+
0.0,
|
| 29 |
+
0.0,
|
| 30 |
+
0.0
|
| 31 |
+
],
|
| 32 |
+
"aux_loss": [
|
| 33 |
+
0.0,
|
| 34 |
+
0.0,
|
| 35 |
+
1.8027577064040848e-05
|
| 36 |
+
],
|
| 37 |
+
"logit_kl": [
|
| 38 |
+
0.08537347564734045,
|
| 39 |
+
0.023626768219321897,
|
| 40 |
+
0.016229028596028098
|
| 41 |
+
],
|
| 42 |
+
"kl_contribution": [
|
| 43 |
+
0.0008537347380190143,
|
| 44 |
+
0.00023626767695023788,
|
| 45 |
+
0.0001622902823324205
|
| 46 |
+
],
|
| 47 |
+
"dead_features": [
|
| 48 |
+
0,
|
| 49 |
+
0,
|
| 50 |
+
0
|
| 51 |
+
],
|
| 52 |
+
"dead_feature_percentage": [
|
| 53 |
+
0.0,
|
| 54 |
+
0.0,
|
| 55 |
+
0.0
|
| 56 |
+
],
|
| 57 |
+
"val_loss": [
|
| 58 |
+
0.001085616160162565,
|
| 59 |
+
0.00044631636424370105,
|
| 60 |
+
0.00017706250308823795
|
| 61 |
+
],
|
| 62 |
+
"val_recon_loss": [
|
| 63 |
+
0.0005686862940138595,
|
| 64 |
+
0.0002042529623299786,
|
| 65 |
+
9.947409705241557e-05
|
| 66 |
+
],
|
| 67 |
+
"val_l1_loss": [
|
| 68 |
+
0.05804161652279463,
|
| 69 |
+
0.07236223318857368,
|
| 70 |
+
0.08036526477049828
|
| 71 |
+
],
|
| 72 |
+
"val_sparsity": [
|
| 73 |
+
127.99999902596099,
|
| 74 |
+
127.99999862386659,
|
| 75 |
+
127.9999893548975
|
| 76 |
+
],
|
| 77 |
+
"val_recon_contribution": [
|
| 78 |
+
0.0005686862940138595,
|
| 79 |
+
0.0002042529623299786,
|
| 80 |
+
9.947409705241557e-05
|
| 81 |
+
],
|
| 82 |
+
"val_l1_contribution": [
|
| 83 |
+
0.0,
|
| 84 |
+
0.0,
|
| 85 |
+
0.0
|
| 86 |
+
],
|
| 87 |
+
"val_aux_loss": [
|
| 88 |
+
0.0,
|
| 89 |
+
0.0,
|
| 90 |
+
0.0
|
| 91 |
+
],
|
| 92 |
+
"val_logit_kl": [
|
| 93 |
+
0.05169298785608744,
|
| 94 |
+
0.02420634076909849,
|
| 95 |
+
0.007758840788109249
|
| 96 |
+
],
|
| 97 |
+
"val_kl_contribution": [
|
| 98 |
+
0.0005169298669883664,
|
| 99 |
+
0.00024206340252859213,
|
| 100 |
+
7.758840610276382e-05
|
| 101 |
+
]
|
| 102 |
+
}
|
pythia14m_layer5_exp4/weights.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:965f5bbd4de6b75611e9efa059e2b89dd8daf2885f12722f64f739f4d40786bc
|
| 3 |
+
size 532117
|
pythia160m_layer0_exp4/config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"d_in": 768, "d_sae": 3072, "l1_coefficient": 0.0, "dtype": "float32", "device": "cuda", "hook_layer": "0", "hook_name": "hook_resid_post", "hook_spec": "blocks.0.hook_resid_post", "top_k": 128, "top_k_aux": 64, "n_batches_to_dead": 200, "aux_penalty": 0.03125}
|
pythia160m_layer0_exp4/history.json
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"loss": [
|
| 3 |
+
0.01408560986370862,
|
| 4 |
+
0.007514906397444151,
|
| 5 |
+
0.008009068454698134
|
| 6 |
+
],
|
| 7 |
+
"recon_loss": [
|
| 8 |
+
0.011892832059818668,
|
| 9 |
+
0.006303946278849933,
|
| 10 |
+
0.006514605752281135
|
| 11 |
+
],
|
| 12 |
+
"l1_loss": [
|
| 13 |
+
0.016570691592661904,
|
| 14 |
+
0.015300610695718503,
|
| 15 |
+
0.014500189677108916
|
| 16 |
+
],
|
| 17 |
+
"sparsity": [
|
| 18 |
+
125.82674106935255,
|
| 19 |
+
119.6446358557461,
|
| 20 |
+
113.00992185631577
|
| 21 |
+
],
|
| 22 |
+
"recon_contribution": [
|
| 23 |
+
0.011892832059818668,
|
| 24 |
+
0.006303946278849933,
|
| 25 |
+
0.006514605752281135
|
| 26 |
+
],
|
| 27 |
+
"l1_contribution": [
|
| 28 |
+
0.0,
|
| 29 |
+
0.0,
|
| 30 |
+
0.0
|
| 31 |
+
],
|
| 32 |
+
"aux_loss": [
|
| 33 |
+
0.0,
|
| 34 |
+
0.0,
|
| 35 |
+
0.0
|
| 36 |
+
],
|
| 37 |
+
"logit_kl": [
|
| 38 |
+
0.21927778604788845,
|
| 39 |
+
0.12109601487766723,
|
| 40 |
+
0.1494462743350843
|
| 41 |
+
],
|
| 42 |
+
"kl_contribution": [
|
| 43 |
+
0.002192777817234156,
|
| 44 |
+
0.0012109601214254071,
|
| 45 |
+
0.0014944627094256762
|
| 46 |
+
],
|
| 47 |
+
"dead_features": [
|
| 48 |
+
0,
|
| 49 |
+
0,
|
| 50 |
+
0
|
| 51 |
+
],
|
| 52 |
+
"dead_feature_percentage": [
|
| 53 |
+
0.0,
|
| 54 |
+
0.0,
|
| 55 |
+
0.0
|
| 56 |
+
],
|
| 57 |
+
"val_loss": [
|
| 58 |
+
0.01905087376617774,
|
| 59 |
+
0.01948434958550584,
|
| 60 |
+
0.017442850234150764
|
| 61 |
+
],
|
| 62 |
+
"val_recon_loss": [
|
| 63 |
+
0.016354236604827415,
|
| 64 |
+
0.016351540761808697,
|
| 65 |
+
0.014324624026314837
|
| 66 |
+
],
|
| 67 |
+
"val_l1_loss": [
|
| 68 |
+
0.017131587560272738,
|
| 69 |
+
0.016653423454681057,
|
| 70 |
+
0.017019785634650206
|
| 71 |
+
],
|
| 72 |
+
"val_sparsity": [
|
| 73 |
+
127.36297070191844,
|
| 74 |
+
126.85029163633631,
|
| 75 |
+
126.02939505339806
|
| 76 |
+
],
|
| 77 |
+
"val_recon_contribution": [
|
| 78 |
+
0.016354236604827415,
|
| 79 |
+
0.016351540761808697,
|
| 80 |
+
0.014324624026314837
|
| 81 |
+
],
|
| 82 |
+
"val_l1_contribution": [
|
| 83 |
+
0.0,
|
| 84 |
+
0.0,
|
| 85 |
+
0.0
|
| 86 |
+
],
|
| 87 |
+
"val_aux_loss": [
|
| 88 |
+
0.0,
|
| 89 |
+
0.0,
|
| 90 |
+
0.0
|
| 91 |
+
],
|
| 92 |
+
"val_logit_kl": [
|
| 93 |
+
0.2696637213991951,
|
| 94 |
+
0.313280890024716,
|
| 95 |
+
0.3118226270520858
|
| 96 |
+
],
|
| 97 |
+
"val_kl_contribution": [
|
| 98 |
+
0.0026966371536131088,
|
| 99 |
+
0.003132808827268164,
|
| 100 |
+
0.003118226198313203
|
| 101 |
+
]
|
| 102 |
+
}
|
pythia160m_layer0_exp4/weights.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:219eb0dcd5a5ff3fcd2515f3d36465b84faca3614459ee4315571b5a3e325507
|
| 3 |
+
size 18907797
|
pythia160m_layer11_exp4/config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"d_in": 768, "d_sae": 3072, "l1_coefficient": 0.0, "dtype": "float32", "device": "cuda", "hook_layer": "11", "hook_name": "hook_resid_post", "hook_spec": "blocks.11.hook_resid_post", "top_k": 128, "top_k_aux": 64, "n_batches_to_dead": 200, "aux_penalty": 0.03125}
|
pythia160m_layer11_exp4/history.json
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"loss": [
|
| 3 |
+
0.31261095851659776,
|
| 4 |
+
0.2187371599532309,
|
| 5 |
+
0.19628824485199792
|
| 6 |
+
],
|
| 7 |
+
"recon_loss": [
|
| 8 |
+
0.29276702486130657,
|
| 9 |
+
0.20466143189847064,
|
| 10 |
+
0.1841151620812562
|
| 11 |
+
],
|
| 12 |
+
"l1_loss": [
|
| 13 |
+
0.15303128357647228,
|
| 14 |
+
0.15231893757472234,
|
| 15 |
+
0.13958976851130017
|
| 16 |
+
],
|
| 17 |
+
"sparsity": [
|
| 18 |
+
127.85488501308727,
|
| 19 |
+
127.70968831899215,
|
| 20 |
+
127.59238116724961
|
| 21 |
+
],
|
| 22 |
+
"recon_contribution": [
|
| 23 |
+
0.29276702486130657,
|
| 24 |
+
0.20466143189847064,
|
| 25 |
+
0.1841151620812562
|
| 26 |
+
],
|
| 27 |
+
"l1_contribution": [
|
| 28 |
+
0.0,
|
| 29 |
+
0.0,
|
| 30 |
+
0.0
|
| 31 |
+
],
|
| 32 |
+
"aux_loss": [
|
| 33 |
+
0.00677939847232077,
|
| 34 |
+
0.006443772708433268,
|
| 35 |
+
0.00583030989418934
|
| 36 |
+
],
|
| 37 |
+
"logit_kl": [
|
| 38 |
+
1.3064536393094226,
|
| 39 |
+
0.7631955589161438,
|
| 40 |
+
0.6342773145964357
|
| 41 |
+
],
|
| 42 |
+
"kl_contribution": [
|
| 43 |
+
0.01306453608910927,
|
| 44 |
+
0.00763195541443392,
|
| 45 |
+
0.006342773007222635
|
| 46 |
+
],
|
| 47 |
+
"dead_features": [
|
| 48 |
+
8,
|
| 49 |
+
3,
|
| 50 |
+
2
|
| 51 |
+
],
|
| 52 |
+
"dead_feature_percentage": [
|
| 53 |
+
0.2604166666666667,
|
| 54 |
+
0.09765625,
|
| 55 |
+
0.06510416666666667
|
| 56 |
+
],
|
| 57 |
+
"val_loss": [
|
| 58 |
+
0.3203053327649324,
|
| 59 |
+
0.3009165501972405,
|
| 60 |
+
0.2737972388817245
|
| 61 |
+
],
|
| 62 |
+
"val_recon_loss": [
|
| 63 |
+
0.29953614784875693,
|
| 64 |
+
0.2827136098942386,
|
| 65 |
+
0.25727858912420665
|
| 66 |
+
],
|
| 67 |
+
"val_l1_loss": [
|
| 68 |
+
0.1699585138065277,
|
| 69 |
+
0.15156140507547403,
|
| 70 |
+
0.15585914083541294
|
| 71 |
+
],
|
| 72 |
+
"val_sparsity": [
|
| 73 |
+
127.99982987542337,
|
| 74 |
+
127.99906710759262,
|
| 75 |
+
127.99931296444134
|
| 76 |
+
],
|
| 77 |
+
"val_recon_contribution": [
|
| 78 |
+
0.29953614784875693,
|
| 79 |
+
0.2827136098942386,
|
| 80 |
+
0.25727858912420665
|
| 81 |
+
],
|
| 82 |
+
"val_l1_contribution": [
|
| 83 |
+
0.0,
|
| 84 |
+
0.0,
|
| 85 |
+
0.0
|
| 86 |
+
],
|
| 87 |
+
"val_aux_loss": [
|
| 88 |
+
0.009392711516766825,
|
| 89 |
+
0.008806341445332504,
|
| 90 |
+
0.00810368043997775
|
| 91 |
+
],
|
| 92 |
+
"val_logit_kl": [
|
| 93 |
+
1.1376474124278515,
|
| 94 |
+
0.9396599215273151,
|
| 95 |
+
0.8414969695370134
|
| 96 |
+
],
|
| 97 |
+
"val_kl_contribution": [
|
| 98 |
+
0.011376473860784695,
|
| 99 |
+
0.009396599003208399,
|
| 100 |
+
0.00841496950688365
|
| 101 |
+
]
|
| 102 |
+
}
|
pythia160m_layer11_exp4/weights.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4020595dfc8ac3447accfe37dc3e463644822c9ba780221f1ab0016b06462ee7
|
| 3 |
+
size 18907797
|
pythia160m_layer2_exp4/config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"d_in": 768, "d_sae": 3072, "l1_coefficient": 0.0, "dtype": "float32", "device": "cuda", "hook_layer": "2", "hook_name": "hook_resid_post", "hook_spec": "blocks.2.hook_resid_post", "top_k": 128, "top_k_aux": 64, "n_batches_to_dead": 200, "aux_penalty": 0.03125}
|
pythia160m_layer2_exp4/history.json
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"loss": [
|
| 3 |
+
0.022805505420151008,
|
| 4 |
+
0.013441352083404757,
|
| 5 |
+
0.011892197134454741
|
| 6 |
+
],
|
| 7 |
+
"recon_loss": [
|
| 8 |
+
0.02104288216927374,
|
| 9 |
+
0.012321489272309707,
|
| 10 |
+
0.010873594475897396
|
| 11 |
+
],
|
| 12 |
+
"l1_loss": [
|
| 13 |
+
0.02589880668108358,
|
| 14 |
+
0.0255978409492341,
|
| 15 |
+
0.02548265174335363
|
| 16 |
+
],
|
| 17 |
+
"sparsity": [
|
| 18 |
+
127.99894944275317,
|
| 19 |
+
127.82450345616762,
|
| 20 |
+
127.5587693298755
|
| 21 |
+
],
|
| 22 |
+
"recon_contribution": [
|
| 23 |
+
0.02104288216927374,
|
| 24 |
+
0.012321489272309707,
|
| 25 |
+
0.010873594475897396
|
| 26 |
+
],
|
| 27 |
+
"l1_contribution": [
|
| 28 |
+
0.0,
|
| 29 |
+
0.0,
|
| 30 |
+
0.0
|
| 31 |
+
],
|
| 32 |
+
"aux_loss": [
|
| 33 |
+
0.0003576286187887249,
|
| 34 |
+
0.0003886501407559619,
|
| 35 |
+
0.00034346553040858434
|
| 36 |
+
],
|
| 37 |
+
"logit_kl": [
|
| 38 |
+
0.1404994673342729,
|
| 39 |
+
0.07312126973403149,
|
| 40 |
+
0.06751371551761214
|
| 41 |
+
],
|
| 42 |
+
"kl_contribution": [
|
| 43 |
+
0.001404994644234939,
|
| 44 |
+
0.0007312126809114996,
|
| 45 |
+
0.0006751371405426363
|
| 46 |
+
],
|
| 47 |
+
"dead_features": [
|
| 48 |
+
0,
|
| 49 |
+
0,
|
| 50 |
+
0
|
| 51 |
+
],
|
| 52 |
+
"dead_feature_percentage": [
|
| 53 |
+
0.0,
|
| 54 |
+
0.0,
|
| 55 |
+
0.0
|
| 56 |
+
],
|
| 57 |
+
"val_loss": [
|
| 58 |
+
0.03972662800457011,
|
| 59 |
+
0.03050375607654442,
|
| 60 |
+
0.02940352670289703
|
| 61 |
+
],
|
| 62 |
+
"val_recon_loss": [
|
| 63 |
+
0.03624709893205923,
|
| 64 |
+
0.027982315392028376,
|
| 65 |
+
0.026998186516356493
|
| 66 |
+
],
|
| 67 |
+
"val_l1_loss": [
|
| 68 |
+
0.0314848783877364,
|
| 69 |
+
0.03010873408796394,
|
| 70 |
+
0.02960533726180827
|
| 71 |
+
],
|
| 72 |
+
"val_sparsity": [
|
| 73 |
+
127.999844010866,
|
| 74 |
+
127.9903848930208,
|
| 75 |
+
127.97814808268895
|
| 76 |
+
],
|
| 77 |
+
"val_recon_contribution": [
|
| 78 |
+
0.03624709893205923,
|
| 79 |
+
0.027982315392028376,
|
| 80 |
+
0.026998186516356493
|
| 81 |
+
],
|
| 82 |
+
"val_l1_contribution": [
|
| 83 |
+
0.0,
|
| 84 |
+
0.0,
|
| 85 |
+
0.0
|
| 86 |
+
],
|
| 87 |
+
"val_aux_loss": [
|
| 88 |
+
0.0011356308797065514,
|
| 89 |
+
0.0008778145541581398,
|
| 90 |
+
0.0008475123470171082
|
| 91 |
+
],
|
| 92 |
+
"val_logit_kl": [
|
| 93 |
+
0.2343898291541511,
|
| 94 |
+
0.16436261869332136,
|
| 95 |
+
0.15578279077712176
|
| 96 |
+
],
|
| 97 |
+
"val_kl_contribution": [
|
| 98 |
+
0.0023438982390887506,
|
| 99 |
+
0.0016436261488677046,
|
| 100 |
+
0.0015578278736465298
|
| 101 |
+
]
|
| 102 |
+
}
|
pythia160m_layer2_exp4/weights.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:df96f23810e4560648c49d7349f8860cb0ecfe9af5208dbe0b4dfc33fe248aac
|
| 3 |
+
size 18907797
|
pythia160m_layer5_exp4/config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"d_in": 768, "d_sae": 3072, "l1_coefficient": 0.0, "dtype": "float32", "device": "cuda", "hook_layer": "5", "hook_name": "hook_resid_post", "hook_spec": "blocks.5.hook_resid_post", "top_k": 128, "top_k_aux": 64, "n_batches_to_dead": 200, "aux_penalty": 0.03125}
|
pythia160m_layer5_exp4/history.json
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"loss": [
|
| 3 |
+
0.040373627999227266,
|
| 4 |
+
0.0235105927501406,
|
| 5 |
+
0.021295735231112865,
|
| 6 |
+
0.02013097347653642,
|
| 7 |
+
0.019527294850420384,
|
| 8 |
+
0.02013132228344983,
|
| 9 |
+
0.020482758718992577,
|
| 10 |
+
0.01904547135662414
|
| 11 |
+
],
|
| 12 |
+
"recon_loss": [
|
| 13 |
+
0.03780413387710748,
|
| 14 |
+
0.022013843651911637,
|
| 15 |
+
0.01985880850947329,
|
| 16 |
+
0.018754747890088023,
|
| 17 |
+
0.018214199935295144,
|
| 18 |
+
0.01880487274365867,
|
| 19 |
+
0.01909509737513402,
|
| 20 |
+
0.0177838154010424
|
| 21 |
+
],
|
| 22 |
+
"l1_loss": [
|
| 23 |
+
0.043025053436962925,
|
| 24 |
+
0.03850345717248868,
|
| 25 |
+
0.03658427574044588,
|
| 26 |
+
0.035529070059914576,
|
| 27 |
+
0.03519518073318767,
|
| 28 |
+
0.034862890761016174,
|
| 29 |
+
0.03476217700307872,
|
| 30 |
+
0.035215672171440254
|
| 31 |
+
],
|
| 32 |
+
"sparsity": [
|
| 33 |
+
127.99999003507654,
|
| 34 |
+
127.99983514279735,
|
| 35 |
+
127.99785971090097,
|
| 36 |
+
127.990553896119,
|
| 37 |
+
127.9786807936065,
|
| 38 |
+
127.85034133495928,
|
| 39 |
+
127.71922843569801,
|
| 40 |
+
127.97289638389536
|
| 41 |
+
],
|
| 42 |
+
"recon_contribution": [
|
| 43 |
+
0.03780413387710748,
|
| 44 |
+
0.022013843651911637,
|
| 45 |
+
0.01985880850947329,
|
| 46 |
+
0.018754747890088023,
|
| 47 |
+
0.018214199935295144,
|
| 48 |
+
0.01880487274365867,
|
| 49 |
+
0.01909509737513402,
|
| 50 |
+
0.0177838154010424
|
| 51 |
+
],
|
| 52 |
+
"l1_contribution": [
|
| 53 |
+
0.0,
|
| 54 |
+
0.0,
|
| 55 |
+
0.0,
|
| 56 |
+
0.0,
|
| 57 |
+
0.0,
|
| 58 |
+
0.0,
|
| 59 |
+
0.0,
|
| 60 |
+
0.0
|
| 61 |
+
],
|
| 62 |
+
"aux_loss": [
|
| 63 |
+
0.0007852440172502277,
|
| 64 |
+
0.0006900960318173985,
|
| 65 |
+
0.000623730818291993,
|
| 66 |
+
0.0005906092030309825,
|
| 67 |
+
0.000575056742301819,
|
| 68 |
+
0.0005994714941864288,
|
| 69 |
+
0.0006408378134598816,
|
| 70 |
+
0.0005992114398593628
|
| 71 |
+
],
|
| 72 |
+
"logit_kl": [
|
| 73 |
+
0.17842501832463709,
|
| 74 |
+
0.0806653115733647,
|
| 75 |
+
0.0813195915373207,
|
| 76 |
+
0.07856163996316137,
|
| 77 |
+
0.07380382034065974,
|
| 78 |
+
0.07269781013811324,
|
| 79 |
+
0.0746823544012142,
|
| 80 |
+
0.0662444544582292
|
| 81 |
+
],
|
| 82 |
+
"kl_contribution": [
|
| 83 |
+
0.0017842501426451416,
|
| 84 |
+
0.0008066530980793031,
|
| 85 |
+
0.000813195896873466,
|
| 86 |
+
0.0007856163825462794,
|
| 87 |
+
0.0007380381860884324,
|
| 88 |
+
0.0007269780845880243,
|
| 89 |
+
0.0007468235271517187,
|
| 90 |
+
0.0006624445293041669
|
| 91 |
+
],
|
| 92 |
+
"dead_features": [
|
| 93 |
+
0,
|
| 94 |
+
0,
|
| 95 |
+
0,
|
| 96 |
+
0,
|
| 97 |
+
0,
|
| 98 |
+
0,
|
| 99 |
+
0,
|
| 100 |
+
0
|
| 101 |
+
],
|
| 102 |
+
"dead_feature_percentage": [
|
| 103 |
+
0.0,
|
| 104 |
+
0.0,
|
| 105 |
+
0.0,
|
| 106 |
+
0.0,
|
| 107 |
+
0.0,
|
| 108 |
+
0.0,
|
| 109 |
+
0.0,
|
| 110 |
+
0.0
|
| 111 |
+
],
|
| 112 |
+
"val_loss": [
|
| 113 |
+
0.050047308106252186,
|
| 114 |
+
0.045878467521418824,
|
| 115 |
+
0.04193598931069958,
|
| 116 |
+
0.03932654552146457,
|
| 117 |
+
0.03896158611771397,
|
| 118 |
+
0.03918718963470399,
|
| 119 |
+
0.03783226667902729,
|
| 120 |
+
0.03736098678865112
|
| 121 |
+
],
|
| 122 |
+
"val_recon_loss": [
|
| 123 |
+
0.04597878260074543,
|
| 124 |
+
0.04146085552434452,
|
| 125 |
+
0.038738831965317515,
|
| 126 |
+
0.036382041950268895,
|
| 127 |
+
0.036052692481573737,
|
| 128 |
+
0.03624962614538134,
|
| 129 |
+
0.03494560202087219,
|
| 130 |
+
0.0344847117835718
|
| 131 |
+
],
|
| 132 |
+
"val_l1_loss": [
|
| 133 |
+
0.06750534617563617,
|
| 134 |
+
0.05467167289466702,
|
| 135 |
+
0.04928356658187381,
|
| 136 |
+
0.045380183623185674,
|
| 137 |
+
0.04458471999872519,
|
| 138 |
+
0.042920941640587996,
|
| 139 |
+
0.04257036754182186,
|
| 140 |
+
0.0421116140295879
|
| 141 |
+
],
|
| 142 |
+
"val_sparsity": [
|
| 143 |
+
127.99998989508278,
|
| 144 |
+
127.99992429893197,
|
| 145 |
+
127.99980824060316,
|
| 146 |
+
127.99982074303293,
|
| 147 |
+
127.9998119330845,
|
| 148 |
+
127.99730209111355,
|
| 149 |
+
127.99881933689443,
|
| 150 |
+
127.99898187888262
|
| 151 |
+
],
|
| 152 |
+
"val_recon_contribution": [
|
| 153 |
+
0.04597878260074543,
|
| 154 |
+
0.04146085552434452,
|
| 155 |
+
0.038738831965317515,
|
| 156 |
+
0.036382041950268895,
|
| 157 |
+
0.036052692481573737,
|
| 158 |
+
0.03624962614538134,
|
| 159 |
+
0.03494560202087219,
|
| 160 |
+
0.0344847117835718
|
| 161 |
+
],
|
| 162 |
+
"val_l1_contribution": [
|
| 163 |
+
0.0,
|
| 164 |
+
0.0,
|
| 165 |
+
0.0,
|
| 166 |
+
0.0,
|
| 167 |
+
0.0,
|
| 168 |
+
0.0,
|
| 169 |
+
0.0,
|
| 170 |
+
0.0
|
| 171 |
+
],
|
| 172 |
+
"val_aux_loss": [
|
| 173 |
+
0.0014391526589458954,
|
| 174 |
+
0.001297814393837904,
|
| 175 |
+
0.0012136513375072158,
|
| 176 |
+
0.0011413065298735313,
|
| 177 |
+
0.001132682456109443,
|
| 178 |
+
0.001155864660366741,
|
| 179 |
+
0.0011376148454526574,
|
| 180 |
+
0.001125563739527724
|
| 181 |
+
],
|
| 182 |
+
"val_logit_kl": [
|
| 183 |
+
0.26293730226376466,
|
| 184 |
+
0.31197976425604995,
|
| 185 |
+
0.19835060795524043,
|
| 186 |
+
0.18031970656336613,
|
| 187 |
+
0.17762112186303655,
|
| 188 |
+
0.1781698907988975,
|
| 189 |
+
0.17490498527589915,
|
| 190 |
+
0.1750711383654824
|
| 191 |
+
],
|
| 192 |
+
"val_kl_contribution": [
|
| 193 |
+
0.0026293729636507096,
|
| 194 |
+
0.0031197975703433217,
|
| 195 |
+
0.0019835060334671734,
|
| 196 |
+
0.001803197027077732,
|
| 197 |
+
0.0017762111800506295,
|
| 198 |
+
0.001781698869566364,
|
| 199 |
+
0.0017490498144681114,
|
| 200 |
+
0.0017507113439752134
|
| 201 |
+
]
|
| 202 |
+
}
|
pythia160m_layer5_exp4/weights.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:89b5e158449e476f097c7c038296ee4966b8369926fa44f2ca7d92e389e499ff
|
| 3 |
+
size 18907797
|
pythia160m_layer8_exp4/config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"d_in": 768, "d_sae": 3072, "l1_coefficient": 0.0, "dtype": "float32", "device": "cuda", "hook_layer": "8", "hook_name": "hook_resid_post", "hook_spec": "blocks.8.hook_resid_post", "top_k": 128, "top_k_aux": 64, "n_batches_to_dead": 200, "aux_penalty": 0.03125}
|
pythia160m_layer8_exp4/history.json
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"loss": [
|
| 3 |
+
0.06083375318165945
|
| 4 |
+
],
|
| 5 |
+
"recon_loss": [
|
| 6 |
+
0.05789181708934761
|
| 7 |
+
],
|
| 8 |
+
"l1_loss": [
|
| 9 |
+
0.05494845615965979
|
| 10 |
+
],
|
| 11 |
+
"sparsity": [
|
| 12 |
+
127.99917661705796
|
| 13 |
+
],
|
| 14 |
+
"recon_contribution": [
|
| 15 |
+
0.05789181708934761
|
| 16 |
+
],
|
| 17 |
+
"l1_contribution": [
|
| 18 |
+
0.0
|
| 19 |
+
],
|
| 20 |
+
"aux_loss": [
|
| 21 |
+
0.0012162892464279088
|
| 22 |
+
],
|
| 23 |
+
"logit_kl": [
|
| 24 |
+
0.17256469051702086
|
| 25 |
+
],
|
| 26 |
+
"kl_contribution": [
|
| 27 |
+
0.0017256468694441679
|
| 28 |
+
],
|
| 29 |
+
"dead_features": [
|
| 30 |
+
0
|
| 31 |
+
],
|
| 32 |
+
"dead_feature_percentage": [
|
| 33 |
+
0.0
|
| 34 |
+
],
|
| 35 |
+
"val_loss": [
|
| 36 |
+
0.0773609928601379
|
| 37 |
+
],
|
| 38 |
+
"val_recon_loss": [
|
| 39 |
+
0.07241062774756528
|
| 40 |
+
],
|
| 41 |
+
"val_l1_loss": [
|
| 42 |
+
0.07569787406248955
|
| 43 |
+
],
|
| 44 |
+
"val_sparsity": [
|
| 45 |
+
127.99990451035242
|
| 46 |
+
],
|
| 47 |
+
"val_recon_contribution": [
|
| 48 |
+
0.07241062774756528
|
| 49 |
+
],
|
| 50 |
+
"val_l1_contribution": [
|
| 51 |
+
0.0
|
| 52 |
+
],
|
| 53 |
+
"val_aux_loss": [
|
| 54 |
+
0.0022719658701886927
|
| 55 |
+
],
|
| 56 |
+
"val_logit_kl": [
|
| 57 |
+
0.26783993265298894
|
| 58 |
+
],
|
| 59 |
+
"val_kl_contribution": [
|
| 60 |
+
0.0026783992648019943
|
| 61 |
+
]
|
| 62 |
+
}
|
pythia160m_layer8_exp4/weights.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ad0bbf1910051156ccbf579f7681eb36ca30f1ed828526c0aea5c17b141e0e47
|
| 3 |
+
size 18907797
|
pythia31m_layer0_exp4/config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"d_in": 256, "d_sae": 1024, "l1_coefficient": 0.0, "dtype": "float32", "device": "cuda", "hook_layer": "0", "hook_name": "hook_resid_post", "hook_spec": "blocks.0.hook_resid_post", "top_k": 128, "top_k_aux": 64, "n_batches_to_dead": 200, "aux_penalty": 0.03125}
|
pythia31m_layer0_exp4/history.json
ADDED
|
@@ -0,0 +1,242 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"loss": [
|
| 3 |
+
0.004300681019845012,
|
| 4 |
+
0.0020750122649355125,
|
| 5 |
+
0.0016903404607340362,
|
| 6 |
+
0.0014801529093946375,
|
| 7 |
+
0.0013317227194139805,
|
| 8 |
+
0.0011789969790506323,
|
| 9 |
+
0.0010967958886928504,
|
| 10 |
+
0.0010386491676614182,
|
| 11 |
+
0.0009632098909802126,
|
| 12 |
+
0.0009150082608501084
|
| 13 |
+
],
|
| 14 |
+
"recon_loss": [
|
| 15 |
+
0.002484193599711894,
|
| 16 |
+
0.0012011405934867856,
|
| 17 |
+
0.0009437641815822014,
|
| 18 |
+
0.0007954730217616765,
|
| 19 |
+
0.0006917270887436044,
|
| 20 |
+
0.0005960185320305464,
|
| 21 |
+
0.0005401509802396783,
|
| 22 |
+
0.0004984021615251788,
|
| 23 |
+
0.00045303893673293694,
|
| 24 |
+
0.0004234032033370705
|
| 25 |
+
],
|
| 26 |
+
"l1_loss": [
|
| 27 |
+
0.018314964766740906,
|
| 28 |
+
0.018118779445422,
|
| 29 |
+
0.018788140240089863,
|
| 30 |
+
0.019709822628816112,
|
| 31 |
+
0.02073845377499525,
|
| 32 |
+
0.021730719046319028,
|
| 33 |
+
0.02260436980009836,
|
| 34 |
+
0.0232727119070351,
|
| 35 |
+
0.024081100204354406,
|
| 36 |
+
0.024300629562088127
|
| 37 |
+
],
|
| 38 |
+
"sparsity": [
|
| 39 |
+
127.99999921075229,
|
| 40 |
+
127.99115291394685,
|
| 41 |
+
127.54464618523627,
|
| 42 |
+
126.45392124016792,
|
| 43 |
+
125.14519540194807,
|
| 44 |
+
124.08716318092415,
|
| 45 |
+
123.38990746305988,
|
| 46 |
+
122.22088900668219,
|
| 47 |
+
121.69541614068615,
|
| 48 |
+
120.11455145418752
|
| 49 |
+
],
|
| 50 |
+
"recon_contribution": [
|
| 51 |
+
0.002484193599711894,
|
| 52 |
+
0.0012011405934867856,
|
| 53 |
+
0.0009437641815822014,
|
| 54 |
+
0.0007954730217616765,
|
| 55 |
+
0.0006917270887436044,
|
| 56 |
+
0.0005960185320305464,
|
| 57 |
+
0.0005401509802396783,
|
| 58 |
+
0.0004984021615251788,
|
| 59 |
+
0.00045303893673293694,
|
| 60 |
+
0.0004234032033370705
|
| 61 |
+
],
|
| 62 |
+
"l1_contribution": [
|
| 63 |
+
0.0,
|
| 64 |
+
0.0,
|
| 65 |
+
0.0,
|
| 66 |
+
0.0,
|
| 67 |
+
0.0,
|
| 68 |
+
0.0,
|
| 69 |
+
0.0,
|
| 70 |
+
0.0,
|
| 71 |
+
0.0,
|
| 72 |
+
0.0
|
| 73 |
+
],
|
| 74 |
+
"aux_loss": [
|
| 75 |
+
0.0,
|
| 76 |
+
0.0,
|
| 77 |
+
0.0,
|
| 78 |
+
0.0,
|
| 79 |
+
0.0,
|
| 80 |
+
0.0,
|
| 81 |
+
0.0,
|
| 82 |
+
0.0,
|
| 83 |
+
0.0,
|
| 84 |
+
0.0
|
| 85 |
+
],
|
| 86 |
+
"logit_kl": [
|
| 87 |
+
0.18164874549732019,
|
| 88 |
+
0.08738716913491974,
|
| 89 |
+
0.07465762978321627,
|
| 90 |
+
0.06846799008115557,
|
| 91 |
+
0.06399956472326213,
|
| 92 |
+
0.05829784584429433,
|
| 93 |
+
0.05566449223691344,
|
| 94 |
+
0.05402470177238952,
|
| 95 |
+
0.0510170965585621,
|
| 96 |
+
0.04916050680110167
|
| 97 |
+
],
|
| 98 |
+
"kl_contribution": [
|
| 99 |
+
0.0018164874141644552,
|
| 100 |
+
0.000873871671026167,
|
| 101 |
+
0.0007465762809477155,
|
| 102 |
+
0.0006846798862332306,
|
| 103 |
+
0.0006399956324398467,
|
| 104 |
+
0.0005829784454883055,
|
| 105 |
+
0.0005566449099585428,
|
| 106 |
+
0.0005402470059513694,
|
| 107 |
+
0.0005101709539303556,
|
| 108 |
+
0.0004916050573413729
|
| 109 |
+
],
|
| 110 |
+
"dead_features": [
|
| 111 |
+
0,
|
| 112 |
+
0,
|
| 113 |
+
0,
|
| 114 |
+
0,
|
| 115 |
+
0,
|
| 116 |
+
0,
|
| 117 |
+
0,
|
| 118 |
+
0,
|
| 119 |
+
0,
|
| 120 |
+
0
|
| 121 |
+
],
|
| 122 |
+
"dead_feature_percentage": [
|
| 123 |
+
0.0,
|
| 124 |
+
0.0,
|
| 125 |
+
0.0,
|
| 126 |
+
0.0,
|
| 127 |
+
0.0,
|
| 128 |
+
0.0,
|
| 129 |
+
0.0,
|
| 130 |
+
0.0,
|
| 131 |
+
0.0,
|
| 132 |
+
0.0
|
| 133 |
+
],
|
| 134 |
+
"val_loss": [
|
| 135 |
+
0.00479095759829058,
|
| 136 |
+
0.003840290768842613,
|
| 137 |
+
0.003157867785275903,
|
| 138 |
+
0.003035797289218229,
|
| 139 |
+
0.0024278055505148667,
|
| 140 |
+
0.0021996161770193984,
|
| 141 |
+
0.0020500354380638664,
|
| 142 |
+
0.0019430801729488946,
|
| 143 |
+
0.0017236777858620003,
|
| 144 |
+
0.0016295625032014555
|
| 145 |
+
],
|
| 146 |
+
"val_recon_loss": [
|
| 147 |
+
0.0024979233128068585,
|
| 148 |
+
0.001962823910870463,
|
| 149 |
+
0.0016170786069321203,
|
| 150 |
+
0.001491002234062273,
|
| 151 |
+
0.001214182953471631,
|
| 152 |
+
0.0010834264972208695,
|
| 153 |
+
0.0009939830791358274,
|
| 154 |
+
0.0009279896961269202,
|
| 155 |
+
0.0008203235080974899,
|
| 156 |
+
0.0007575288346015276
|
| 157 |
+
],
|
| 158 |
+
"val_l1_loss": [
|
| 159 |
+
0.018823917124498227,
|
| 160 |
+
0.019756025549271344,
|
| 161 |
+
0.021182555520905352,
|
| 162 |
+
0.0226428556838813,
|
| 163 |
+
0.024241922446928382,
|
| 164 |
+
0.025526566652548953,
|
| 165 |
+
0.02647227614603727,
|
| 166 |
+
0.027599845347176563,
|
| 167 |
+
0.02824329997461076,
|
| 168 |
+
0.028785429658075554
|
| 169 |
+
],
|
| 170 |
+
"val_sparsity": [
|
| 171 |
+
127.99999901600906,
|
| 172 |
+
127.99827320677491,
|
| 173 |
+
127.94499762289963,
|
| 174 |
+
127.8050213778268,
|
| 175 |
+
127.68573759600426,
|
| 176 |
+
127.56540580146644,
|
| 177 |
+
127.43755226932149,
|
| 178 |
+
127.32129372606269,
|
| 179 |
+
127.1562900456594,
|
| 180 |
+
126.93422992785989
|
| 181 |
+
],
|
| 182 |
+
"val_recon_contribution": [
|
| 183 |
+
0.0024979233128068585,
|
| 184 |
+
0.001962823910870463,
|
| 185 |
+
0.0016170786069321203,
|
| 186 |
+
0.001491002234062273,
|
| 187 |
+
0.001214182953471631,
|
| 188 |
+
0.0010834264972208695,
|
| 189 |
+
0.0009939830791358274,
|
| 190 |
+
0.0009279896961269202,
|
| 191 |
+
0.0008203235080974899,
|
| 192 |
+
0.0007575288346015276
|
| 193 |
+
],
|
| 194 |
+
"val_l1_contribution": [
|
| 195 |
+
0.0,
|
| 196 |
+
0.0,
|
| 197 |
+
0.0,
|
| 198 |
+
0.0,
|
| 199 |
+
0.0,
|
| 200 |
+
0.0,
|
| 201 |
+
0.0,
|
| 202 |
+
0.0,
|
| 203 |
+
0.0,
|
| 204 |
+
0.0
|
| 205 |
+
],
|
| 206 |
+
"val_aux_loss": [
|
| 207 |
+
0.0,
|
| 208 |
+
0.0,
|
| 209 |
+
0.0,
|
| 210 |
+
0.0,
|
| 211 |
+
0.0,
|
| 212 |
+
0.0,
|
| 213 |
+
0.0,
|
| 214 |
+
0.0,
|
| 215 |
+
0.0,
|
| 216 |
+
0.0
|
| 217 |
+
],
|
| 218 |
+
"val_logit_kl": [
|
| 219 |
+
0.22930343301592904,
|
| 220 |
+
0.18774669021909396,
|
| 221 |
+
0.1540789210679962,
|
| 222 |
+
0.15447950859187842,
|
| 223 |
+
0.12136226248581446,
|
| 224 |
+
0.11161897070604385,
|
| 225 |
+
0.10560523832621194,
|
| 226 |
+
0.10150904998142647,
|
| 227 |
+
0.09033542977042029,
|
| 228 |
+
0.0872033689675225
|
| 229 |
+
],
|
| 230 |
+
"val_kl_contribution": [
|
| 231 |
+
0.002293034278822354,
|
| 232 |
+
0.0018774668614614373,
|
| 233 |
+
0.0015407891760704587,
|
| 234 |
+
0.0015447950534113123,
|
| 235 |
+
0.0012136225976247838,
|
| 236 |
+
0.0011161896820982866,
|
| 237 |
+
0.0010560523594302853,
|
| 238 |
+
0.0010150904775621263,
|
| 239 |
+
0.0009033542776323405,
|
| 240 |
+
0.0008720336701595339
|
| 241 |
+
]
|
| 242 |
+
}
|
pythia31m_layer0_exp4/weights.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d6d4520603b2eed5092dde9c9895472ba4cc24f8e82c87b9f7fda1f18301def0
|
| 3 |
+
size 2110101
|
pythia31m_layer2_exp4/config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"d_in": 256, "d_sae": 1024, "l1_coefficient": 0.0, "dtype": "float32", "device": "cuda", "hook_layer": "2", "hook_name": "hook_resid_post", "hook_spec": "blocks.2.hook_resid_post", "top_k": 128, "top_k_aux": 64, "n_batches_to_dead": 200, "aux_penalty": 0.03125}
|
pythia31m_layer2_exp4/history.json
ADDED
|
@@ -0,0 +1,242 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"loss": [
|
| 3 |
+
0.004840253223749571,
|
| 4 |
+
0.002278674162434965,
|
| 5 |
+
0.0018178231114865586,
|
| 6 |
+
0.0015814056679794054,
|
| 7 |
+
0.0014277329646112623,
|
| 8 |
+
0.0013412063425413574,
|
| 9 |
+
0.0012756717862889252,
|
| 10 |
+
0.0012246038152108614,
|
| 11 |
+
0.0011730338734665044,
|
| 12 |
+
0.001148436724198999
|
| 13 |
+
],
|
| 14 |
+
"recon_loss": [
|
| 15 |
+
0.0034238550782004227,
|
| 16 |
+
0.00162121779585389,
|
| 17 |
+
0.0012685014878863641,
|
| 18 |
+
0.0010882465497057332,
|
| 19 |
+
0.0009699914813170146,
|
| 20 |
+
0.0009057100239727889,
|
| 21 |
+
0.0008553544285412651,
|
| 22 |
+
0.0008149448384840579,
|
| 23 |
+
0.0007787483570070011,
|
| 24 |
+
0.000759457324907444
|
| 25 |
+
],
|
| 26 |
+
"l1_loss": [
|
| 27 |
+
0.023966038391710735,
|
| 28 |
+
0.028137029547535032,
|
| 29 |
+
0.031138668188355364,
|
| 30 |
+
0.03273184595632953,
|
| 31 |
+
0.033466166094919085,
|
| 32 |
+
0.034175343001886745,
|
| 33 |
+
0.03464115422536803,
|
| 34 |
+
0.03475952192890828,
|
| 35 |
+
0.034991624315494095,
|
| 36 |
+
0.03512168232818048
|
| 37 |
+
],
|
| 38 |
+
"sparsity": [
|
| 39 |
+
127.99999894766972,
|
| 40 |
+
127.99999879535876,
|
| 41 |
+
127.99998512891156,
|
| 42 |
+
127.99940252563698,
|
| 43 |
+
127.98986901561923,
|
| 44 |
+
127.97287960017874,
|
| 45 |
+
127.95130734867712,
|
| 46 |
+
127.93205198921439,
|
| 47 |
+
127.92013491649593,
|
| 48 |
+
127.90370445459162
|
| 49 |
+
],
|
| 50 |
+
"recon_contribution": [
|
| 51 |
+
0.0034238550782004227,
|
| 52 |
+
0.00162121779585389,
|
| 53 |
+
0.0012685014878863641,
|
| 54 |
+
0.0010882465497057332,
|
| 55 |
+
0.0009699914813170146,
|
| 56 |
+
0.0009057100239727889,
|
| 57 |
+
0.0008553544285412651,
|
| 58 |
+
0.0008149448384840579,
|
| 59 |
+
0.0007787483570070011,
|
| 60 |
+
0.000759457324907444
|
| 61 |
+
],
|
| 62 |
+
"l1_contribution": [
|
| 63 |
+
0.0,
|
| 64 |
+
0.0,
|
| 65 |
+
0.0,
|
| 66 |
+
0.0,
|
| 67 |
+
0.0,
|
| 68 |
+
0.0,
|
| 69 |
+
0.0,
|
| 70 |
+
0.0,
|
| 71 |
+
0.0,
|
| 72 |
+
0.0
|
| 73 |
+
],
|
| 74 |
+
"aux_loss": [
|
| 75 |
+
0.0,
|
| 76 |
+
0.0,
|
| 77 |
+
0.0,
|
| 78 |
+
0.0,
|
| 79 |
+
0.0,
|
| 80 |
+
0.0,
|
| 81 |
+
0.0,
|
| 82 |
+
0.0,
|
| 83 |
+
0.0,
|
| 84 |
+
0.0
|
| 85 |
+
],
|
| 86 |
+
"logit_kl": [
|
| 87 |
+
0.14163981762439176,
|
| 88 |
+
0.06574563817636613,
|
| 89 |
+
0.05493216343670272,
|
| 90 |
+
0.04931591293975684,
|
| 91 |
+
0.045774149370401716,
|
| 92 |
+
0.043549632888537464,
|
| 93 |
+
0.04203173656442529,
|
| 94 |
+
0.04096589860474241,
|
| 95 |
+
0.03942855264139641,
|
| 96 |
+
0.03889794090252282
|
| 97 |
+
],
|
| 98 |
+
"kl_contribution": [
|
| 99 |
+
0.0014163981409009863,
|
| 100 |
+
0.0006574563673997852,
|
| 101 |
+
0.000549321622081619,
|
| 102 |
+
0.0004931591184849525,
|
| 103 |
+
0.000457741483624373,
|
| 104 |
+
0.00043549631904394866,
|
| 105 |
+
0.00042031735608382944,
|
| 106 |
+
0.0004096589767796236,
|
| 107 |
+
0.0003942855173970586,
|
| 108 |
+
0.00038897940029513544
|
| 109 |
+
],
|
| 110 |
+
"dead_features": [
|
| 111 |
+
0,
|
| 112 |
+
0,
|
| 113 |
+
0,
|
| 114 |
+
0,
|
| 115 |
+
0,
|
| 116 |
+
0,
|
| 117 |
+
0,
|
| 118 |
+
0,
|
| 119 |
+
0,
|
| 120 |
+
0
|
| 121 |
+
],
|
| 122 |
+
"dead_feature_percentage": [
|
| 123 |
+
0.0,
|
| 124 |
+
0.0,
|
| 125 |
+
0.0,
|
| 126 |
+
0.0,
|
| 127 |
+
0.0,
|
| 128 |
+
0.0,
|
| 129 |
+
0.0,
|
| 130 |
+
0.0,
|
| 131 |
+
0.0,
|
| 132 |
+
0.0
|
| 133 |
+
],
|
| 134 |
+
"val_loss": [
|
| 135 |
+
0.00502709000156541,
|
| 136 |
+
0.0039007034772147614,
|
| 137 |
+
0.0032288803249304874,
|
| 138 |
+
0.002927257581174096,
|
| 139 |
+
0.002804508758307038,
|
| 140 |
+
0.00264317928367192,
|
| 141 |
+
0.002608600578615129,
|
| 142 |
+
0.002434029965111639,
|
| 143 |
+
0.0023914756387434935,
|
| 144 |
+
0.002282403783945047
|
| 145 |
+
],
|
| 146 |
+
"val_recon_loss": [
|
| 147 |
+
0.003457292411298737,
|
| 148 |
+
0.002653297891127255,
|
| 149 |
+
0.0022225459569390122,
|
| 150 |
+
0.00200313399450686,
|
| 151 |
+
0.001894875585002576,
|
| 152 |
+
0.0017899646719077688,
|
| 153 |
+
0.0017393108522460257,
|
| 154 |
+
0.001639146436600913,
|
| 155 |
+
0.001598493763660973,
|
| 156 |
+
0.0015431521925652275
|
| 157 |
+
],
|
| 158 |
+
"val_l1_loss": [
|
| 159 |
+
0.029255775750008203,
|
| 160 |
+
0.03409305798544546,
|
| 161 |
+
0.03660890540881226,
|
| 162 |
+
0.03787013485547253,
|
| 163 |
+
0.03855880506532935,
|
| 164 |
+
0.03901114673451009,
|
| 165 |
+
0.039228656138889796,
|
| 166 |
+
0.03985278617817364,
|
| 167 |
+
0.03992428533313331,
|
| 168 |
+
0.04013821589409439
|
| 169 |
+
],
|
| 170 |
+
"val_sparsity": [
|
| 171 |
+
127.99999901600906,
|
| 172 |
+
127.99999901600906,
|
| 173 |
+
127.99998669533379,
|
| 174 |
+
127.99988733303752,
|
| 175 |
+
127.99878762615042,
|
| 176 |
+
127.99467266612872,
|
| 177 |
+
127.99287220524398,
|
| 178 |
+
127.99659067928305,
|
| 179 |
+
127.99316111052718,
|
| 180 |
+
127.99029159199422
|
| 181 |
+
],
|
| 182 |
+
"val_recon_contribution": [
|
| 183 |
+
0.003457292411298737,
|
| 184 |
+
0.002653297891127255,
|
| 185 |
+
0.0022225459569390122,
|
| 186 |
+
0.00200313399450686,
|
| 187 |
+
0.001894875585002576,
|
| 188 |
+
0.0017899646719077688,
|
| 189 |
+
0.0017393108522460257,
|
| 190 |
+
0.001639146436600913,
|
| 191 |
+
0.001598493763660973,
|
| 192 |
+
0.0015431521925652275
|
| 193 |
+
],
|
| 194 |
+
"val_l1_contribution": [
|
| 195 |
+
0.0,
|
| 196 |
+
0.0,
|
| 197 |
+
0.0,
|
| 198 |
+
0.0,
|
| 199 |
+
0.0,
|
| 200 |
+
0.0,
|
| 201 |
+
0.0,
|
| 202 |
+
0.0,
|
| 203 |
+
0.0,
|
| 204 |
+
0.0
|
| 205 |
+
],
|
| 206 |
+
"val_aux_loss": [
|
| 207 |
+
0.0,
|
| 208 |
+
0.0,
|
| 209 |
+
0.0,
|
| 210 |
+
0.0,
|
| 211 |
+
0.0,
|
| 212 |
+
0.0,
|
| 213 |
+
0.0,
|
| 214 |
+
0.0,
|
| 215 |
+
0.0,
|
| 216 |
+
0.0
|
| 217 |
+
],
|
| 218 |
+
"val_logit_kl": [
|
| 219 |
+
0.15697976241360242,
|
| 220 |
+
0.1247405610626714,
|
| 221 |
+
0.10063343908938899,
|
| 222 |
+
0.09241236033735223,
|
| 223 |
+
0.09096331913683979,
|
| 224 |
+
0.0853214634381079,
|
| 225 |
+
0.08692897454269576,
|
| 226 |
+
0.07948835468855259,
|
| 227 |
+
0.07929818935820886,
|
| 228 |
+
0.07392516069547303
|
| 229 |
+
],
|
| 230 |
+
"val_kl_contribution": [
|
| 231 |
+
0.001569797589473653,
|
| 232 |
+
0.0012474055831268984,
|
| 233 |
+
0.001006334367727135,
|
| 234 |
+
0.0009241235837066287,
|
| 235 |
+
0.0009096331720620641,
|
| 236 |
+
0.000853214615332741,
|
| 237 |
+
0.0008692897259197252,
|
| 238 |
+
0.000794883529224444,
|
| 239 |
+
0.0007929818747124443,
|
| 240 |
+
0.0007392515909833094
|
| 241 |
+
]
|
| 242 |
+
}
|
pythia31m_layer2_exp4/weights.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f34741f7fc57ee1eaeb9f406d1bcbd75be26738362375cf94b85f4f244bc225b
|
| 3 |
+
size 2110101
|
pythia31m_layer5_exp4/config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"d_in": 256, "d_sae": 1024, "l1_coefficient": 0.0, "dtype": "float32", "device": "cuda", "hook_layer": "5", "hook_name": "hook_resid_post", "hook_spec": "blocks.5.hook_resid_post", "top_k": 128, "top_k_aux": 64, "n_batches_to_dead": 200, "aux_penalty": 0.03125}
|
pythia31m_layer5_exp4/history.json
ADDED
|
@@ -0,0 +1,242 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"loss": [
|
| 3 |
+
0.01302572470217497,
|
| 4 |
+
0.006334375048786429,
|
| 5 |
+
0.005149282265527211,
|
| 6 |
+
0.004496611818289232,
|
| 7 |
+
0.004148950476407288,
|
| 8 |
+
0.003787454145203282,
|
| 9 |
+
0.003564906887585017,
|
| 10 |
+
0.0033458373704781507,
|
| 11 |
+
0.003125993215361485,
|
| 12 |
+
0.0030131820160827975
|
| 13 |
+
],
|
| 14 |
+
"recon_loss": [
|
| 15 |
+
0.01142719742367061,
|
| 16 |
+
0.005590697214642908,
|
| 17 |
+
0.004537759655951345,
|
| 18 |
+
0.003947360160720016,
|
| 19 |
+
0.003594230861316846,
|
| 20 |
+
0.0032965857957104414,
|
| 21 |
+
0.0030951943612334653,
|
| 22 |
+
0.0028815273986326465,
|
| 23 |
+
0.0027076934539068983,
|
| 24 |
+
0.002581914819645173
|
| 25 |
+
],
|
| 26 |
+
"l1_loss": [
|
| 27 |
+
0.05338030695212082,
|
| 28 |
+
0.06104766827975881,
|
| 29 |
+
0.0636382561596195,
|
| 30 |
+
0.06481172291801976,
|
| 31 |
+
0.0648203661830964,
|
| 32 |
+
0.06476055569067841,
|
| 33 |
+
0.0642494309673292,
|
| 34 |
+
0.0634408291638871,
|
| 35 |
+
0.06275553809996093,
|
| 36 |
+
0.06169423727342308
|
| 37 |
+
],
|
| 38 |
+
"sparsity": [
|
| 39 |
+
127.99999894766972,
|
| 40 |
+
127.9999989684394,
|
| 41 |
+
127.99999569375372,
|
| 42 |
+
127.99996844393804,
|
| 43 |
+
127.9994392810413,
|
| 44 |
+
127.99841874221275,
|
| 45 |
+
127.99723561835592,
|
| 46 |
+
127.99646395563863,
|
| 47 |
+
127.9961876427846,
|
| 48 |
+
127.9972310005645
|
| 49 |
+
],
|
| 50 |
+
"recon_contribution": [
|
| 51 |
+
0.01142719742367061,
|
| 52 |
+
0.005590697214642908,
|
| 53 |
+
0.004537759655951345,
|
| 54 |
+
0.003947360160720016,
|
| 55 |
+
0.003594230861316846,
|
| 56 |
+
0.0032965857957104414,
|
| 57 |
+
0.0030951943612334653,
|
| 58 |
+
0.0028815273986326465,
|
| 59 |
+
0.0027076934539068983,
|
| 60 |
+
0.002581914819645173
|
| 61 |
+
],
|
| 62 |
+
"l1_contribution": [
|
| 63 |
+
0.0,
|
| 64 |
+
0.0,
|
| 65 |
+
0.0,
|
| 66 |
+
0.0,
|
| 67 |
+
0.0,
|
| 68 |
+
0.0,
|
| 69 |
+
0.0,
|
| 70 |
+
0.0,
|
| 71 |
+
0.0,
|
| 72 |
+
0.0
|
| 73 |
+
],
|
| 74 |
+
"aux_loss": [
|
| 75 |
+
0.0,
|
| 76 |
+
0.0,
|
| 77 |
+
0.0,
|
| 78 |
+
0.0,
|
| 79 |
+
1.7302230243679277e-05,
|
| 80 |
+
3.6648317296062165e-05,
|
| 81 |
+
2.1797268164266805e-05,
|
| 82 |
+
3.2166564811902974e-05,
|
| 83 |
+
1.429139824549054e-05,
|
| 84 |
+
2.4387967948830974e-05
|
| 85 |
+
],
|
| 86 |
+
"logit_kl": [
|
| 87 |
+
0.15985273175320153,
|
| 88 |
+
0.07436778483415418,
|
| 89 |
+
0.061152261987788274,
|
| 90 |
+
0.05492516664007236,
|
| 91 |
+
0.05374173960984835,
|
| 92 |
+
0.04542200427887769,
|
| 93 |
+
0.04479152615775376,
|
| 94 |
+
0.0432143418233294,
|
| 95 |
+
0.04040083714918032,
|
| 96 |
+
0.040687923729521125
|
| 97 |
+
],
|
| 98 |
+
"kl_contribution": [
|
| 99 |
+
0.0015985272828884212,
|
| 100 |
+
0.0007436778312912397,
|
| 101 |
+
0.0006115226066179452,
|
| 102 |
+
0.0005492516540830951,
|
| 103 |
+
0.0005374173841865128,
|
| 104 |
+
0.00045422003226280346,
|
| 105 |
+
0.00044791525210638,
|
| 106 |
+
0.0004321434086908294,
|
| 107 |
+
0.0004040083621791057,
|
| 108 |
+
0.0004068792279936059
|
| 109 |
+
],
|
| 110 |
+
"dead_features": [
|
| 111 |
+
0,
|
| 112 |
+
0,
|
| 113 |
+
0,
|
| 114 |
+
0,
|
| 115 |
+
0,
|
| 116 |
+
0,
|
| 117 |
+
0,
|
| 118 |
+
0,
|
| 119 |
+
0,
|
| 120 |
+
0
|
| 121 |
+
],
|
| 122 |
+
"dead_feature_percentage": [
|
| 123 |
+
0.0,
|
| 124 |
+
0.0,
|
| 125 |
+
0.0,
|
| 126 |
+
0.0,
|
| 127 |
+
0.0,
|
| 128 |
+
0.0,
|
| 129 |
+
0.0,
|
| 130 |
+
0.0,
|
| 131 |
+
0.0,
|
| 132 |
+
0.0
|
| 133 |
+
],
|
| 134 |
+
"val_loss": [
|
| 135 |
+
0.012593392951398764,
|
| 136 |
+
0.010081298126552346,
|
| 137 |
+
0.00909839349830156,
|
| 138 |
+
0.009864159005370117,
|
| 139 |
+
0.007821473611060269,
|
| 140 |
+
0.007082399969910365,
|
| 141 |
+
0.006772226295971078,
|
| 142 |
+
0.007223451139361383,
|
| 143 |
+
0.005915704346959776,
|
| 144 |
+
0.005833740660381063
|
| 145 |
+
],
|
| 146 |
+
"val_recon_loss": [
|
| 147 |
+
0.010802852455103646,
|
| 148 |
+
0.008644133223411876,
|
| 149 |
+
0.007623522123068124,
|
| 150 |
+
0.0071035392608216116,
|
| 151 |
+
0.006411265383485526,
|
| 152 |
+
0.005986284447891416,
|
| 153 |
+
0.0056099123008495075,
|
| 154 |
+
0.0055146071770999045,
|
| 155 |
+
0.0049199635288986856,
|
| 156 |
+
0.004769040122975026
|
| 157 |
+
],
|
| 158 |
+
"val_l1_loss": [
|
| 159 |
+
0.05918895065459633,
|
| 160 |
+
0.06397366729462613,
|
| 161 |
+
0.06503240367927192,
|
| 162 |
+
0.06557243518348611,
|
| 163 |
+
0.06482489162093395,
|
| 164 |
+
0.06454214074739427,
|
| 165 |
+
0.06382626404378439,
|
| 166 |
+
0.061668518981426876,
|
| 167 |
+
0.0627917640188626,
|
| 168 |
+
0.06102580897557421
|
| 169 |
+
],
|
| 170 |
+
"val_sparsity": [
|
| 171 |
+
127.99999901600906,
|
| 172 |
+
127.99999901600906,
|
| 173 |
+
127.99999901600906,
|
| 174 |
+
127.99997027238643,
|
| 175 |
+
127.99944398968356,
|
| 176 |
+
127.99947956580665,
|
| 177 |
+
127.99942305562281,
|
| 178 |
+
127.9930292557414,
|
| 179 |
+
127.99932314589498,
|
| 180 |
+
127.99888194376074
|
| 181 |
+
],
|
| 182 |
+
"val_recon_contribution": [
|
| 183 |
+
0.010802852455103646,
|
| 184 |
+
0.008644133223411876,
|
| 185 |
+
0.007623522123068124,
|
| 186 |
+
0.0071035392608216116,
|
| 187 |
+
0.006411265383485526,
|
| 188 |
+
0.005986284447891416,
|
| 189 |
+
0.0056099123008495075,
|
| 190 |
+
0.0055146071770999045,
|
| 191 |
+
0.0049199635288986856,
|
| 192 |
+
0.004769040122975026
|
| 193 |
+
],
|
| 194 |
+
"val_l1_contribution": [
|
| 195 |
+
0.0,
|
| 196 |
+
0.0,
|
| 197 |
+
0.0,
|
| 198 |
+
0.0,
|
| 199 |
+
0.0,
|
| 200 |
+
0.0,
|
| 201 |
+
0.0,
|
| 202 |
+
0.0,
|
| 203 |
+
0.0,
|
| 204 |
+
0.0
|
| 205 |
+
],
|
| 206 |
+
"val_aux_loss": [
|
| 207 |
+
0.0,
|
| 208 |
+
0.0,
|
| 209 |
+
0.0,
|
| 210 |
+
0.0,
|
| 211 |
+
0.0002775527701288509,
|
| 212 |
+
0.0,
|
| 213 |
+
0.0,
|
| 214 |
+
0.00019326450697037026,
|
| 215 |
+
0.00017862027186734794,
|
| 216 |
+
0.00016590934168533858
|
| 217 |
+
],
|
| 218 |
+
"val_logit_kl": [
|
| 219 |
+
0.17905405332265822,
|
| 220 |
+
0.1437164928144915,
|
| 221 |
+
0.14748713988929202,
|
| 222 |
+
0.27606198113144365,
|
| 223 |
+
0.1132655481382406,
|
| 224 |
+
0.10961155515911902,
|
| 225 |
+
0.11623140145346969,
|
| 226 |
+
0.1515579490459691,
|
| 227 |
+
0.08171205628971205,
|
| 228 |
+
0.08987912293235949
|
| 229 |
+
],
|
| 230 |
+
"val_kl_contribution": [
|
| 231 |
+
0.001790540493281641,
|
| 232 |
+
0.0014371648950516657,
|
| 233 |
+
0.0014748713662458767,
|
| 234 |
+
0.0027606197468746964,
|
| 235 |
+
0.0011326554564942692,
|
| 236 |
+
0.0010961155265391627,
|
| 237 |
+
0.0011623139878522205,
|
| 238 |
+
0.0015155794566855017,
|
| 239 |
+
0.000817120544429273,
|
| 240 |
+
0.0008987912084618846
|
| 241 |
+
]
|
| 242 |
+
}
|
pythia31m_layer5_exp4/weights.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b33bf87955234d9181b1acd282cac084c9ebb5a1615370cdd87107ea3f1cf09
|
| 3 |
+
size 2110101
|
pythia410m_layer0_exp4/config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"d_in": 1024, "d_sae": 4096, "l1_coefficient": 0.0, "dtype": "float32", "device": "cuda", "hook_layer": "0", "hook_name": "hook_resid_post", "hook_spec": "blocks.0.hook_resid_post", "top_k": 128, "top_k_aux": 64, "n_batches_to_dead": 200, "aux_penalty": 0.03125}
|
pythia410m_layer0_exp4/history.json
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"loss": [
|
| 3 |
+
0.02322444988299709,
|
| 4 |
+
0.018646012573120964,
|
| 5 |
+
0.01737235312412633,
|
| 6 |
+
0.015005767014458228,
|
| 7 |
+
0.015878045804901708,
|
| 8 |
+
0.01305246714723572
|
| 9 |
+
],
|
| 10 |
+
"recon_loss": [
|
| 11 |
+
0.02027805474271555,
|
| 12 |
+
0.016173136122854335,
|
| 13 |
+
0.014859554402406806,
|
| 14 |
+
0.012985716067138761,
|
| 15 |
+
0.013644954730323879,
|
| 16 |
+
0.011271215657941902
|
| 17 |
+
],
|
| 18 |
+
"l1_loss": [
|
| 19 |
+
0.015994312166035203,
|
| 20 |
+
0.013097935160976952,
|
| 21 |
+
0.012171232466925672,
|
| 22 |
+
0.01153450303169275,
|
| 23 |
+
0.010803141293983007,
|
| 24 |
+
0.010980826322734694
|
| 25 |
+
],
|
| 26 |
+
"sparsity": [
|
| 27 |
+
122.42357081640004,
|
| 28 |
+
101.26598719027427,
|
| 29 |
+
81.48973336193825,
|
| 30 |
+
67.09398575486809,
|
| 31 |
+
54.792241452177294,
|
| 32 |
+
51.434536035610414
|
| 33 |
+
],
|
| 34 |
+
"recon_contribution": [
|
| 35 |
+
0.02027805474271555,
|
| 36 |
+
0.016173136122854335,
|
| 37 |
+
0.014859554402406806,
|
| 38 |
+
0.012985716067138761,
|
| 39 |
+
0.013644954730323879,
|
| 40 |
+
0.011271215657941902
|
| 41 |
+
],
|
| 42 |
+
"l1_contribution": [
|
| 43 |
+
0.0,
|
| 44 |
+
0.0,
|
| 45 |
+
0.0,
|
| 46 |
+
0.0,
|
| 47 |
+
0.0,
|
| 48 |
+
0.0
|
| 49 |
+
],
|
| 50 |
+
"aux_loss": [
|
| 51 |
+
0.000324454694522478,
|
| 52 |
+
0.0005382559986946366,
|
| 53 |
+
0.0005608199303003134,
|
| 54 |
+
0.00048689568389824906,
|
| 55 |
+
0.0005315753970913291,
|
| 56 |
+
0.000449218279342203
|
| 57 |
+
],
|
| 58 |
+
"logit_kl": [
|
| 59 |
+
0.2621940491085504,
|
| 60 |
+
0.19346204612525805,
|
| 61 |
+
0.19519788424354284,
|
| 62 |
+
0.1533155285579215,
|
| 63 |
+
0.1701515712614717,
|
| 64 |
+
0.1332033246496993
|
| 65 |
+
],
|
| 66 |
+
"kl_contribution": [
|
| 67 |
+
0.002621940434891342,
|
| 68 |
+
0.0019346204188037722,
|
| 69 |
+
0.0019519787990913207,
|
| 70 |
+
0.0015331552511537675,
|
| 71 |
+
0.0017015156734655754,
|
| 72 |
+
0.001332033215689189
|
| 73 |
+
],
|
| 74 |
+
"dead_features": [
|
| 75 |
+
0,
|
| 76 |
+
0,
|
| 77 |
+
0,
|
| 78 |
+
0,
|
| 79 |
+
0,
|
| 80 |
+
0
|
| 81 |
+
],
|
| 82 |
+
"dead_feature_percentage": [
|
| 83 |
+
0.0,
|
| 84 |
+
0.0,
|
| 85 |
+
0.0,
|
| 86 |
+
0.0,
|
| 87 |
+
0.0,
|
| 88 |
+
0.0
|
| 89 |
+
],
|
| 90 |
+
"val_loss": [
|
| 91 |
+
0.03434971954482271,
|
| 92 |
+
0.042083952062657914,
|
| 93 |
+
0.03296107299133343,
|
| 94 |
+
0.032812064094659366,
|
| 95 |
+
0.032438541241227116,
|
| 96 |
+
0.028229163100656082
|
| 97 |
+
],
|
| 98 |
+
"val_recon_loss": [
|
| 99 |
+
0.029236371849817155,
|
| 100 |
+
0.034351466901038435,
|
| 101 |
+
0.027217990994031986,
|
| 102 |
+
0.027579497245530927,
|
| 103 |
+
0.02698679796455158,
|
| 104 |
+
0.023628788665512483
|
| 105 |
+
],
|
| 106 |
+
"val_l1_loss": [
|
| 107 |
+
0.01531838950595697,
|
| 108 |
+
0.014435824888188398,
|
| 109 |
+
0.013629610239799349,
|
| 110 |
+
0.013034051475451513,
|
| 111 |
+
0.012736345495756161,
|
| 112 |
+
0.013262153712305656
|
| 113 |
+
],
|
| 114 |
+
"val_sparsity": [
|
| 115 |
+
126.35284980867517,
|
| 116 |
+
113.19826088142308,
|
| 117 |
+
94.04735224098576,
|
| 118 |
+
81.22015007882159,
|
| 119 |
+
69.82908706110426,
|
| 120 |
+
67.10531562656124
|
| 121 |
+
],
|
| 122 |
+
"val_recon_contribution": [
|
| 123 |
+
0.029236371849817155,
|
| 124 |
+
0.034351466901038435,
|
| 125 |
+
0.027217990994031986,
|
| 126 |
+
0.027579497245530927,
|
| 127 |
+
0.02698679796455158,
|
| 128 |
+
0.023628788665512483
|
| 129 |
+
],
|
| 130 |
+
"val_l1_contribution": [
|
| 131 |
+
0.0,
|
| 132 |
+
0.0,
|
| 133 |
+
0.0,
|
| 134 |
+
0.0,
|
| 135 |
+
0.0,
|
| 136 |
+
0.0
|
| 137 |
+
],
|
| 138 |
+
"val_aux_loss": [
|
| 139 |
+
0.0009785492232676361,
|
| 140 |
+
0.0011667576835746548,
|
| 141 |
+
0.000938603805312176,
|
| 142 |
+
0.0009437479296393874,
|
| 143 |
+
0.0009271282813519617,
|
| 144 |
+
0.0008241019077178663
|
| 145 |
+
],
|
| 146 |
+
"val_logit_kl": [
|
| 147 |
+
0.4134798564934178,
|
| 148 |
+
0.6565727608169225,
|
| 149 |
+
0.4804478284277899,
|
| 150 |
+
0.4288818996834625,
|
| 151 |
+
0.4524615113333592,
|
| 152 |
+
0.37762726124871976
|
| 153 |
+
],
|
| 154 |
+
"val_kl_contribution": [
|
| 155 |
+
0.004134798473681703,
|
| 156 |
+
0.0065657274607888024,
|
| 157 |
+
0.0048044781823761455,
|
| 158 |
+
0.004288818901995013,
|
| 159 |
+
0.004524615013399421,
|
| 160 |
+
0.003776272530757929
|
| 161 |
+
]
|
| 162 |
+
}
|
pythia410m_layer0_exp4/weights.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0048d4bdfd4a04d9127c7e85a5f26de7c50b118df12388d3106722fcafe0be1
|
| 3 |
+
size 33598101
|
pythia410m_layer12_exp4/config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"d_in": 1024, "d_sae": 4096, "l1_coefficient": 0.0, "dtype": "float32", "device": "cuda", "hook_layer": "12", "hook_name": "hook_resid_post", "hook_spec": "blocks.12.hook_resid_post", "top_k": 128, "top_k_aux": 64, "n_batches_to_dead": 200, "aux_penalty": 0.03125}
|
pythia410m_layer12_exp4/history.json
ADDED
|
@@ -0,0 +1,222 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"loss": [
|
| 3 |
+
0.0633780482815014,
|
| 4 |
+
0.040449679832560724,
|
| 5 |
+
0.03699899633362646,
|
| 6 |
+
0.03426451659879686,
|
| 7 |
+
0.03293758990266741,
|
| 8 |
+
0.03207839903093793,
|
| 9 |
+
0.03173348016212078,
|
| 10 |
+
0.031233223596293675,
|
| 11 |
+
0.030805410166522877
|
| 12 |
+
],
|
| 13 |
+
"recon_loss": [
|
| 14 |
+
0.06043631105367669,
|
| 15 |
+
0.038418971138588824,
|
| 16 |
+
0.035141831789564634,
|
| 17 |
+
0.03255485847445303,
|
| 18 |
+
0.031354127378251995,
|
| 19 |
+
0.030511992129525965,
|
| 20 |
+
0.03020807131643182,
|
| 21 |
+
0.02974190070849933,
|
| 22 |
+
0.02936934014974997
|
| 23 |
+
],
|
| 24 |
+
"l1_loss": [
|
| 25 |
+
0.04254397630569831,
|
| 26 |
+
0.03591604542389458,
|
| 27 |
+
0.03335641302631495,
|
| 28 |
+
0.032121884312054746,
|
| 29 |
+
0.031592240415094335,
|
| 30 |
+
0.03115089382712616,
|
| 31 |
+
0.0310382004413152,
|
| 32 |
+
0.03091504763450115,
|
| 33 |
+
0.030861117897349022
|
| 34 |
+
],
|
| 35 |
+
"sparsity": [
|
| 36 |
+
127.99999706455237,
|
| 37 |
+
127.9996121193666,
|
| 38 |
+
127.99637660876377,
|
| 39 |
+
127.98658852135854,
|
| 40 |
+
127.96970221697744,
|
| 41 |
+
127.951625713203,
|
| 42 |
+
127.9493801200022,
|
| 43 |
+
127.94733452147884,
|
| 44 |
+
127.94911084114531
|
| 45 |
+
],
|
| 46 |
+
"recon_contribution": [
|
| 47 |
+
0.06043631105367669,
|
| 48 |
+
0.038418971138588824,
|
| 49 |
+
0.035141831789564634,
|
| 50 |
+
0.03255485847445303,
|
| 51 |
+
0.031354127378251995,
|
| 52 |
+
0.030511992129525965,
|
| 53 |
+
0.03020807131643182,
|
| 54 |
+
0.02974190070849933,
|
| 55 |
+
0.02936934014974997
|
| 56 |
+
],
|
| 57 |
+
"l1_contribution": [
|
| 58 |
+
0.0,
|
| 59 |
+
0.0,
|
| 60 |
+
0.0,
|
| 61 |
+
0.0,
|
| 62 |
+
0.0,
|
| 63 |
+
0.0,
|
| 64 |
+
0.0,
|
| 65 |
+
0.0,
|
| 66 |
+
0.0
|
| 67 |
+
],
|
| 68 |
+
"aux_loss": [
|
| 69 |
+
0.0013407035204427683,
|
| 70 |
+
0.001206279968894205,
|
| 71 |
+
0.0011090461909263893,
|
| 72 |
+
0.001031540746801869,
|
| 73 |
+
0.0009955380241685131,
|
| 74 |
+
0.0009693856348674246,
|
| 75 |
+
0.0009607186671241843,
|
| 76 |
+
0.0009464948740661037,
|
| 77 |
+
0.0009347905664793055
|
| 78 |
+
],
|
| 79 |
+
"logit_kl": [
|
| 80 |
+
0.16010338188860626,
|
| 81 |
+
0.08244287678397205,
|
| 82 |
+
0.07481183652206579,
|
| 83 |
+
0.06781174056231976,
|
| 84 |
+
0.05879245127995791,
|
| 85 |
+
0.05970212593953549,
|
| 86 |
+
0.0564690228549936,
|
| 87 |
+
0.05448280248077698,
|
| 88 |
+
0.050127942717639214
|
| 89 |
+
],
|
| 90 |
+
"kl_contribution": [
|
| 91 |
+
0.001601033781792148,
|
| 92 |
+
0.0008244287497182284,
|
| 93 |
+
0.0007481183488240035,
|
| 94 |
+
0.0006781173903706248,
|
| 95 |
+
0.0005879244992334175,
|
| 96 |
+
0.0005970212459777497,
|
| 97 |
+
0.0005646902159745458,
|
| 98 |
+
0.0005448280124407546,
|
| 99 |
+
0.0005012794155875479
|
| 100 |
+
],
|
| 101 |
+
"dead_features": [
|
| 102 |
+
0,
|
| 103 |
+
0,
|
| 104 |
+
0,
|
| 105 |
+
0,
|
| 106 |
+
0,
|
| 107 |
+
0,
|
| 108 |
+
0,
|
| 109 |
+
0,
|
| 110 |
+
0
|
| 111 |
+
],
|
| 112 |
+
"dead_feature_percentage": [
|
| 113 |
+
0.0,
|
| 114 |
+
0.0,
|
| 115 |
+
0.0,
|
| 116 |
+
0.0,
|
| 117 |
+
0.0,
|
| 118 |
+
0.0,
|
| 119 |
+
0.0,
|
| 120 |
+
0.0,
|
| 121 |
+
0.0
|
| 122 |
+
],
|
| 123 |
+
"val_loss": [
|
| 124 |
+
0.13484913517282204,
|
| 125 |
+
0.08673548322172665,
|
| 126 |
+
0.07260407511397066,
|
| 127 |
+
0.09031630709753802,
|
| 128 |
+
0.069086731533803,
|
| 129 |
+
0.06976089404319591,
|
| 130 |
+
0.06714305356618817,
|
| 131 |
+
0.06708424529826598,
|
| 132 |
+
0.06623545442045628
|
| 133 |
+
],
|
| 134 |
+
"val_recon_loss": [
|
| 135 |
+
0.12502442019018786,
|
| 136 |
+
0.08171022347835101,
|
| 137 |
+
0.06793430633250555,
|
| 138 |
+
0.08208081987802818,
|
| 139 |
+
0.06493967481428035,
|
| 140 |
+
0.06584496814918377,
|
| 141 |
+
0.06334391628806291,
|
| 142 |
+
0.06323459462052392,
|
| 143 |
+
0.0625037070404381
|
| 144 |
+
],
|
| 145 |
+
"val_l1_loss": [
|
| 146 |
+
0.0801361854501688,
|
| 147 |
+
0.06303330405578457,
|
| 148 |
+
0.05338532578408285,
|
| 149 |
+
0.047394623265672356,
|
| 150 |
+
0.044924426564048874,
|
| 151 |
+
0.042197366175273504,
|
| 152 |
+
0.041195685872431505,
|
| 153 |
+
0.04001485846973939,
|
| 154 |
+
0.03953232180102069
|
| 155 |
+
],
|
| 156 |
+
"val_sparsity": [
|
| 157 |
+
127.99999902596099,
|
| 158 |
+
127.9999050225306,
|
| 159 |
+
127.99987431430426,
|
| 160 |
+
127.99977505938242,
|
| 161 |
+
127.99917653494128,
|
| 162 |
+
127.99904358034944,
|
| 163 |
+
127.9994777487062,
|
| 164 |
+
127.997718065774,
|
| 165 |
+
127.9978028799609
|
| 166 |
+
],
|
| 167 |
+
"val_recon_contribution": [
|
| 168 |
+
0.12502442019018786,
|
| 169 |
+
0.08171022347835101,
|
| 170 |
+
0.06793430633250555,
|
| 171 |
+
0.08208081987802818,
|
| 172 |
+
0.06493967481428035,
|
| 173 |
+
0.06584496814918377,
|
| 174 |
+
0.06334391628806291,
|
| 175 |
+
0.06323459462052392,
|
| 176 |
+
0.0625037070404381
|
| 177 |
+
],
|
| 178 |
+
"val_l1_contribution": [
|
| 179 |
+
0.0,
|
| 180 |
+
0.0,
|
| 181 |
+
0.0,
|
| 182 |
+
0.0,
|
| 183 |
+
0.0,
|
| 184 |
+
0.0,
|
| 185 |
+
0.0,
|
| 186 |
+
0.0,
|
| 187 |
+
0.0
|
| 188 |
+
],
|
| 189 |
+
"val_aux_loss": [
|
| 190 |
+
0.003909160225321689,
|
| 191 |
+
0.0025626278714785036,
|
| 192 |
+
0.002135797693944836,
|
| 193 |
+
0.0025796899164746306,
|
| 194 |
+
0.002045745794176429,
|
| 195 |
+
0.0020736640022027946,
|
| 196 |
+
0.001995693062206156,
|
| 197 |
+
0.0019915359528347686,
|
| 198 |
+
0.001970262986344236
|
| 199 |
+
],
|
| 200 |
+
"val_logit_kl": [
|
| 201 |
+
0.5915554641851345,
|
| 202 |
+
0.24626321253831795,
|
| 203 |
+
0.25339712481480303,
|
| 204 |
+
0.5655797486722821,
|
| 205 |
+
0.21013109983773948,
|
| 206 |
+
0.18422620171263834,
|
| 207 |
+
0.18034442895003636,
|
| 208 |
+
0.18581148112418705,
|
| 209 |
+
0.17614844139294697
|
| 210 |
+
],
|
| 211 |
+
"val_kl_contribution": [
|
| 212 |
+
0.005915554506022539,
|
| 213 |
+
0.0024626320693430332,
|
| 214 |
+
0.0025339711937538835,
|
| 215 |
+
0.005655797356085895,
|
| 216 |
+
0.00210131095507153,
|
| 217 |
+
0.001842261975140703,
|
| 218 |
+
0.0018034442500080125,
|
| 219 |
+
0.00185811476983906,
|
| 220 |
+
0.0017614843742361293
|
| 221 |
+
]
|
| 222 |
+
}
|
pythia410m_layer12_exp4/weights.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4a2255ec8c37ecfcb854ce1ccc53ecab0f64002f18a20d0f865564f16df12a12
|
| 3 |
+
size 33598101
|
pythia410m_layer16_exp4/config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"d_in": 1024, "d_sae": 4096, "l1_coefficient": 0.0, "dtype": "float32", "device": "cuda", "hook_layer": "16", "hook_name": "hook_resid_post", "hook_spec": "blocks.16.hook_resid_post", "top_k": 128, "top_k_aux": 64, "n_batches_to_dead": 200, "aux_penalty": 0.03125}
|
pythia410m_layer16_exp4/history.json
ADDED
|
@@ -0,0 +1,242 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"loss": [
|
| 3 |
+
0.08353466329041416,
|
| 4 |
+
0.05369645557594494,
|
| 5 |
+
0.047405824230006895,
|
| 6 |
+
0.044784668386793826,
|
| 7 |
+
0.04229313513422943,
|
| 8 |
+
0.041824697643626625,
|
| 9 |
+
0.04138549756404087,
|
| 10 |
+
0.04064657775593918,
|
| 11 |
+
0.040763516015391764,
|
| 12 |
+
0.040025475279456796
|
| 13 |
+
],
|
| 14 |
+
"recon_loss": [
|
| 15 |
+
0.07973694055004826,
|
| 16 |
+
0.051114708574796386,
|
| 17 |
+
0.04514499670722794,
|
| 18 |
+
0.042655482153812194,
|
| 19 |
+
0.040345438679987855,
|
| 20 |
+
0.0398980563738047,
|
| 21 |
+
0.03948248920712059,
|
| 22 |
+
0.03881170973519773,
|
| 23 |
+
0.038894988390230006,
|
| 24 |
+
0.038235727643523156
|
| 25 |
+
],
|
| 26 |
+
"l1_loss": [
|
| 27 |
+
0.04915676139693998,
|
| 28 |
+
0.04172572852729291,
|
| 29 |
+
0.03947106894602603,
|
| 30 |
+
0.03876089097462094,
|
| 31 |
+
0.038228173823875056,
|
| 32 |
+
0.038097299605831095,
|
| 33 |
+
0.038022073062674265,
|
| 34 |
+
0.03790181738403905,
|
| 35 |
+
0.03772651518740693,
|
| 36 |
+
0.03776252770394141
|
| 37 |
+
],
|
| 38 |
+
"sparsity": [
|
| 39 |
+
127.99964790898096,
|
| 40 |
+
127.99597880368657,
|
| 41 |
+
127.99156529933701,
|
| 42 |
+
127.97838906497574,
|
| 43 |
+
127.96438878331124,
|
| 44 |
+
127.95379145738218,
|
| 45 |
+
127.94487200977582,
|
| 46 |
+
127.93863260854178,
|
| 47 |
+
127.8981907008564,
|
| 48 |
+
127.94249999847689
|
| 49 |
+
],
|
| 50 |
+
"recon_contribution": [
|
| 51 |
+
0.07973694055004826,
|
| 52 |
+
0.051114708574796386,
|
| 53 |
+
0.04514499670722794,
|
| 54 |
+
0.042655482153812194,
|
| 55 |
+
0.040345438679987855,
|
| 56 |
+
0.0398980563738047,
|
| 57 |
+
0.03948248920712059,
|
| 58 |
+
0.03881170973519773,
|
| 59 |
+
0.038894988390230006,
|
| 60 |
+
0.038235727643523156
|
| 61 |
+
],
|
| 62 |
+
"l1_contribution": [
|
| 63 |
+
0.0,
|
| 64 |
+
0.0,
|
| 65 |
+
0.0,
|
| 66 |
+
0.0,
|
| 67 |
+
0.0,
|
| 68 |
+
0.0,
|
| 69 |
+
0.0,
|
| 70 |
+
0.0,
|
| 71 |
+
0.0,
|
| 72 |
+
0.0
|
| 73 |
+
],
|
| 74 |
+
"aux_loss": [
|
| 75 |
+
0.0018474537990520372,
|
| 76 |
+
0.0016171679440222363,
|
| 77 |
+
0.001449510196445488,
|
| 78 |
+
0.0013769177987272793,
|
| 79 |
+
0.0013080102823555938,
|
| 80 |
+
0.0012934218953530932,
|
| 81 |
+
0.0012797989406937332,
|
| 82 |
+
0.0012576378080757257,
|
| 83 |
+
0.0012620125437483273,
|
| 84 |
+
0.001237945258316327
|
| 85 |
+
],
|
| 86 |
+
"logit_kl": [
|
| 87 |
+
0.1950268866206957,
|
| 88 |
+
0.09645791333287061,
|
| 89 |
+
0.08113173586054419,
|
| 90 |
+
0.07522684408473476,
|
| 91 |
+
0.06396862018021827,
|
| 92 |
+
0.0633219388218938,
|
| 93 |
+
0.06232094896844032,
|
| 94 |
+
0.05772301857148427,
|
| 95 |
+
0.06065151304642107,
|
| 96 |
+
0.05518023526598573
|
| 97 |
+
],
|
| 98 |
+
"kl_contribution": [
|
| 99 |
+
0.0019502688225744521,
|
| 100 |
+
0.0009645791112404306,
|
| 101 |
+
0.0008113173406807044,
|
| 102 |
+
0.0007522684241525242,
|
| 103 |
+
0.0006396861880489062,
|
| 104 |
+
0.0006332193745150482,
|
| 105 |
+
0.0006232094753783651,
|
| 106 |
+
0.0005772301725555273,
|
| 107 |
+
0.0006065151165585484,
|
| 108 |
+
0.0005518023404650354
|
| 109 |
+
],
|
| 110 |
+
"dead_features": [
|
| 111 |
+
0,
|
| 112 |
+
0,
|
| 113 |
+
0,
|
| 114 |
+
0,
|
| 115 |
+
0,
|
| 116 |
+
0,
|
| 117 |
+
0,
|
| 118 |
+
0,
|
| 119 |
+
0,
|
| 120 |
+
0
|
| 121 |
+
],
|
| 122 |
+
"dead_feature_percentage": [
|
| 123 |
+
0.0,
|
| 124 |
+
0.0,
|
| 125 |
+
0.0,
|
| 126 |
+
0.0,
|
| 127 |
+
0.0,
|
| 128 |
+
0.0,
|
| 129 |
+
0.0,
|
| 130 |
+
0.0,
|
| 131 |
+
0.0,
|
| 132 |
+
0.0
|
| 133 |
+
],
|
| 134 |
+
"val_loss": [
|
| 135 |
+
0.12729294324079626,
|
| 136 |
+
0.10341824259421112,
|
| 137 |
+
0.09583418694972613,
|
| 138 |
+
0.09145536595301322,
|
| 139 |
+
0.09011536416026854,
|
| 140 |
+
0.08824515689006994,
|
| 141 |
+
0.08703586160595013,
|
| 142 |
+
0.09029735316747105,
|
| 143 |
+
0.08669550285447407,
|
| 144 |
+
0.08668003849658111
|
| 145 |
+
],
|
| 146 |
+
"val_recon_loss": [
|
| 147 |
+
0.12030711326272528,
|
| 148 |
+
0.09713160396062315,
|
| 149 |
+
0.09056324481300634,
|
| 150 |
+
0.08660473329933704,
|
| 151 |
+
0.08515992711151339,
|
| 152 |
+
0.08358545409687293,
|
| 153 |
+
0.08249965653344751,
|
| 154 |
+
0.08519215440832041,
|
| 155 |
+
0.08217771135284135,
|
| 156 |
+
0.08209042393368436
|
| 157 |
+
],
|
| 158 |
+
"val_l1_loss": [
|
| 159 |
+
0.08187440372174136,
|
| 160 |
+
0.06497470241739012,
|
| 161 |
+
0.058365051371842176,
|
| 162 |
+
0.053752636876310995,
|
| 163 |
+
0.05160476077265059,
|
| 164 |
+
0.05018334892806493,
|
| 165 |
+
0.04871046773307381,
|
| 166 |
+
0.048060337744182696,
|
| 167 |
+
0.047186675193342555,
|
| 168 |
+
0.04675913578841238
|
| 169 |
+
],
|
| 170 |
+
"val_sparsity": [
|
| 171 |
+
127.99992907886774,
|
| 172 |
+
127.99915684618121,
|
| 173 |
+
127.99892826591606,
|
| 174 |
+
127.99672018533835,
|
| 175 |
+
127.99829922736313,
|
| 176 |
+
127.99585862530193,
|
| 177 |
+
127.98862459008556,
|
| 178 |
+
127.99453963089509,
|
| 179 |
+
127.99135453372801,
|
| 180 |
+
127.99394974108449
|
| 181 |
+
],
|
| 182 |
+
"val_recon_contribution": [
|
| 183 |
+
0.12030711326272528,
|
| 184 |
+
0.09713160396062315,
|
| 185 |
+
0.09056324481300634,
|
| 186 |
+
0.08660473329933704,
|
| 187 |
+
0.08515992711151339,
|
| 188 |
+
0.08358545409687293,
|
| 189 |
+
0.08249965653344751,
|
| 190 |
+
0.08519215440832041,
|
| 191 |
+
0.08217771135284135,
|
| 192 |
+
0.08209042393368436
|
| 193 |
+
],
|
| 194 |
+
"val_l1_contribution": [
|
| 195 |
+
0.0,
|
| 196 |
+
0.0,
|
| 197 |
+
0.0,
|
| 198 |
+
0.0,
|
| 199 |
+
0.0,
|
| 200 |
+
0.0,
|
| 201 |
+
0.0,
|
| 202 |
+
0.0,
|
| 203 |
+
0.0,
|
| 204 |
+
0.0
|
| 205 |
+
],
|
| 206 |
+
"val_aux_loss": [
|
| 207 |
+
0.003767186209488777,
|
| 208 |
+
0.0030674240877703748,
|
| 209 |
+
0.0028675942471113798,
|
| 210 |
+
0.0027543867477319494,
|
| 211 |
+
0.0027067201603555215,
|
| 212 |
+
0.002650021515729101,
|
| 213 |
+
0.002624100405593377,
|
| 214 |
+
0.0027113321562234594,
|
| 215 |
+
0.002617328412975119,
|
| 216 |
+
0.002601935845993964
|
| 217 |
+
],
|
| 218 |
+
"val_logit_kl": [
|
| 219 |
+
0.32186438839566106,
|
| 220 |
+
0.3219214724050983,
|
| 221 |
+
0.24033480349154648,
|
| 222 |
+
0.20962458871464087,
|
| 223 |
+
0.22487169409890925,
|
| 224 |
+
0.20096812819023666,
|
| 225 |
+
0.19121047352584097,
|
| 226 |
+
0.2393866624295549,
|
| 227 |
+
0.19004631119321985,
|
| 228 |
+
0.19876787712503866
|
| 229 |
+
],
|
| 230 |
+
"val_kl_contribution": [
|
| 231 |
+
0.003218643812694143,
|
| 232 |
+
0.003219214650755346,
|
| 233 |
+
0.002403347981931419,
|
| 234 |
+
0.002096245839181286,
|
| 235 |
+
0.0022487168889549873,
|
| 236 |
+
0.0020096812358551086,
|
| 237 |
+
0.0019121046914511285,
|
| 238 |
+
0.002393866570213465,
|
| 239 |
+
0.0019004630685057397,
|
| 240 |
+
0.0019876787269919364
|
| 241 |
+
]
|
| 242 |
+
}
|
pythia410m_layer16_exp4/weights.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b643fa696e34bdbb77a9a4bd9dad0e20373dba13ac300673d364407885ffdb43
|
| 3 |
+
size 33598101
|
pythia410m_layer20_exp4/config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"d_in": 1024, "d_sae": 4096, "l1_coefficient": 0.0, "dtype": "float32", "device": "cuda", "hook_layer": "20", "hook_name": "hook_resid_post", "hook_spec": "blocks.20.hook_resid_post", "top_k": 128, "top_k_aux": 64, "n_batches_to_dead": 200, "aux_penalty": 0.03125}
|
pythia410m_layer20_exp4/history.json
ADDED
|
@@ -0,0 +1,222 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"loss": [
|
| 3 |
+
0.16211504342132493,
|
| 4 |
+
0.10067129953884386,
|
| 5 |
+
0.09280351666689676,
|
| 6 |
+
0.08699335354446229,
|
| 7 |
+
0.08336949619057228,
|
| 8 |
+
0.08215251792654964,
|
| 9 |
+
0.08116990813187673,
|
| 10 |
+
0.08045406652759834,
|
| 11 |
+
0.08055890680032152
|
| 12 |
+
],
|
| 13 |
+
"recon_loss": [
|
| 14 |
+
0.15564783398880067,
|
| 15 |
+
0.09632656519755911,
|
| 16 |
+
0.088733848639867,
|
| 17 |
+
0.08320554298688464,
|
| 18 |
+
0.07976921046399486,
|
| 19 |
+
0.07864626789610907,
|
| 20 |
+
0.07771860353459356,
|
| 21 |
+
0.07706233202657255,
|
| 22 |
+
0.07715556492941106
|
| 23 |
+
],
|
| 24 |
+
"l1_loss": [
|
| 25 |
+
0.07019442083067286,
|
| 26 |
+
0.05875471460306764,
|
| 27 |
+
0.05630684493890475,
|
| 28 |
+
0.05551435578844346,
|
| 29 |
+
0.05522927838443574,
|
| 30 |
+
0.054897712533465315,
|
| 31 |
+
0.05467336115591269,
|
| 32 |
+
0.054394984066810884,
|
| 33 |
+
0.053900332916899836
|
| 34 |
+
],
|
| 35 |
+
"sparsity": [
|
| 36 |
+
127.99681294505262,
|
| 37 |
+
127.9055342042511,
|
| 38 |
+
127.69139970062865,
|
| 39 |
+
127.68776041020499,
|
| 40 |
+
127.68349952490057,
|
| 41 |
+
127.6114986890457,
|
| 42 |
+
127.64406834929478,
|
| 43 |
+
127.68963794050545,
|
| 44 |
+
127.65165958993015
|
| 45 |
+
],
|
| 46 |
+
"recon_contribution": [
|
| 47 |
+
0.15564783398880067,
|
| 48 |
+
0.09632656519755911,
|
| 49 |
+
0.088733848639867,
|
| 50 |
+
0.08320554298688464,
|
| 51 |
+
0.07976921046399486,
|
| 52 |
+
0.07864626789610907,
|
| 53 |
+
0.07771860353459356,
|
| 54 |
+
0.07706233202657255,
|
| 55 |
+
0.07715556492941106
|
| 56 |
+
],
|
| 57 |
+
"l1_contribution": [
|
| 58 |
+
0.0,
|
| 59 |
+
0.0,
|
| 60 |
+
0.0,
|
| 61 |
+
0.0,
|
| 62 |
+
0.0,
|
| 63 |
+
0.0,
|
| 64 |
+
0.0,
|
| 65 |
+
0.0,
|
| 66 |
+
0.0
|
| 67 |
+
],
|
| 68 |
+
"aux_loss": [
|
| 69 |
+
0.003619418565862932,
|
| 70 |
+
0.0031015758182703027,
|
| 71 |
+
0.0029331032825763185,
|
| 72 |
+
0.002770557122376026,
|
| 73 |
+
0.0026508612855516726,
|
| 74 |
+
0.002613636069342041,
|
| 75 |
+
0.0025777324663133726,
|
| 76 |
+
0.0025438382175684265,
|
| 77 |
+
0.002547697336522832
|
| 78 |
+
],
|
| 79 |
+
"logit_kl": [
|
| 80 |
+
0.2847791214840328,
|
| 81 |
+
0.12431586245345307,
|
| 82 |
+
0.1136564812107441,
|
| 83 |
+
0.10172533851282539,
|
| 84 |
+
0.0949424416005963,
|
| 85 |
+
0.08926140512094687,
|
| 86 |
+
0.08735721425225995,
|
| 87 |
+
0.08478963147754191,
|
| 88 |
+
0.08556444501967428
|
| 89 |
+
],
|
| 90 |
+
"kl_contribution": [
|
| 91 |
+
0.0028477911476770326,
|
| 92 |
+
0.0012431585956815953,
|
| 93 |
+
0.001136564786498183,
|
| 94 |
+
0.001017253363524866,
|
| 95 |
+
0.000949424394834643,
|
| 96 |
+
0.0008926140309661963,
|
| 97 |
+
0.0008735721221158493,
|
| 98 |
+
0.0008478962964973156,
|
| 99 |
+
0.0008556444317649331
|
| 100 |
+
],
|
| 101 |
+
"dead_features": [
|
| 102 |
+
0,
|
| 103 |
+
0,
|
| 104 |
+
0,
|
| 105 |
+
0,
|
| 106 |
+
0,
|
| 107 |
+
0,
|
| 108 |
+
0,
|
| 109 |
+
0,
|
| 110 |
+
0
|
| 111 |
+
],
|
| 112 |
+
"dead_feature_percentage": [
|
| 113 |
+
0.0,
|
| 114 |
+
0.0,
|
| 115 |
+
0.0,
|
| 116 |
+
0.0,
|
| 117 |
+
0.0,
|
| 118 |
+
0.0,
|
| 119 |
+
0.0,
|
| 120 |
+
0.0,
|
| 121 |
+
0.0
|
| 122 |
+
],
|
| 123 |
+
"val_loss": [
|
| 124 |
+
0.2301682496696426,
|
| 125 |
+
0.20511982654276786,
|
| 126 |
+
0.19695799923360158,
|
| 127 |
+
0.18786643335984166,
|
| 128 |
+
0.18302077987061638,
|
| 129 |
+
0.18684740998070926,
|
| 130 |
+
0.17843776434890143,
|
| 131 |
+
0.19148184931153658,
|
| 132 |
+
0.17577280922336289
|
| 133 |
+
],
|
| 134 |
+
"val_recon_loss": [
|
| 135 |
+
0.21850402330956856,
|
| 136 |
+
0.1953204816401086,
|
| 137 |
+
0.18739983046775188,
|
| 138 |
+
0.17901436925130723,
|
| 139 |
+
0.17441177826166587,
|
| 140 |
+
0.17760054877654957,
|
| 141 |
+
0.17005954135458126,
|
| 142 |
+
0.17914778125497333,
|
| 143 |
+
0.16759699058703323
|
| 144 |
+
],
|
| 145 |
+
"val_l1_loss": [
|
| 146 |
+
0.10386819195261277,
|
| 147 |
+
0.08928536385575353,
|
| 148 |
+
0.08236716298224817,
|
| 149 |
+
0.07651199199858442,
|
| 150 |
+
0.07403400410702964,
|
| 151 |
+
0.07176928357582968,
|
| 152 |
+
0.07048508180800431,
|
| 153 |
+
0.06899321687723874,
|
| 154 |
+
0.06810225372874211
|
| 155 |
+
],
|
| 156 |
+
"val_sparsity": [
|
| 157 |
+
127.9975695958846,
|
| 158 |
+
127.98939837915472,
|
| 159 |
+
127.97696320439728,
|
| 160 |
+
127.96649647235654,
|
| 161 |
+
127.9721652407475,
|
| 162 |
+
127.97337379784868,
|
| 163 |
+
127.9734211548628,
|
| 164 |
+
127.96097953272971,
|
| 165 |
+
127.98348645457675
|
| 166 |
+
],
|
| 167 |
+
"val_recon_contribution": [
|
| 168 |
+
0.21850402330956856,
|
| 169 |
+
0.1953204816401086,
|
| 170 |
+
0.18739983046775188,
|
| 171 |
+
0.17901436925130723,
|
| 172 |
+
0.17441177826166587,
|
| 173 |
+
0.17760054877654957,
|
| 174 |
+
0.17005954135458126,
|
| 175 |
+
0.17914778125497333,
|
| 176 |
+
0.16759699058703323
|
| 177 |
+
],
|
| 178 |
+
"val_l1_contribution": [
|
| 179 |
+
0.0,
|
| 180 |
+
0.0,
|
| 181 |
+
0.0,
|
| 182 |
+
0.0,
|
| 183 |
+
0.0,
|
| 184 |
+
0.0,
|
| 185 |
+
0.0,
|
| 186 |
+
0.0,
|
| 187 |
+
0.0
|
| 188 |
+
],
|
| 189 |
+
"val_aux_loss": [
|
| 190 |
+
0.0069029904664602,
|
| 191 |
+
0.0062348846392050175,
|
| 192 |
+
0.006025284951339488,
|
| 193 |
+
0.005700396759133415,
|
| 194 |
+
0.005613630053175546,
|
| 195 |
+
0.005654315462770505,
|
| 196 |
+
0.005434500712242107,
|
| 197 |
+
0.005724624889534219,
|
| 198 |
+
0.005348385494034814
|
| 199 |
+
],
|
| 200 |
+
"val_logit_kl": [
|
| 201 |
+
0.4761236185027707,
|
| 202 |
+
0.35644603614060244,
|
| 203 |
+
0.3532883652411012,
|
| 204 |
+
0.3151667239698753,
|
| 205 |
+
0.2995371618432952,
|
| 206 |
+
0.3592545676846116,
|
| 207 |
+
0.2943722312150355,
|
| 208 |
+
0.6609443491667523,
|
| 209 |
+
0.2827433166836728
|
| 210 |
+
],
|
| 211 |
+
"val_kl_contribution": [
|
| 212 |
+
0.004761236077995428,
|
| 213 |
+
0.003564460284901958,
|
| 214 |
+
0.0035328835714714215,
|
| 215 |
+
0.0031516671700970943,
|
| 216 |
+
0.0029953715524163142,
|
| 217 |
+
0.003592545598765868,
|
| 218 |
+
0.002943722245873488,
|
| 219 |
+
0.0066094433456982595,
|
| 220 |
+
0.0028274331021233496
|
| 221 |
+
]
|
| 222 |
+
}
|
pythia410m_layer20_exp4/weights.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7ab7f4748676278f9c95c91c807c83980ef9ff77e2521c75b75a33d062556064
|
| 3 |
+
size 33598101
|
pythia410m_layer23_exp4/config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"d_in": 1024, "d_sae": 4096, "l1_coefficient": 0.0, "dtype": "float32", "device": "cuda", "hook_layer": "23", "hook_name": "hook_resid_post", "hook_spec": "blocks.23.hook_resid_post", "top_k": 128, "top_k_aux": 64, "n_batches_to_dead": 200, "aux_penalty": 0.03125}
|
pythia410m_layer23_exp4/history.json
ADDED
|
@@ -0,0 +1,222 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"loss": [
|
| 3 |
+
0.19471221902794772,
|
| 4 |
+
0.1359062709891239,
|
| 5 |
+
0.1277088505231155,
|
| 6 |
+
0.12214199725044941,
|
| 7 |
+
0.1202180515805736,
|
| 8 |
+
0.11676322992978966,
|
| 9 |
+
0.115552180465277,
|
| 10 |
+
0.11407808340687015,
|
| 11 |
+
0.11555235020938023
|
| 12 |
+
],
|
| 13 |
+
"recon_loss": [
|
| 14 |
+
0.19232082875029685,
|
| 15 |
+
0.13457462265614265,
|
| 16 |
+
0.12647040038050142,
|
| 17 |
+
0.12096263629007416,
|
| 18 |
+
0.11905944195428961,
|
| 19 |
+
0.11564715564785062,
|
| 20 |
+
0.11444143741687392,
|
| 21 |
+
0.11299694314457057,
|
| 22 |
+
0.11445592510464944
|
| 23 |
+
],
|
| 24 |
+
"l1_loss": [
|
| 25 |
+
0.055780474241897325,
|
| 26 |
+
0.05456285348868575,
|
| 27 |
+
0.05571047466133337,
|
| 28 |
+
0.05619010674731701,
|
| 29 |
+
0.056660164546512216,
|
| 30 |
+
0.056776592727689694,
|
| 31 |
+
0.056775683729813016,
|
| 32 |
+
0.05716668963668946,
|
| 33 |
+
0.05740164942741935
|
| 34 |
+
],
|
| 35 |
+
"sparsity": [
|
| 36 |
+
127.99998547161123,
|
| 37 |
+
127.99983322988189,
|
| 38 |
+
127.99986248050973,
|
| 39 |
+
127.99983973771388,
|
| 40 |
+
127.99978587155766,
|
| 41 |
+
127.9997793706489,
|
| 42 |
+
127.99977200087531,
|
| 43 |
+
127.99970554483347,
|
| 44 |
+
127.99962357038162
|
| 45 |
+
],
|
| 46 |
+
"recon_contribution": [
|
| 47 |
+
0.19232082875029685,
|
| 48 |
+
0.13457462265614265,
|
| 49 |
+
0.12647040038050142,
|
| 50 |
+
0.12096263629007416,
|
| 51 |
+
0.11905944195428961,
|
| 52 |
+
0.11564715564785062,
|
| 53 |
+
0.11444143741687392,
|
| 54 |
+
0.11299694314457057,
|
| 55 |
+
0.11445592510464944
|
| 56 |
+
],
|
| 57 |
+
"l1_contribution": [
|
| 58 |
+
0.0,
|
| 59 |
+
0.0,
|
| 60 |
+
0.0,
|
| 61 |
+
0.0,
|
| 62 |
+
0.0,
|
| 63 |
+
0.0,
|
| 64 |
+
0.0,
|
| 65 |
+
0.0,
|
| 66 |
+
0.0
|
| 67 |
+
],
|
| 68 |
+
"aux_loss": [
|
| 69 |
+
0.0,
|
| 70 |
+
0.0,
|
| 71 |
+
0.0,
|
| 72 |
+
0.0,
|
| 73 |
+
0.0,
|
| 74 |
+
0.0,
|
| 75 |
+
0.0,
|
| 76 |
+
0.0,
|
| 77 |
+
0.0
|
| 78 |
+
],
|
| 79 |
+
"logit_kl": [
|
| 80 |
+
0.2391390282738474,
|
| 81 |
+
0.13316483657803732,
|
| 82 |
+
0.12384502456427135,
|
| 83 |
+
0.11793610287539512,
|
| 84 |
+
0.11586095334475222,
|
| 85 |
+
0.1116074294151025,
|
| 86 |
+
0.11107431043923145,
|
| 87 |
+
0.10811403353433184,
|
| 88 |
+
0.10964251046631816
|
| 89 |
+
],
|
| 90 |
+
"kl_contribution": [
|
| 91 |
+
0.002391390229333737,
|
| 92 |
+
0.0013316483353383475,
|
| 93 |
+
0.0012384502178298042,
|
| 94 |
+
0.0011793610017333423,
|
| 95 |
+
0.0011586095068843345,
|
| 96 |
+
0.001116074269209412,
|
| 97 |
+
0.0011107430793556256,
|
| 98 |
+
0.001081140311065653,
|
| 99 |
+
0.0010964250803543526
|
| 100 |
+
],
|
| 101 |
+
"dead_features": [
|
| 102 |
+
0,
|
| 103 |
+
0,
|
| 104 |
+
0,
|
| 105 |
+
0,
|
| 106 |
+
0,
|
| 107 |
+
0,
|
| 108 |
+
0,
|
| 109 |
+
0,
|
| 110 |
+
0
|
| 111 |
+
],
|
| 112 |
+
"dead_feature_percentage": [
|
| 113 |
+
0.0,
|
| 114 |
+
0.0,
|
| 115 |
+
0.0,
|
| 116 |
+
0.0,
|
| 117 |
+
0.0,
|
| 118 |
+
0.0,
|
| 119 |
+
0.0,
|
| 120 |
+
0.0,
|
| 121 |
+
0.0
|
| 122 |
+
],
|
| 123 |
+
"val_loss": [
|
| 124 |
+
0.316197270421698,
|
| 125 |
+
0.2933344059487951,
|
| 126 |
+
0.2857086572056464,
|
| 127 |
+
0.2835407973804944,
|
| 128 |
+
0.277085559516265,
|
| 129 |
+
0.2770049967166241,
|
| 130 |
+
0.27574585438763105,
|
| 131 |
+
0.2738202984551851,
|
| 132 |
+
0.2707638205966317
|
| 133 |
+
],
|
| 134 |
+
"val_recon_loss": [
|
| 135 |
+
0.31154836239678274,
|
| 136 |
+
0.289137319403987,
|
| 137 |
+
0.2816178682296182,
|
| 138 |
+
0.2795479790337624,
|
| 139 |
+
0.2732222687210998,
|
| 140 |
+
0.2730984420010795,
|
| 141 |
+
0.2718467891338141,
|
| 142 |
+
0.2699995644996514,
|
| 143 |
+
0.2669640527543075
|
| 144 |
+
],
|
| 145 |
+
"val_l1_loss": [
|
| 146 |
+
0.06124183855974528,
|
| 147 |
+
0.06370097175849833,
|
| 148 |
+
0.0660762913653738,
|
| 149 |
+
0.06600362401978636,
|
| 150 |
+
0.06667904545303802,
|
| 151 |
+
0.06654060923615839,
|
| 152 |
+
0.06765059681811532,
|
| 153 |
+
0.06747716691671637,
|
| 154 |
+
0.06754671838955412
|
| 155 |
+
],
|
| 156 |
+
"val_sparsity": [
|
| 157 |
+
127.99999902596099,
|
| 158 |
+
127.99998284512787,
|
| 159 |
+
127.99997265411474,
|
| 160 |
+
127.99988684509083,
|
| 161 |
+
127.99996130188072,
|
| 162 |
+
127.99994086092681,
|
| 163 |
+
127.99990802090691,
|
| 164 |
+
127.99997085855529,
|
| 165 |
+
127.99995167587953
|
| 166 |
+
],
|
| 167 |
+
"val_recon_contribution": [
|
| 168 |
+
0.31154836239678274,
|
| 169 |
+
0.289137319403987,
|
| 170 |
+
0.2816178682296182,
|
| 171 |
+
0.2795479790337624,
|
| 172 |
+
0.2732222687210998,
|
| 173 |
+
0.2730984420010795,
|
| 174 |
+
0.2718467891338141,
|
| 175 |
+
0.2699995644996514,
|
| 176 |
+
0.2669640527543075
|
| 177 |
+
],
|
| 178 |
+
"val_l1_contribution": [
|
| 179 |
+
0.0,
|
| 180 |
+
0.0,
|
| 181 |
+
0.0,
|
| 182 |
+
0.0,
|
| 183 |
+
0.0,
|
| 184 |
+
0.0,
|
| 185 |
+
0.0,
|
| 186 |
+
0.0,
|
| 187 |
+
0.0
|
| 188 |
+
],
|
| 189 |
+
"val_aux_loss": [
|
| 190 |
+
0.0,
|
| 191 |
+
0.0,
|
| 192 |
+
0.0,
|
| 193 |
+
0.0,
|
| 194 |
+
0.0,
|
| 195 |
+
0.0,
|
| 196 |
+
0.0,
|
| 197 |
+
0.0,
|
| 198 |
+
0.0
|
| 199 |
+
],
|
| 200 |
+
"val_logit_kl": [
|
| 201 |
+
0.46489082612283444,
|
| 202 |
+
0.41970864466743973,
|
| 203 |
+
0.4090789091696744,
|
| 204 |
+
0.3992818322054693,
|
| 205 |
+
0.3863290916419148,
|
| 206 |
+
0.3906554739206339,
|
| 207 |
+
0.3899065362410565,
|
| 208 |
+
0.38207343585284825,
|
| 209 |
+
0.379976795738337
|
| 210 |
+
],
|
| 211 |
+
"val_kl_contribution": [
|
| 212 |
+
0.0046489081529668555,
|
| 213 |
+
0.004197086352757223,
|
| 214 |
+
0.004090789005092419,
|
| 215 |
+
0.0039928182316654156,
|
| 216 |
+
0.003863290828857478,
|
| 217 |
+
0.003906554652470842,
|
| 218 |
+
0.0038990652727258087,
|
| 219 |
+
0.0038207342707171615,
|
| 220 |
+
0.003799767870171844
|
| 221 |
+
]
|
| 222 |
+
}
|
pythia410m_layer23_exp4/weights.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6448973ed9c9c14ba995c7aea4ac5d33682a57aba29202332d3846c72fd55572
|
| 3 |
+
size 33598101
|
pythia410m_layer4_exp4/config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"d_in": 1024, "d_sae": 4096, "l1_coefficient": 0.0, "dtype": "float32", "device": "cuda", "hook_layer": "4", "hook_name": "hook_resid_post", "hook_spec": "blocks.4.hook_resid_post", "top_k": 128, "top_k_aux": 64, "n_batches_to_dead": 200, "aux_penalty": 0.03125}
|
pythia410m_layer4_exp4/history.json
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"loss": [
|
| 3 |
+
0.029894512988028612,
|
| 4 |
+
0.018774982353548998,
|
| 5 |
+
0.01757451586409247,
|
| 6 |
+
0.016827763248608532,
|
| 7 |
+
0.01671755956206978
|
| 8 |
+
],
|
| 9 |
+
"recon_loss": [
|
| 10 |
+
0.028509802402828747,
|
| 11 |
+
0.017858383666847354,
|
| 12 |
+
0.016714493958947134,
|
| 13 |
+
0.016002113766082564,
|
| 14 |
+
0.015895764886641607
|
| 15 |
+
],
|
| 16 |
+
"l1_loss": [
|
| 17 |
+
0.030895500990668626,
|
| 18 |
+
0.02851335875632515,
|
| 19 |
+
0.027246419608275223,
|
| 20 |
+
0.02655725095407554,
|
| 21 |
+
0.026199420892925908
|
| 22 |
+
],
|
| 23 |
+
"sparsity": [
|
| 24 |
+
127.86990778762069,
|
| 25 |
+
127.20474829215104,
|
| 26 |
+
126.57427332924844,
|
| 27 |
+
126.12415448377439,
|
| 28 |
+
126.17486108409514
|
| 29 |
+
],
|
| 30 |
+
"recon_contribution": [
|
| 31 |
+
0.028509802402828747,
|
| 32 |
+
0.017858383666847354,
|
| 33 |
+
0.016714493958947134,
|
| 34 |
+
0.016002113766082564,
|
| 35 |
+
0.015895764886641607
|
| 36 |
+
],
|
| 37 |
+
"l1_contribution": [
|
| 38 |
+
0.0,
|
| 39 |
+
0.0,
|
| 40 |
+
0.0,
|
| 41 |
+
0.0,
|
| 42 |
+
0.0
|
| 43 |
+
],
|
| 44 |
+
"aux_loss": [
|
| 45 |
+
0.0006074781647182751,
|
| 46 |
+
0.0005631423298345148,
|
| 47 |
+
0.0005285829312475791,
|
| 48 |
+
0.000506121144354717,
|
| 49 |
+
0.0005024663806066721
|
| 50 |
+
],
|
| 51 |
+
"logit_kl": [
|
| 52 |
+
0.07772324425328908,
|
| 53 |
+
0.03534563696527424,
|
| 54 |
+
0.03314389820006117,
|
| 55 |
+
0.03195283477390675,
|
| 56 |
+
0.03193282936116555
|
| 57 |
+
],
|
| 58 |
+
"kl_contribution": [
|
| 59 |
+
0.0007772324256051311,
|
| 60 |
+
0.0003534563615086878,
|
| 61 |
+
0.0003314389744292581,
|
| 62 |
+
0.0003195283404557151,
|
| 63 |
+
0.0003193282865419601
|
| 64 |
+
],
|
| 65 |
+
"dead_features": [
|
| 66 |
+
0,
|
| 67 |
+
0,
|
| 68 |
+
0,
|
| 69 |
+
0,
|
| 70 |
+
0
|
| 71 |
+
],
|
| 72 |
+
"dead_feature_percentage": [
|
| 73 |
+
0.0,
|
| 74 |
+
0.0,
|
| 75 |
+
0.0,
|
| 76 |
+
0.0,
|
| 77 |
+
0.0
|
| 78 |
+
],
|
| 79 |
+
"val_loss": [
|
| 80 |
+
0.05225575266271383,
|
| 81 |
+
0.04949509506847968,
|
| 82 |
+
0.04847414334131568,
|
| 83 |
+
0.04836168864751231,
|
| 84 |
+
0.04753865694363975
|
| 85 |
+
],
|
| 86 |
+
"val_recon_loss": [
|
| 87 |
+
0.04925031143315526,
|
| 88 |
+
0.04667543858985354,
|
| 89 |
+
0.04570288170486188,
|
| 90 |
+
0.04559107506220338,
|
| 91 |
+
0.044836636487968345
|
| 92 |
+
],
|
| 93 |
+
"val_l1_loss": [
|
| 94 |
+
0.030398870868670134,
|
| 95 |
+
0.029119303775803167,
|
| 96 |
+
0.028552788553902475,
|
| 97 |
+
0.028373856366574357,
|
| 98 |
+
0.028430795825100497
|
| 99 |
+
],
|
| 100 |
+
"val_sparsity": [
|
| 101 |
+
127.99124588504914,
|
| 102 |
+
127.91639768746916,
|
| 103 |
+
127.88443639288161,
|
| 104 |
+
127.73844488508752,
|
| 105 |
+
127.8981660100234
|
| 106 |
+
],
|
| 107 |
+
"val_recon_contribution": [
|
| 108 |
+
0.04925031143315526,
|
| 109 |
+
0.04667543858985354,
|
| 110 |
+
0.04570288170486188,
|
| 111 |
+
0.04559107506220338,
|
| 112 |
+
0.044836636487968345
|
| 113 |
+
],
|
| 114 |
+
"val_l1_contribution": [
|
| 115 |
+
0.0,
|
| 116 |
+
0.0,
|
| 117 |
+
0.0,
|
| 118 |
+
0.0,
|
| 119 |
+
0.0
|
| 120 |
+
],
|
| 121 |
+
"val_aux_loss": [
|
| 122 |
+
0.0015425308895145443,
|
| 123 |
+
0.0014635327424499028,
|
| 124 |
+
0.0014332294819043186,
|
| 125 |
+
0.0014288228828355674,
|
| 126 |
+
0.0014068659569972705
|
| 127 |
+
],
|
| 128 |
+
"val_logit_kl": [
|
| 129 |
+
0.14629103523297832,
|
| 130 |
+
0.1356123878651454,
|
| 131 |
+
0.133803220682425,
|
| 132 |
+
0.1341790673617055,
|
| 133 |
+
0.1295154520311731
|
| 134 |
+
],
|
| 135 |
+
"val_kl_contribution": [
|
| 136 |
+
0.0014629103198525002,
|
| 137 |
+
0.0013561238491139088,
|
| 138 |
+
0.0013380321782384967,
|
| 139 |
+
0.0013417906447417182,
|
| 140 |
+
0.0012951544919965203
|
| 141 |
+
]
|
| 142 |
+
}
|