Davide Ghilardi committed on
Commit
f14e5c8
·
1 Parent(s): 47db9ec
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .DS_Store +0 -0
  2. baseline/.DS_Store +0 -0
  3. {step_7629/k1-c0 → baseline/0}/cfg.json +0 -0
  4. {step_7629/layers.0 → baseline/0}/sae.safetensors +0 -0
  5. {step_7629/k2-c0 → baseline/1}/cfg.json +0 -0
  6. {step_7629/layers.1 → baseline/1}/sae.safetensors +0 -0
  7. {step_7629/k2-c1 → baseline/10}/cfg.json +0 -0
  8. {step_7629/layers.10 → baseline/10}/sae.safetensors +0 -0
  9. {step_7629/k3-c0 → baseline/11}/cfg.json +0 -0
  10. {step_7629/layers.11 → baseline/11}/sae.safetensors +0 -0
  11. {step_7629/k3-c1 → baseline/2}/cfg.json +0 -0
  12. {step_7629/layers.2 → baseline/2}/sae.safetensors +0 -0
  13. {step_7629/k4-c0 → baseline/3}/cfg.json +0 -0
  14. {step_7629/layers.3 → baseline/3}/sae.safetensors +0 -0
  15. {step_7629/k4-c1 → baseline/4}/cfg.json +0 -0
  16. {step_7629/layers.4 → baseline/4}/sae.safetensors +0 -0
  17. {step_7629/k5-c0 → baseline/5}/cfg.json +0 -0
  18. {step_7629/layers.5 → baseline/5}/sae.safetensors +0 -0
  19. {step_7629/k5-c1 → baseline/6}/cfg.json +0 -0
  20. {step_7629/layers.6 → baseline/6}/sae.safetensors +0 -0
  21. {step_7629/k6-c0 → baseline/7}/cfg.json +0 -0
  22. {step_7629/layers.7 → baseline/7}/sae.safetensors +0 -0
  23. {step_7629/layers.0 → baseline/8}/cfg.json +0 -0
  24. {step_7629/layers.8 → baseline/8}/sae.safetensors +0 -0
  25. {step_7629/layers.1 → baseline/9}/cfg.json +0 -0
  26. {step_7629/layers.9 → baseline/9}/sae.safetensors +0 -0
  27. cluster/.DS_Store +0 -0
  28. {step_7629/layers.10 → cluster/0-1}/cfg.json +0 -0
  29. {step_7629/k6-c0 → cluster/0-1}/sae.safetensors +0 -0
  30. {step_7629/layers.11 → cluster/0-10}/cfg.json +0 -0
  31. {step_7629/k1-c0 → cluster/0-10}/sae.safetensors +0 -0
  32. {step_7629/layers.2 → cluster/0-2}/cfg.json +0 -0
  33. {step_7629/k3-c1 → cluster/0-2}/sae.safetensors +0 -0
  34. {step_7629/layers.3 → cluster/0-6}/cfg.json +0 -0
  35. {step_7629/k2-c0 → cluster/0-6}/sae.safetensors +0 -0
  36. {step_7629/layers.4 → cluster/3-4}/cfg.json +0 -0
  37. {step_7629/k5-c1 → cluster/3-4}/sae.safetensors +0 -0
  38. {step_7629/layers.5 → cluster/3-6}/cfg.json +0 -0
  39. {step_7629/k3-c0 → cluster/3-6}/sae.safetensors +0 -0
  40. {step_7629/layers.6 → cluster/5-6}/cfg.json +0 -0
  41. {step_7629/k5-c0 → cluster/5-6}/sae.safetensors +0 -0
  42. {step_7629/layers.7 → cluster/7-10}/cfg.json +0 -0
  43. {step_7629/k2-c1 → cluster/7-10}/sae.safetensors +0 -0
  44. {step_7629/layers.8 → cluster/7-8}/cfg.json +0 -0
  45. {step_7629/k4-c0 → cluster/7-8}/sae.safetensors +0 -0
  46. {step_7629/layers.9 → cluster/9-10}/cfg.json +0 -0
  47. {step_7629/k4-c1 → cluster/9-10}/sae.safetensors +0 -0
  48. cluster/config.json +268 -0
  49. step_7629/scaling_factors.pt → scaling_factors.pt +0 -0
  50. step_7629/config.json +0 -1
.DS_Store ADDED
Binary file (8.2 kB). View file
 
baseline/.DS_Store ADDED
Binary file (8.2 kB). View file
 
{step_7629/k1-c0 → baseline/0}/cfg.json RENAMED
File without changes
{step_7629/layers.0 → baseline/0}/sae.safetensors RENAMED
File without changes
{step_7629/k2-c0 → baseline/1}/cfg.json RENAMED
File without changes
{step_7629/layers.1 → baseline/1}/sae.safetensors RENAMED
File without changes
{step_7629/k2-c1 → baseline/10}/cfg.json RENAMED
File without changes
{step_7629/layers.10 → baseline/10}/sae.safetensors RENAMED
File without changes
{step_7629/k3-c0 → baseline/11}/cfg.json RENAMED
File without changes
{step_7629/layers.11 → baseline/11}/sae.safetensors RENAMED
File without changes
{step_7629/k3-c1 → baseline/2}/cfg.json RENAMED
File without changes
{step_7629/layers.2 → baseline/2}/sae.safetensors RENAMED
File without changes
{step_7629/k4-c0 → baseline/3}/cfg.json RENAMED
File without changes
{step_7629/layers.3 → baseline/3}/sae.safetensors RENAMED
File without changes
{step_7629/k4-c1 → baseline/4}/cfg.json RENAMED
File without changes
{step_7629/layers.4 → baseline/4}/sae.safetensors RENAMED
File without changes
{step_7629/k5-c0 → baseline/5}/cfg.json RENAMED
File without changes
{step_7629/layers.5 → baseline/5}/sae.safetensors RENAMED
File without changes
{step_7629/k5-c1 → baseline/6}/cfg.json RENAMED
File without changes
{step_7629/layers.6 → baseline/6}/sae.safetensors RENAMED
File without changes
{step_7629/k6-c0 → baseline/7}/cfg.json RENAMED
File without changes
{step_7629/layers.7 → baseline/7}/sae.safetensors RENAMED
File without changes
{step_7629/layers.0 → baseline/8}/cfg.json RENAMED
File without changes
{step_7629/layers.8 → baseline/8}/sae.safetensors RENAMED
File without changes
{step_7629/layers.1 → baseline/9}/cfg.json RENAMED
File without changes
{step_7629/layers.9 → baseline/9}/sae.safetensors RENAMED
File without changes
cluster/.DS_Store ADDED
Binary file (8.2 kB). View file
 
{step_7629/layers.10 → cluster/0-1}/cfg.json RENAMED
File without changes
{step_7629/k6-c0 → cluster/0-1}/sae.safetensors RENAMED
File without changes
{step_7629/layers.11 → cluster/0-10}/cfg.json RENAMED
File without changes
{step_7629/k1-c0 → cluster/0-10}/sae.safetensors RENAMED
File without changes
{step_7629/layers.2 → cluster/0-2}/cfg.json RENAMED
File without changes
{step_7629/k3-c1 → cluster/0-2}/sae.safetensors RENAMED
File without changes
{step_7629/layers.3 → cluster/0-6}/cfg.json RENAMED
File without changes
{step_7629/k2-c0 → cluster/0-6}/sae.safetensors RENAMED
File without changes
{step_7629/layers.4 → cluster/3-4}/cfg.json RENAMED
File without changes
{step_7629/k5-c1 → cluster/3-4}/sae.safetensors RENAMED
File without changes
{step_7629/layers.5 → cluster/3-6}/cfg.json RENAMED
File without changes
{step_7629/k3-c0 → cluster/3-6}/sae.safetensors RENAMED
File without changes
{step_7629/layers.6 → cluster/5-6}/cfg.json RENAMED
File without changes
{step_7629/k5-c0 → cluster/5-6}/sae.safetensors RENAMED
File without changes
{step_7629/layers.7 → cluster/7-10}/cfg.json RENAMED
File without changes
{step_7629/k2-c1 → cluster/7-10}/sae.safetensors RENAMED
File without changes
{step_7629/layers.8 → cluster/7-8}/cfg.json RENAMED
File without changes
{step_7629/k4-c0 → cluster/7-8}/sae.safetensors RENAMED
File without changes
{step_7629/layers.9 → cluster/9-10}/cfg.json RENAMED
File without changes
{step_7629/k4-c1 → cluster/9-10}/sae.safetensors RENAMED
File without changes
cluster/config.json ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sae": {
3
+ "expansion_factor": 16,
4
+ "normalize_decoder": true,
5
+ "num_latents": 0,
6
+ "k": 128,
7
+ "multi_topk": false,
8
+ "jumprelu": false,
9
+ "jumprelu_init_threshold": 0.001,
10
+ "jumprelu_bandwidth": 0.001,
11
+ "jumprelu_target_l0": null,
12
+ "jumprelu_per_layer_l0": false,
13
+ "init_enc_as_dec_transpose": true,
14
+ "init_b_dec_as_zeros": false
15
+ },
16
+ "batch_size": 16,
17
+ "max_seq_len": 1024,
18
+ "num_training_tokens": 1000000000,
19
+ "grad_acc_steps": 1,
20
+ "micro_acc_steps": 1,
21
+ "adam_8bit": false,
22
+ "adam_epsilon": 1e-08,
23
+ "adam_betas": [
24
+ 0.9,
25
+ 0.999
26
+ ],
27
+ "lr": null,
28
+ "lr_scheduler_name": "constant",
29
+ "lr_warmup_steps": 0.0,
30
+ "l1_coefficient": 0.0,
31
+ "l1_warmup_steps": 0.0,
32
+ "use_l2_loss": false,
33
+ "auxk_alpha": 0.03125,
34
+ "dead_feature_threshold": 10000000,
35
+ "hookpoints": [
36
+ "layers.0",
37
+ "layers.1",
38
+ "layers.2",
39
+ "layers.3",
40
+ "layers.4",
41
+ "layers.5",
42
+ "layers.6",
43
+ "layers.7",
44
+ "layers.8",
45
+ "layers.9",
46
+ "layers.10",
47
+ "layers.11"
48
+ ],
49
+ "layers": [
50
+ 0,
51
+ 1,
52
+ 2,
53
+ 3,
54
+ 4,
55
+ 5,
56
+ 6,
57
+ 7,
58
+ 8,
59
+ 9,
60
+ 10,
61
+ 11
62
+ ],
63
+ "layer_stride": 1,
64
+ "distribute_modules": true,
65
+ "save_every": 100000,
66
+ "normalize_activations": 1.0,
67
+ "num_norm_estimation_tokens": 5000000,
68
+ "clusters": {
69
+ "k1-c0": [
70
+ 0,
71
+ 1,
72
+ 2,
73
+ 3,
74
+ 4,
75
+ 5,
76
+ 6,
77
+ 7,
78
+ 8,
79
+ 9,
80
+ 10
81
+ ],
82
+ "k2-c0": [
83
+ 0,
84
+ 1,
85
+ 2,
86
+ 3,
87
+ 4,
88
+ 5,
89
+ 6
90
+ ],
91
+ "k2-c1": [
92
+ 7,
93
+ 8,
94
+ 9,
95
+ 10
96
+ ],
97
+ "k3-c0": [
98
+ 3,
99
+ 4,
100
+ 5,
101
+ 6
102
+ ],
103
+ "k3-c1": [
104
+ 0,
105
+ 1,
106
+ 2
107
+ ],
108
+ "k4-c0": [
109
+ 7,
110
+ 8
111
+ ],
112
+ "k4-c1": [
113
+ 9,
114
+ 10
115
+ ],
116
+ "k5-c0": [
117
+ 5,
118
+ 6
119
+ ],
120
+ "k5-c1": [
121
+ 3,
122
+ 4
123
+ ],
124
+ "k6-c0": [
125
+ 0,
126
+ 1
127
+ ],
128
+ "layers.0": [
129
+ 0
130
+ ],
131
+ "layers.1": [
132
+ 1
133
+ ],
134
+ "layers.2": [
135
+ 2
136
+ ],
137
+ "layers.3": [
138
+ 3
139
+ ],
140
+ "layers.4": [
141
+ 4
142
+ ],
143
+ "layers.5": [
144
+ 5
145
+ ],
146
+ "layers.6": [
147
+ 6
148
+ ],
149
+ "layers.7": [
150
+ 7
151
+ ],
152
+ "layers.8": [
153
+ 8
154
+ ],
155
+ "layers.9": [
156
+ 9
157
+ ],
158
+ "layers.10": [
159
+ 10
160
+ ],
161
+ "layers.11": [
162
+ 11
163
+ ]
164
+ },
165
+ "cluster_hookpoints": {
166
+ "k1-c0": [
167
+ "layers.0",
168
+ "layers.1",
169
+ "layers.2",
170
+ "layers.3",
171
+ "layers.4",
172
+ "layers.5",
173
+ "layers.6",
174
+ "layers.7",
175
+ "layers.8",
176
+ "layers.9",
177
+ "layers.10"
178
+ ],
179
+ "k2-c0": [
180
+ "layers.0",
181
+ "layers.1",
182
+ "layers.2",
183
+ "layers.3",
184
+ "layers.4",
185
+ "layers.5",
186
+ "layers.6"
187
+ ],
188
+ "k2-c1": [
189
+ "layers.7",
190
+ "layers.8",
191
+ "layers.9",
192
+ "layers.10"
193
+ ],
194
+ "k3-c0": [
195
+ "layers.3",
196
+ "layers.4",
197
+ "layers.5",
198
+ "layers.6"
199
+ ],
200
+ "k3-c1": [
201
+ "layers.0",
202
+ "layers.1",
203
+ "layers.2"
204
+ ],
205
+ "k4-c0": [
206
+ "layers.7",
207
+ "layers.8"
208
+ ],
209
+ "k4-c1": [
210
+ "layers.9",
211
+ "layers.10"
212
+ ],
213
+ "k5-c0": [
214
+ "layers.5",
215
+ "layers.6"
216
+ ],
217
+ "k5-c1": [
218
+ "layers.3",
219
+ "layers.4"
220
+ ],
221
+ "k6-c0": [
222
+ "layers.0",
223
+ "layers.1"
224
+ ],
225
+ "layers.0": [
226
+ "layers.0"
227
+ ],
228
+ "layers.1": [
229
+ "layers.1"
230
+ ],
231
+ "layers.2": [
232
+ "layers.2"
233
+ ],
234
+ "layers.3": [
235
+ "layers.3"
236
+ ],
237
+ "layers.4": [
238
+ "layers.4"
239
+ ],
240
+ "layers.5": [
241
+ "layers.5"
242
+ ],
243
+ "layers.6": [
244
+ "layers.6"
245
+ ],
246
+ "layers.7": [
247
+ "layers.7"
248
+ ],
249
+ "layers.8": [
250
+ "layers.8"
251
+ ],
252
+ "layers.9": [
253
+ "layers.9"
254
+ ],
255
+ "layers.10": [
256
+ "layers.10"
257
+ ],
258
+ "layers.11": [
259
+ "layers.11"
260
+ ]
261
+ },
262
+ "hook": null,
263
+ "keep_last_n_checkpoints": 4,
264
+ "resume_from": null,
265
+ "log_to_wandb": true,
266
+ "run_name": "checkpoints-clusters/pythia-160m-topk",
267
+ "wandb_log_frequency": 1
268
+ }
step_7629/scaling_factors.pt → scaling_factors.pt RENAMED
File without changes
step_7629/config.json DELETED
@@ -1 +0,0 @@
1
- {"sae": {"expansion_factor": 16, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "jumprelu": false, "jumprelu_init_threshold": 0.001, "jumprelu_bandwidth": 0.001, "jumprelu_target_l0": null, "jumprelu_per_layer_l0": false, "init_enc_as_dec_transpose": true, "init_b_dec_as_zeros": false}, "batch_size": 16, "max_seq_len": 1024, "num_training_tokens": 1000000000, "grad_acc_steps": 1, "micro_acc_steps": 1, "adam_8bit": false, "adam_epsilon": 1e-08, "adam_betas": [0.9, 0.999], "lr": null, "lr_scheduler_name": "constant", "lr_warmup_steps": 0.0, "l1_coefficient": 0.0, "l1_warmup_steps": 0.0, "use_l2_loss": false, "auxk_alpha": 0.03125, "dead_feature_threshold": 10000000, "hookpoints": ["layers.0", "layers.1", "layers.2", "layers.3", "layers.4", "layers.5", "layers.6", "layers.7", "layers.8", "layers.9", "layers.10", "layers.11"], "layers": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], "layer_stride": 1, "distribute_modules": true, "save_every": 100000, "normalize_activations": 1.0, "num_norm_estimation_tokens": 5000000, "clusters": {"k1-c0": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], "k2-c0": [0, 1, 2, 3, 4, 5, 6], "k2-c1": [7, 8, 9, 10], "k3-c0": [3, 4, 5, 6], "k3-c1": [0, 1, 2], "k4-c0": [7, 8], "k4-c1": [9, 10], "k5-c0": [5, 6], "k5-c1": [3, 4], "k6-c0": [0, 1], "layers.0": [0], "layers.1": [1], "layers.2": [2], "layers.3": [3], "layers.4": [4], "layers.5": [5], "layers.6": [6], "layers.7": [7], "layers.8": [8], "layers.9": [9], "layers.10": [10], "layers.11": [11]}, "cluster_hookpoints": {"k1-c0": ["layers.0", "layers.1", "layers.2", "layers.3", "layers.4", "layers.5", "layers.6", "layers.7", "layers.8", "layers.9", "layers.10"], "k2-c0": ["layers.0", "layers.1", "layers.2", "layers.3", "layers.4", "layers.5", "layers.6"], "k2-c1": ["layers.7", "layers.8", "layers.9", "layers.10"], "k3-c0": ["layers.3", "layers.4", "layers.5", "layers.6"], "k3-c1": ["layers.0", "layers.1", "layers.2"], "k4-c0": ["layers.7", "layers.8"], "k4-c1": ["layers.9", 
"layers.10"], "k5-c0": ["layers.5", "layers.6"], "k5-c1": ["layers.3", "layers.4"], "k6-c0": ["layers.0", "layers.1"], "layers.0": ["layers.0"], "layers.1": ["layers.1"], "layers.2": ["layers.2"], "layers.3": ["layers.3"], "layers.4": ["layers.4"], "layers.5": ["layers.5"], "layers.6": ["layers.6"], "layers.7": ["layers.7"], "layers.8": ["layers.8"], "layers.9": ["layers.9"], "layers.10": ["layers.10"], "layers.11": ["layers.11"]}, "hook": null, "keep_last_n_checkpoints": 4, "resume_from": null, "log_to_wandb": true, "run_name": "checkpoints-clusters/pythia-160m-topk", "wandb_log_frequency": 1}