SAELens
CallumMcDougallGDM committed on
Commit
26eef25
·
verified ·
1 Parent(s): 84bc460

Add files using upload-large-folder tool

Browse files
clt/width_262k_l0_big/config.json CHANGED
@@ -2,8 +2,9 @@
2
  "hf_hook_point_in": "model.layers.{all}.pre_feedforward_layernorm.output",
3
  "hf_hook_point_out": "model.layers.{all}.post_feedforward_layernorm.output",
4
  "width": 262080,
5
- "model_name": "gemma-v3-1b-it",
6
  "architecture": "jump_relu",
7
  "l0": 150,
8
- "affine_connection": false
 
9
  }
 
2
  "hf_hook_point_in": "model.layers.{all}.pre_feedforward_layernorm.output",
3
  "hf_hook_point_out": "model.layers.{all}.post_feedforward_layernorm.output",
4
  "width": 262080,
5
+ "model_name": "google/gemma-3-1b-it",
6
  "architecture": "jump_relu",
7
  "l0": 150,
8
+ "affine_connection": false,
9
+ "type": "clt"
10
  }
clt/width_262k_l0_big_affine/config.json CHANGED
@@ -2,8 +2,9 @@
2
  "hf_hook_point_in": "model.layers.{all}.pre_feedforward_layernorm.output",
3
  "hf_hook_point_out": "model.layers.{all}.post_feedforward_layernorm.output",
4
  "width": 262080,
5
- "model_name": "gemma-v3-1b-it",
6
  "architecture": "jump_relu",
7
  "l0": 150,
8
- "affine_connection": true
 
9
  }
 
2
  "hf_hook_point_in": "model.layers.{all}.pre_feedforward_layernorm.output",
3
  "hf_hook_point_out": "model.layers.{all}.post_feedforward_layernorm.output",
4
  "width": 262080,
5
+ "model_name": "google/gemma-3-1b-it",
6
  "architecture": "jump_relu",
7
  "l0": 150,
8
+ "affine_connection": true,
9
+ "type": "clt"
10
  }
clt/width_262k_l0_medium/config.json CHANGED
@@ -2,8 +2,9 @@
2
  "hf_hook_point_in": "model.layers.{all}.pre_feedforward_layernorm.output",
3
  "hf_hook_point_out": "model.layers.{all}.post_feedforward_layernorm.output",
4
  "width": 262080,
5
- "model_name": "gemma-v3-1b-it",
6
  "architecture": "jump_relu",
7
  "l0": 50,
8
- "affine_connection": false
 
9
  }
 
2
  "hf_hook_point_in": "model.layers.{all}.pre_feedforward_layernorm.output",
3
  "hf_hook_point_out": "model.layers.{all}.post_feedforward_layernorm.output",
4
  "width": 262080,
5
+ "model_name": "google/gemma-3-1b-it",
6
  "architecture": "jump_relu",
7
  "l0": 50,
8
+ "affine_connection": false,
9
+ "type": "clt"
10
  }
clt/width_262k_l0_medium_affine/config.json CHANGED
@@ -2,8 +2,9 @@
2
  "hf_hook_point_in": "model.layers.{all}.pre_feedforward_layernorm.output",
3
  "hf_hook_point_out": "model.layers.{all}.post_feedforward_layernorm.output",
4
  "width": 262080,
5
- "model_name": "gemma-v3-1b-it",
6
  "architecture": "jump_relu",
7
  "l0": 50,
8
- "affine_connection": true
 
9
  }
 
2
  "hf_hook_point_in": "model.layers.{all}.pre_feedforward_layernorm.output",
3
  "hf_hook_point_out": "model.layers.{all}.post_feedforward_layernorm.output",
4
  "width": 262080,
5
+ "model_name": "google/gemma-3-1b-it",
6
  "architecture": "jump_relu",
7
  "l0": 50,
8
+ "affine_connection": true,
9
+ "type": "clt"
10
  }
clt/width_524k_l0_big/config.json CHANGED
@@ -2,8 +2,9 @@
2
  "hf_hook_point_in": "model.layers.{all}.pre_feedforward_layernorm.output",
3
  "hf_hook_point_out": "model.layers.{all}.post_feedforward_layernorm.output",
4
  "width": 524160,
5
- "model_name": "gemma-v3-1b-it",
6
  "architecture": "jump_relu",
7
  "l0": 150,
8
- "affine_connection": false
 
9
  }
 
2
  "hf_hook_point_in": "model.layers.{all}.pre_feedforward_layernorm.output",
3
  "hf_hook_point_out": "model.layers.{all}.post_feedforward_layernorm.output",
4
  "width": 524160,
5
+ "model_name": "google/gemma-3-1b-it",
6
  "architecture": "jump_relu",
7
  "l0": 150,
8
+ "affine_connection": false,
9
+ "type": "clt"
10
  }
clt/width_524k_l0_big_affine/config.json CHANGED
@@ -2,8 +2,9 @@
2
  "hf_hook_point_in": "model.layers.{all}.pre_feedforward_layernorm.output",
3
  "hf_hook_point_out": "model.layers.{all}.post_feedforward_layernorm.output",
4
  "width": 524160,
5
- "model_name": "gemma-v3-1b-it",
6
  "architecture": "jump_relu",
7
  "l0": 150,
8
- "affine_connection": true
 
9
  }
 
2
  "hf_hook_point_in": "model.layers.{all}.pre_feedforward_layernorm.output",
3
  "hf_hook_point_out": "model.layers.{all}.post_feedforward_layernorm.output",
4
  "width": 524160,
5
+ "model_name": "google/gemma-3-1b-it",
6
  "architecture": "jump_relu",
7
  "l0": 150,
8
+ "affine_connection": true,
9
+ "type": "clt"
10
  }
clt/width_524k_l0_medium/config.json CHANGED
@@ -2,8 +2,9 @@
2
  "hf_hook_point_in": "model.layers.{all}.pre_feedforward_layernorm.output",
3
  "hf_hook_point_out": "model.layers.{all}.post_feedforward_layernorm.output",
4
  "width": 524160,
5
- "model_name": "gemma-v3-1b-it",
6
  "architecture": "jump_relu",
7
  "l0": 50,
8
- "affine_connection": false
 
9
  }
 
2
  "hf_hook_point_in": "model.layers.{all}.pre_feedforward_layernorm.output",
3
  "hf_hook_point_out": "model.layers.{all}.post_feedforward_layernorm.output",
4
  "width": 524160,
5
+ "model_name": "google/gemma-3-1b-it",
6
  "architecture": "jump_relu",
7
  "l0": 50,
8
+ "affine_connection": false,
9
+ "type": "clt"
10
  }
clt/width_524k_l0_medium_affine/config.json CHANGED
@@ -2,8 +2,9 @@
2
  "hf_hook_point_in": "model.layers.{all}.pre_feedforward_layernorm.output",
3
  "hf_hook_point_out": "model.layers.{all}.post_feedforward_layernorm.output",
4
  "width": 524160,
5
- "model_name": "gemma-v3-1b-it",
6
  "architecture": "jump_relu",
7
  "l0": 50,
8
- "affine_connection": true
 
9
  }
 
2
  "hf_hook_point_in": "model.layers.{all}.pre_feedforward_layernorm.output",
3
  "hf_hook_point_out": "model.layers.{all}.post_feedforward_layernorm.output",
4
  "width": 524160,
5
+ "model_name": "google/gemma-3-1b-it",
6
  "architecture": "jump_relu",
7
  "l0": 50,
8
+ "affine_connection": true,
9
+ "type": "clt"
10
  }
crosscoder/layer_7_13_17_22_width_1m_l0_big/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "hf_hook_point_in": "model.layers.{7,13,17,22}.output",
3
+ "hf_hook_point_out": "model.layers.{7,13,17,22}.output",
4
+ "width": 1048576,
5
+ "model_name": "google/gemma-3-1b-it",
6
+ "architecture": "jump_relu",
7
+ "l0": 150,
8
+ "affine_connection": false,
9
+ "type": "crosscoder"
10
+ }
crosscoder/layer_7_13_17_22_width_1m_l0_big/params_layer_0.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8552b46e2a913bd44bb8a5626af0b62e27c3956374b69289ebb41e4345fd3e01
3
+ size 6041899912
crosscoder/layer_7_13_17_22_width_1m_l0_big/params_layer_1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87b4d1218a3579d2bb0871fe8d61eeaf7d51d7f519308cb945868ba28da0845b
3
+ size 6041899912
crosscoder/layer_7_13_17_22_width_1m_l0_big/params_layer_2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8323684abc106863d6abea16cdb0d181180304064373f4f8ce9852cda818f218
3
+ size 6041899912
crosscoder/layer_7_13_17_22_width_1m_l0_big/params_layer_3.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f603eeffdb8d3542db9d5329a9b17ee3c5d900967f0c93240f5baabb70e0fa7b
3
+ size 6041899912
crosscoder/layer_7_13_17_22_width_1m_l0_medium/config.json CHANGED
@@ -1,9 +1,10 @@
1
  {
2
- "hf_hook_point_in": "model.layers.{7.0,13.5,17.400000000000002,22.599999999999998}.output",
3
- "hf_hook_point_out": "model.layers.{7.0,13.5,17.400000000000002,22.599999999999998}.output",
4
  "width": 1048576,
5
- "model_name": "gemma-v3-1b-it",
6
  "architecture": "jump_relu",
7
  "l0": 50,
8
- "affine_connection": false
 
9
  }
 
1
  {
2
+ "hf_hook_point_in": "model.layers.{7,13,17,22}.output",
3
+ "hf_hook_point_out": "model.layers.{7,13,17,22}.output",
4
  "width": 1048576,
5
+ "model_name": "google/gemma-3-1b-it",
6
  "architecture": "jump_relu",
7
  "l0": 50,
8
+ "affine_connection": false,
9
+ "type": "crosscoder"
10
  }
crosscoder/layer_7_13_17_22_width_262k_l0_big/config.json CHANGED
@@ -1,9 +1,10 @@
1
  {
2
- "hf_hook_point_in": "model.layers.{7.0,13.5,17.400000000000002,22.599999999999998}.output",
3
- "hf_hook_point_out": "model.layers.{7.0,13.5,17.400000000000002,22.599999999999998}.output",
4
  "width": 262144,
5
- "model_name": "gemma-v3-1b-it",
6
  "architecture": "jump_relu",
7
  "l0": 150,
8
- "affine_connection": false
 
9
  }
 
1
  {
2
+ "hf_hook_point_in": "model.layers.{7,13,17,22}.output",
3
+ "hf_hook_point_out": "model.layers.{7,13,17,22}.output",
4
  "width": 262144,
5
+ "model_name": "google/gemma-3-1b-it",
6
  "architecture": "jump_relu",
7
  "l0": 150,
8
+ "affine_connection": false,
9
+ "type": "crosscoder"
10
  }
crosscoder/layer_7_13_17_22_width_262k_l0_medium/config.json CHANGED
@@ -1,9 +1,10 @@
1
  {
2
- "hf_hook_point_in": "model.layers.{7.0,13.5,17.400000000000002,22.599999999999998}.output",
3
- "hf_hook_point_out": "model.layers.{7.0,13.5,17.400000000000002,22.599999999999998}.output",
4
  "width": 262144,
5
- "model_name": "gemma-v3-1b-it",
6
  "architecture": "jump_relu",
7
  "l0": 50,
8
- "affine_connection": false
 
9
  }
 
1
  {
2
+ "hf_hook_point_in": "model.layers.{7,13,17,22}.output",
3
+ "hf_hook_point_out": "model.layers.{7,13,17,22}.output",
4
  "width": 262144,
5
+ "model_name": "google/gemma-3-1b-it",
6
  "architecture": "jump_relu",
7
  "l0": 50,
8
+ "affine_connection": false,
9
+ "type": "crosscoder"
10
  }
crosscoder/layer_7_13_17_22_width_524k_l0_big/config.json CHANGED
@@ -1,9 +1,10 @@
1
  {
2
- "hf_hook_point_in": "model.layers.{7.0,13.5,17.400000000000002,22.599999999999998}.output",
3
- "hf_hook_point_out": "model.layers.{7.0,13.5,17.400000000000002,22.599999999999998}.output",
4
  "width": 524288,
5
- "model_name": "gemma-v3-1b-it",
6
  "architecture": "jump_relu",
7
  "l0": 150,
8
- "affine_connection": false
 
9
  }
 
1
  {
2
+ "hf_hook_point_in": "model.layers.{7,13,17,22}.output",
3
+ "hf_hook_point_out": "model.layers.{7,13,17,22}.output",
4
  "width": 524288,
5
+ "model_name": "google/gemma-3-1b-it",
6
  "architecture": "jump_relu",
7
  "l0": 150,
8
+ "affine_connection": false,
9
+ "type": "crosscoder"
10
  }
crosscoder/layer_7_13_17_22_width_524k_l0_medium/config.json CHANGED
@@ -1,9 +1,10 @@
1
  {
2
- "hf_hook_point_in": "model.layers.{7.0,13.5,17.400000000000002,22.599999999999998}.output",
3
- "hf_hook_point_out": "model.layers.{7.0,13.5,17.400000000000002,22.599999999999998}.output",
4
  "width": 524288,
5
- "model_name": "gemma-v3-1b-it",
6
  "architecture": "jump_relu",
7
  "l0": 50,
8
- "affine_connection": false
 
9
  }
 
1
  {
2
+ "hf_hook_point_in": "model.layers.{7,13,17,22}.output",
3
+ "hf_hook_point_out": "model.layers.{7,13,17,22}.output",
4
  "width": 524288,
5
+ "model_name": "google/gemma-3-1b-it",
6
  "architecture": "jump_relu",
7
  "l0": 50,
8
+ "affine_connection": false,
9
+ "type": "crosscoder"
10
  }
crosscoder/layer_7_13_17_22_width_65k_l0_big/config.json CHANGED
@@ -1,9 +1,10 @@
1
  {
2
- "hf_hook_point_in": "model.layers.{7.0,13.5,17.400000000000002,22.599999999999998}.output",
3
- "hf_hook_point_out": "model.layers.{7.0,13.5,17.400000000000002,22.599999999999998}.output",
4
  "width": 65536,
5
- "model_name": "gemma-v3-1b-it",
6
  "architecture": "jump_relu",
7
  "l0": 150,
8
- "affine_connection": false
 
9
  }
 
1
  {
2
+ "hf_hook_point_in": "model.layers.{7,13,17,22}.output",
3
+ "hf_hook_point_out": "model.layers.{7,13,17,22}.output",
4
  "width": 65536,
5
+ "model_name": "google/gemma-3-1b-it",
6
  "architecture": "jump_relu",
7
  "l0": 150,
8
+ "affine_connection": false,
9
+ "type": "crosscoder"
10
  }
crosscoder/layer_7_13_17_22_width_65k_l0_medium/config.json CHANGED
@@ -1,9 +1,10 @@
1
  {
2
- "hf_hook_point_in": "model.layers.{7.0,13.5,17.400000000000002,22.599999999999998}.output",
3
- "hf_hook_point_out": "model.layers.{7.0,13.5,17.400000000000002,22.599999999999998}.output",
4
  "width": 65536,
5
- "model_name": "gemma-v3-1b-it",
6
  "architecture": "jump_relu",
7
  "l0": 50,
8
- "affine_connection": false
 
9
  }
 
1
  {
2
+ "hf_hook_point_in": "model.layers.{7,13,17,22}.output",
3
+ "hf_hook_point_out": "model.layers.{7,13,17,22}.output",
4
  "width": 65536,
5
+ "model_name": "google/gemma-3-1b-it",
6
  "architecture": "jump_relu",
7
  "l0": 50,
8
+ "affine_connection": false,
9
+ "type": "crosscoder"
10
  }