Add files using upload-large-folder tool
Browse files
- clt/width_262k_l0_big/config.json +3 -2
- clt/width_262k_l0_big_affine/config.json +3 -2
- clt/width_262k_l0_medium/config.json +3 -2
- clt/width_262k_l0_medium_affine/config.json +3 -2
- clt/width_524k_l0_big/config.json +3 -2
- clt/width_524k_l0_big_affine/config.json +3 -2
- clt/width_524k_l0_medium/config.json +3 -2
- clt/width_524k_l0_medium_affine/config.json +3 -2
- crosscoder/layer_7_13_17_22_width_1m_l0_big/config.json +10 -0
- crosscoder/layer_7_13_17_22_width_1m_l0_big/params_layer_0.safetensors +3 -0
- crosscoder/layer_7_13_17_22_width_1m_l0_big/params_layer_1.safetensors +3 -0
- crosscoder/layer_7_13_17_22_width_1m_l0_big/params_layer_2.safetensors +3 -0
- crosscoder/layer_7_13_17_22_width_1m_l0_big/params_layer_3.safetensors +3 -0
- crosscoder/layer_7_13_17_22_width_1m_l0_medium/config.json +5 -4
- crosscoder/layer_7_13_17_22_width_262k_l0_big/config.json +5 -4
- crosscoder/layer_7_13_17_22_width_262k_l0_medium/config.json +5 -4
- crosscoder/layer_7_13_17_22_width_524k_l0_big/config.json +5 -4
- crosscoder/layer_7_13_17_22_width_524k_l0_medium/config.json +5 -4
- crosscoder/layer_7_13_17_22_width_65k_l0_big/config.json +5 -4
- crosscoder/layer_7_13_17_22_width_65k_l0_medium/config.json +5 -4
clt/width_262k_l0_big/config.json
CHANGED
|
@@ -2,8 +2,9 @@
|
|
| 2 |
"hf_hook_point_in": "model.layers.{all}.pre_feedforward_layernorm.output",
|
| 3 |
"hf_hook_point_out": "model.layers.{all}.post_feedforward_layernorm.output",
|
| 4 |
"width": 262080,
|
| 5 |
-
"model_name": "gemma-
|
| 6 |
"architecture": "jump_relu",
|
| 7 |
"l0": 150,
|
| 8 |
-
"affine_connection": false
|
|
|
|
| 9 |
}
|
|
|
|
| 2 |
"hf_hook_point_in": "model.layers.{all}.pre_feedforward_layernorm.output",
|
| 3 |
"hf_hook_point_out": "model.layers.{all}.post_feedforward_layernorm.output",
|
| 4 |
"width": 262080,
|
| 5 |
+
"model_name": "google/gemma-3-1b-it",
|
| 6 |
"architecture": "jump_relu",
|
| 7 |
"l0": 150,
|
| 8 |
+
"affine_connection": false,
|
| 9 |
+
"type": "clt"
|
| 10 |
}
|
clt/width_262k_l0_big_affine/config.json
CHANGED
|
@@ -2,8 +2,9 @@
|
|
| 2 |
"hf_hook_point_in": "model.layers.{all}.pre_feedforward_layernorm.output",
|
| 3 |
"hf_hook_point_out": "model.layers.{all}.post_feedforward_layernorm.output",
|
| 4 |
"width": 262080,
|
| 5 |
-
"model_name": "gemma-
|
| 6 |
"architecture": "jump_relu",
|
| 7 |
"l0": 150,
|
| 8 |
-
"affine_connection": true
|
|
|
|
| 9 |
}
|
|
|
|
| 2 |
"hf_hook_point_in": "model.layers.{all}.pre_feedforward_layernorm.output",
|
| 3 |
"hf_hook_point_out": "model.layers.{all}.post_feedforward_layernorm.output",
|
| 4 |
"width": 262080,
|
| 5 |
+
"model_name": "google/gemma-3-1b-it",
|
| 6 |
"architecture": "jump_relu",
|
| 7 |
"l0": 150,
|
| 8 |
+
"affine_connection": true,
|
| 9 |
+
"type": "clt"
|
| 10 |
}
|
clt/width_262k_l0_medium/config.json
CHANGED
|
@@ -2,8 +2,9 @@
|
|
| 2 |
"hf_hook_point_in": "model.layers.{all}.pre_feedforward_layernorm.output",
|
| 3 |
"hf_hook_point_out": "model.layers.{all}.post_feedforward_layernorm.output",
|
| 4 |
"width": 262080,
|
| 5 |
-
"model_name": "gemma-
|
| 6 |
"architecture": "jump_relu",
|
| 7 |
"l0": 50,
|
| 8 |
-
"affine_connection": false
|
|
|
|
| 9 |
}
|
|
|
|
| 2 |
"hf_hook_point_in": "model.layers.{all}.pre_feedforward_layernorm.output",
|
| 3 |
"hf_hook_point_out": "model.layers.{all}.post_feedforward_layernorm.output",
|
| 4 |
"width": 262080,
|
| 5 |
+
"model_name": "google/gemma-3-1b-it",
|
| 6 |
"architecture": "jump_relu",
|
| 7 |
"l0": 50,
|
| 8 |
+
"affine_connection": false,
|
| 9 |
+
"type": "clt"
|
| 10 |
}
|
clt/width_262k_l0_medium_affine/config.json
CHANGED
|
@@ -2,8 +2,9 @@
|
|
| 2 |
"hf_hook_point_in": "model.layers.{all}.pre_feedforward_layernorm.output",
|
| 3 |
"hf_hook_point_out": "model.layers.{all}.post_feedforward_layernorm.output",
|
| 4 |
"width": 262080,
|
| 5 |
-
"model_name": "gemma-
|
| 6 |
"architecture": "jump_relu",
|
| 7 |
"l0": 50,
|
| 8 |
-
"affine_connection": true
|
|
|
|
| 9 |
}
|
|
|
|
| 2 |
"hf_hook_point_in": "model.layers.{all}.pre_feedforward_layernorm.output",
|
| 3 |
"hf_hook_point_out": "model.layers.{all}.post_feedforward_layernorm.output",
|
| 4 |
"width": 262080,
|
| 5 |
+
"model_name": "google/gemma-3-1b-it",
|
| 6 |
"architecture": "jump_relu",
|
| 7 |
"l0": 50,
|
| 8 |
+
"affine_connection": true,
|
| 9 |
+
"type": "clt"
|
| 10 |
}
|
clt/width_524k_l0_big/config.json
CHANGED
|
@@ -2,8 +2,9 @@
|
|
| 2 |
"hf_hook_point_in": "model.layers.{all}.pre_feedforward_layernorm.output",
|
| 3 |
"hf_hook_point_out": "model.layers.{all}.post_feedforward_layernorm.output",
|
| 4 |
"width": 524160,
|
| 5 |
-
"model_name": "gemma-
|
| 6 |
"architecture": "jump_relu",
|
| 7 |
"l0": 150,
|
| 8 |
-
"affine_connection": false
|
|
|
|
| 9 |
}
|
|
|
|
| 2 |
"hf_hook_point_in": "model.layers.{all}.pre_feedforward_layernorm.output",
|
| 3 |
"hf_hook_point_out": "model.layers.{all}.post_feedforward_layernorm.output",
|
| 4 |
"width": 524160,
|
| 5 |
+
"model_name": "google/gemma-3-1b-it",
|
| 6 |
"architecture": "jump_relu",
|
| 7 |
"l0": 150,
|
| 8 |
+
"affine_connection": false,
|
| 9 |
+
"type": "clt"
|
| 10 |
}
|
clt/width_524k_l0_big_affine/config.json
CHANGED
|
@@ -2,8 +2,9 @@
|
|
| 2 |
"hf_hook_point_in": "model.layers.{all}.pre_feedforward_layernorm.output",
|
| 3 |
"hf_hook_point_out": "model.layers.{all}.post_feedforward_layernorm.output",
|
| 4 |
"width": 524160,
|
| 5 |
-
"model_name": "gemma-
|
| 6 |
"architecture": "jump_relu",
|
| 7 |
"l0": 150,
|
| 8 |
-
"affine_connection": true
|
|
|
|
| 9 |
}
|
|
|
|
| 2 |
"hf_hook_point_in": "model.layers.{all}.pre_feedforward_layernorm.output",
|
| 3 |
"hf_hook_point_out": "model.layers.{all}.post_feedforward_layernorm.output",
|
| 4 |
"width": 524160,
|
| 5 |
+
"model_name": "google/gemma-3-1b-it",
|
| 6 |
"architecture": "jump_relu",
|
| 7 |
"l0": 150,
|
| 8 |
+
"affine_connection": true,
|
| 9 |
+
"type": "clt"
|
| 10 |
}
|
clt/width_524k_l0_medium/config.json
CHANGED
|
@@ -2,8 +2,9 @@
|
|
| 2 |
"hf_hook_point_in": "model.layers.{all}.pre_feedforward_layernorm.output",
|
| 3 |
"hf_hook_point_out": "model.layers.{all}.post_feedforward_layernorm.output",
|
| 4 |
"width": 524160,
|
| 5 |
-
"model_name": "gemma-
|
| 6 |
"architecture": "jump_relu",
|
| 7 |
"l0": 50,
|
| 8 |
-
"affine_connection": false
|
|
|
|
| 9 |
}
|
|
|
|
| 2 |
"hf_hook_point_in": "model.layers.{all}.pre_feedforward_layernorm.output",
|
| 3 |
"hf_hook_point_out": "model.layers.{all}.post_feedforward_layernorm.output",
|
| 4 |
"width": 524160,
|
| 5 |
+
"model_name": "google/gemma-3-1b-it",
|
| 6 |
"architecture": "jump_relu",
|
| 7 |
"l0": 50,
|
| 8 |
+
"affine_connection": false,
|
| 9 |
+
"type": "clt"
|
| 10 |
}
|
clt/width_524k_l0_medium_affine/config.json
CHANGED
|
@@ -2,8 +2,9 @@
|
|
| 2 |
"hf_hook_point_in": "model.layers.{all}.pre_feedforward_layernorm.output",
|
| 3 |
"hf_hook_point_out": "model.layers.{all}.post_feedforward_layernorm.output",
|
| 4 |
"width": 524160,
|
| 5 |
-
"model_name": "gemma-
|
| 6 |
"architecture": "jump_relu",
|
| 7 |
"l0": 50,
|
| 8 |
-
"affine_connection": true
|
|
|
|
| 9 |
}
|
|
|
|
| 2 |
"hf_hook_point_in": "model.layers.{all}.pre_feedforward_layernorm.output",
|
| 3 |
"hf_hook_point_out": "model.layers.{all}.post_feedforward_layernorm.output",
|
| 4 |
"width": 524160,
|
| 5 |
+
"model_name": "google/gemma-3-1b-it",
|
| 6 |
"architecture": "jump_relu",
|
| 7 |
"l0": 50,
|
| 8 |
+
"affine_connection": true,
|
| 9 |
+
"type": "clt"
|
| 10 |
}
|
crosscoder/layer_7_13_17_22_width_1m_l0_big/config.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"hf_hook_point_in": "model.layers.{7,13,17,22}.output",
|
| 3 |
+
"hf_hook_point_out": "model.layers.{7,13,17,22}.output",
|
| 4 |
+
"width": 1048576,
|
| 5 |
+
"model_name": "google/gemma-3-1b-it",
|
| 6 |
+
"architecture": "jump_relu",
|
| 7 |
+
"l0": 150,
|
| 8 |
+
"affine_connection": false,
|
| 9 |
+
"type": "crosscoder"
|
| 10 |
+
}
|
crosscoder/layer_7_13_17_22_width_1m_l0_big/params_layer_0.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8552b46e2a913bd44bb8a5626af0b62e27c3956374b69289ebb41e4345fd3e01
|
| 3 |
+
size 6041899912
|
crosscoder/layer_7_13_17_22_width_1m_l0_big/params_layer_1.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87b4d1218a3579d2bb0871fe8d61eeaf7d51d7f519308cb945868ba28da0845b
|
| 3 |
+
size 6041899912
|
crosscoder/layer_7_13_17_22_width_1m_l0_big/params_layer_2.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8323684abc106863d6abea16cdb0d181180304064373f4f8ce9852cda818f218
|
| 3 |
+
size 6041899912
|
crosscoder/layer_7_13_17_22_width_1m_l0_big/params_layer_3.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f603eeffdb8d3542db9d5329a9b17ee3c5d900967f0c93240f5baabb70e0fa7b
|
| 3 |
+
size 6041899912
|
crosscoder/layer_7_13_17_22_width_1m_l0_medium/config.json
CHANGED
|
@@ -1,9 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"hf_hook_point_in": "model.layers.{7
|
| 3 |
-
"hf_hook_point_out": "model.layers.{7
|
| 4 |
"width": 1048576,
|
| 5 |
-
"model_name": "gemma-
|
| 6 |
"architecture": "jump_relu",
|
| 7 |
"l0": 50,
|
| 8 |
-
"affine_connection": false
|
|
|
|
| 9 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"hf_hook_point_in": "model.layers.{7,13,17,22}.output",
|
| 3 |
+
"hf_hook_point_out": "model.layers.{7,13,17,22}.output",
|
| 4 |
"width": 1048576,
|
| 5 |
+
"model_name": "google/gemma-3-1b-it",
|
| 6 |
"architecture": "jump_relu",
|
| 7 |
"l0": 50,
|
| 8 |
+
"affine_connection": false,
|
| 9 |
+
"type": "crosscoder"
|
| 10 |
}
|
crosscoder/layer_7_13_17_22_width_262k_l0_big/config.json
CHANGED
|
@@ -1,9 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"hf_hook_point_in": "model.layers.{7
|
| 3 |
-
"hf_hook_point_out": "model.layers.{7
|
| 4 |
"width": 262144,
|
| 5 |
-
"model_name": "gemma-
|
| 6 |
"architecture": "jump_relu",
|
| 7 |
"l0": 150,
|
| 8 |
-
"affine_connection": false
|
|
|
|
| 9 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"hf_hook_point_in": "model.layers.{7,13,17,22}.output",
|
| 3 |
+
"hf_hook_point_out": "model.layers.{7,13,17,22}.output",
|
| 4 |
"width": 262144,
|
| 5 |
+
"model_name": "google/gemma-3-1b-it",
|
| 6 |
"architecture": "jump_relu",
|
| 7 |
"l0": 150,
|
| 8 |
+
"affine_connection": false,
|
| 9 |
+
"type": "crosscoder"
|
| 10 |
}
|
crosscoder/layer_7_13_17_22_width_262k_l0_medium/config.json
CHANGED
|
@@ -1,9 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"hf_hook_point_in": "model.layers.{7
|
| 3 |
-
"hf_hook_point_out": "model.layers.{7
|
| 4 |
"width": 262144,
|
| 5 |
-
"model_name": "gemma-
|
| 6 |
"architecture": "jump_relu",
|
| 7 |
"l0": 50,
|
| 8 |
-
"affine_connection": false
|
|
|
|
| 9 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"hf_hook_point_in": "model.layers.{7,13,17,22}.output",
|
| 3 |
+
"hf_hook_point_out": "model.layers.{7,13,17,22}.output",
|
| 4 |
"width": 262144,
|
| 5 |
+
"model_name": "google/gemma-3-1b-it",
|
| 6 |
"architecture": "jump_relu",
|
| 7 |
"l0": 50,
|
| 8 |
+
"affine_connection": false,
|
| 9 |
+
"type": "crosscoder"
|
| 10 |
}
|
crosscoder/layer_7_13_17_22_width_524k_l0_big/config.json
CHANGED
|
@@ -1,9 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"hf_hook_point_in": "model.layers.{7
|
| 3 |
-
"hf_hook_point_out": "model.layers.{7
|
| 4 |
"width": 524288,
|
| 5 |
-
"model_name": "gemma-
|
| 6 |
"architecture": "jump_relu",
|
| 7 |
"l0": 150,
|
| 8 |
-
"affine_connection": false
|
|
|
|
| 9 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"hf_hook_point_in": "model.layers.{7,13,17,22}.output",
|
| 3 |
+
"hf_hook_point_out": "model.layers.{7,13,17,22}.output",
|
| 4 |
"width": 524288,
|
| 5 |
+
"model_name": "google/gemma-3-1b-it",
|
| 6 |
"architecture": "jump_relu",
|
| 7 |
"l0": 150,
|
| 8 |
+
"affine_connection": false,
|
| 9 |
+
"type": "crosscoder"
|
| 10 |
}
|
crosscoder/layer_7_13_17_22_width_524k_l0_medium/config.json
CHANGED
|
@@ -1,9 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"hf_hook_point_in": "model.layers.{7
|
| 3 |
-
"hf_hook_point_out": "model.layers.{7
|
| 4 |
"width": 524288,
|
| 5 |
-
"model_name": "gemma-
|
| 6 |
"architecture": "jump_relu",
|
| 7 |
"l0": 50,
|
| 8 |
-
"affine_connection": false
|
|
|
|
| 9 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"hf_hook_point_in": "model.layers.{7,13,17,22}.output",
|
| 3 |
+
"hf_hook_point_out": "model.layers.{7,13,17,22}.output",
|
| 4 |
"width": 524288,
|
| 5 |
+
"model_name": "google/gemma-3-1b-it",
|
| 6 |
"architecture": "jump_relu",
|
| 7 |
"l0": 50,
|
| 8 |
+
"affine_connection": false,
|
| 9 |
+
"type": "crosscoder"
|
| 10 |
}
|
crosscoder/layer_7_13_17_22_width_65k_l0_big/config.json
CHANGED
|
@@ -1,9 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"hf_hook_point_in": "model.layers.{7
|
| 3 |
-
"hf_hook_point_out": "model.layers.{7
|
| 4 |
"width": 65536,
|
| 5 |
-
"model_name": "gemma-
|
| 6 |
"architecture": "jump_relu",
|
| 7 |
"l0": 150,
|
| 8 |
-
"affine_connection": false
|
|
|
|
| 9 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"hf_hook_point_in": "model.layers.{7,13,17,22}.output",
|
| 3 |
+
"hf_hook_point_out": "model.layers.{7,13,17,22}.output",
|
| 4 |
"width": 65536,
|
| 5 |
+
"model_name": "google/gemma-3-1b-it",
|
| 6 |
"architecture": "jump_relu",
|
| 7 |
"l0": 150,
|
| 8 |
+
"affine_connection": false,
|
| 9 |
+
"type": "crosscoder"
|
| 10 |
}
|
crosscoder/layer_7_13_17_22_width_65k_l0_medium/config.json
CHANGED
|
@@ -1,9 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"hf_hook_point_in": "model.layers.{7
|
| 3 |
-
"hf_hook_point_out": "model.layers.{7
|
| 4 |
"width": 65536,
|
| 5 |
-
"model_name": "gemma-
|
| 6 |
"architecture": "jump_relu",
|
| 7 |
"l0": 50,
|
| 8 |
-
"affine_connection": false
|
|
|
|
| 9 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"hf_hook_point_in": "model.layers.{7,13,17,22}.output",
|
| 3 |
+
"hf_hook_point_out": "model.layers.{7,13,17,22}.output",
|
| 4 |
"width": 65536,
|
| 5 |
+
"model_name": "google/gemma-3-1b-it",
|
| 6 |
"architecture": "jump_relu",
|
| 7 |
"l0": 50,
|
| 8 |
+
"affine_connection": false,
|
| 9 |
+
"type": "crosscoder"
|
| 10 |
}
|