Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- attn_out/layer_12_width_16k_l0_big/params.safetensors +3 -0
- attn_out/layer_12_width_16k_l0_medium/params.safetensors +3 -0
- attn_out/layer_12_width_16k_l0_small/params.safetensors +3 -0
- attn_out/layer_12_width_65k_l0_big/params.safetensors +3 -0
- attn_out/layer_12_width_65k_l0_medium/params.safetensors +3 -0
- attn_out/layer_12_width_65k_l0_small/params.safetensors +3 -0
- attn_out/layer_15_width_16k_l0_big/params.safetensors +3 -0
- attn_out/layer_15_width_16k_l0_medium/params.safetensors +3 -0
- attn_out/layer_15_width_16k_l0_small/params.safetensors +3 -0
- attn_out/layer_15_width_65k_l0_big/params.safetensors +3 -0
- attn_out/layer_15_width_65k_l0_medium/params.safetensors +3 -0
- attn_out/layer_15_width_65k_l0_small/params.safetensors +3 -0
- attn_out/layer_5_width_16k_l0_big/params.safetensors +3 -0
- attn_out/layer_5_width_16k_l0_medium/params.safetensors +3 -0
- attn_out/layer_5_width_16k_l0_small/params.safetensors +3 -0
- attn_out/layer_5_width_65k_l0_big/params.safetensors +3 -0
- attn_out/layer_5_width_65k_l0_medium/params.safetensors +3 -0
- attn_out/layer_5_width_65k_l0_small/params.safetensors +3 -0
- attn_out/layer_9_width_16k_l0_big/params.safetensors +3 -0
- attn_out/layer_9_width_16k_l0_medium/params.safetensors +3 -0
- attn_out/layer_9_width_16k_l0_small/params.safetensors +3 -0
- attn_out/layer_9_width_65k_l0_big/params.safetensors +3 -0
- attn_out/layer_9_width_65k_l0_medium/params.safetensors +3 -0
- attn_out/layer_9_width_65k_l0_small/params.safetensors +3 -0
- resid_post_all/layer_0_width_16k_l0_big/config.json +9 -0
- resid_post_all/layer_0_width_16k_l0_small/config.json +9 -0
- resid_post_all/layer_0_width_262k_l0_big/config.json +9 -0
- resid_post_all/layer_0_width_262k_l0_small/config.json +9 -0
- resid_post_all/layer_10_width_16k_l0_small/config.json +9 -0
- resid_post_all/layer_10_width_262k_l0_big/config.json +9 -0
- resid_post_all/layer_10_width_262k_l0_small/config.json +9 -0
- resid_post_all/layer_11_width_16k_l0_big/config.json +9 -0
- resid_post_all/layer_11_width_16k_l0_small/config.json +9 -0
- resid_post_all/layer_11_width_262k_l0_big/config.json +9 -0
- resid_post_all/layer_12_width_16k_l0_big/config.json +9 -0
- resid_post_all/layer_12_width_16k_l0_small/config.json +9 -0
- resid_post_all/layer_12_width_262k_l0_big/config.json +9 -0
- resid_post_all/layer_12_width_262k_l0_small/config.json +9 -0
- resid_post_all/layer_13_width_16k_l0_big/config.json +9 -0
- resid_post_all/layer_13_width_16k_l0_small/config.json +9 -0
- resid_post_all/layer_13_width_262k_l0_big/config.json +9 -0
- resid_post_all/layer_13_width_262k_l0_small/config.json +9 -0
- resid_post_all/layer_14_width_16k_l0_big/config.json +9 -0
- resid_post_all/layer_14_width_16k_l0_small/config.json +9 -0
- resid_post_all/layer_14_width_262k_l0_big/config.json +9 -0
- resid_post_all/layer_14_width_262k_l0_small/config.json +9 -0
- resid_post_all/layer_15_width_16k_l0_big/config.json +9 -0
- resid_post_all/layer_15_width_16k_l0_small/config.json +9 -0
- resid_post_all/layer_15_width_262k_l0_big/config.json +9 -0
- resid_post_all/layer_15_width_262k_l0_small/config.json +9 -0
attn_out/layer_12_width_16k_l0_big/params.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2f28368da006346daa050858f201ad31c77d007e2423b9ee161ff0ac564754e6
|
| 3 |
+
size 134353272
|
attn_out/layer_12_width_16k_l0_medium/params.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e26d7eabfb7b586a6e8388e61c92e8672860e0200158ee1eafeb8133cb50cde0
|
| 3 |
+
size 134353272
|
attn_out/layer_12_width_16k_l0_small/params.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2a115f88a1ab3f5eae9607373c5812f6e7fff1c02997626f4b0de8999ef053f4
|
| 3 |
+
size 134353272
|
attn_out/layer_12_width_65k_l0_big/params.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eb09a673f518aae52f1dbac67ac60ec446c29f6a716ff63da169c13a76b3abc5
|
| 3 |
+
size 537399680
|
attn_out/layer_12_width_65k_l0_medium/params.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9ffb322f341adf677569cdd32625c8069f99545cc1094c68fae97750da3bd450
|
| 3 |
+
size 537399680
|
attn_out/layer_12_width_65k_l0_small/params.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c238ca353cb1aacba61f521688801cbd088ae6fb28398374ddc2d409e09f49e0
|
| 3 |
+
size 537399680
|
attn_out/layer_15_width_16k_l0_big/params.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fcc0871ad39152806052828251c639d2fd4d2d3aeb7e58beb0c9fe27356fe66b
|
| 3 |
+
size 134353272
|
attn_out/layer_15_width_16k_l0_medium/params.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ba7e101b9ac5ddf9044ece3bd24c48dbb1d59cec916fd282af7128a1df40a9ae
|
| 3 |
+
size 134353272
|
attn_out/layer_15_width_16k_l0_small/params.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a482def41c378a7324db6b4fa727691564985659ee3829781583f8483764abdb
|
| 3 |
+
size 134353272
|
attn_out/layer_15_width_65k_l0_big/params.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0b4805b719e05259773a203d5495b3db2303beafafb77458bacb8b0f0d53ade
|
| 3 |
+
size 537399680
|
attn_out/layer_15_width_65k_l0_medium/params.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:69125af286a2877fb612127d30ed08d7a93e351a624697368ce59e178cc2610e
|
| 3 |
+
size 537399680
|
attn_out/layer_15_width_65k_l0_small/params.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3836a6b3c60fbbcb37b63b0caa894f0d0d064b2b2823825481bff069ba31e170
|
| 3 |
+
size 537399680
|
attn_out/layer_5_width_16k_l0_big/params.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d465f79c10c66ccf0f00d17b94efddb3d15237283a746d43a5b4623ea7ab9e2d
|
| 3 |
+
size 134353272
|
attn_out/layer_5_width_16k_l0_medium/params.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:13d2be8b5d9532cd7e061a953e281081118f97c48328ce2418ea866950b0aa9d
|
| 3 |
+
size 134353272
|
attn_out/layer_5_width_16k_l0_small/params.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:317372bb845c53222b98faea3c2fd0ec82c11c85752a6b1f9992336f60744fba
|
| 3 |
+
size 134353272
|
attn_out/layer_5_width_65k_l0_big/params.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:709ec73b8009581f89471a73a26953eaeb4f576978b7d780b01b35055c435b70
|
| 3 |
+
size 537399680
|
attn_out/layer_5_width_65k_l0_medium/params.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6873e146162b7bded50793313a8ebd63557a87be4c90b5a9cbe6889f9b472810
|
| 3 |
+
size 537399680
|
attn_out/layer_5_width_65k_l0_small/params.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8cfa565a59b59dbdaf0020944f676aa5d9d7f4f18fa91a8f730de476d3857a03
|
| 3 |
+
size 537399680
|
attn_out/layer_9_width_16k_l0_big/params.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a602e7379fef1b8ca6d9de5f71afe1dc701fbed21734ef239f92c9d7027d8fcb
|
| 3 |
+
size 134353272
|
attn_out/layer_9_width_16k_l0_medium/params.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0c4cc52c98dcb24494df4345b130f9443f49dcfa4b62745f2258f5ca3b28072c
|
| 3 |
+
size 134353272
|
attn_out/layer_9_width_16k_l0_small/params.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:978eea8ed95c313def1b5c0e20885ef4886b84548c957a9f61a5eebd872e2020
|
| 3 |
+
size 134353272
|
attn_out/layer_9_width_65k_l0_big/params.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f591fb8142312fced18512af06b4120d9b269d3f44e118d671f623041e43536d
|
| 3 |
+
size 537399680
|
attn_out/layer_9_width_65k_l0_medium/params.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:06f90ff0d7c44f2ecef5bc77aa48a4616c7e987d52e543ea83a08b0925dc79ad
|
| 3 |
+
size 537399680
|
attn_out/layer_9_width_65k_l0_small/params.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a420e72d57e3130cba1827fe6b576e4e2f18b30fd76445effcb1a78c8d475609
|
| 3 |
+
size 537399680
|
resid_post_all/layer_0_width_16k_l0_big/config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"hf_hook_point_in": "model.layers.0.output",
|
| 3 |
+
"hf_hook_point_out": "model.layers.0.output",
|
| 4 |
+
"width": 16384,
|
| 5 |
+
"model_name": "gemma-v3-270m-it",
|
| 6 |
+
"architecture": "jump_relu",
|
| 7 |
+
"l0": 60,
|
| 8 |
+
"affine_connection": false
|
| 9 |
+
}
|
resid_post_all/layer_0_width_16k_l0_small/config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"hf_hook_point_in": "model.layers.0.output",
|
| 3 |
+
"hf_hook_point_out": "model.layers.0.output",
|
| 4 |
+
"width": 16384,
|
| 5 |
+
"model_name": "gemma-v3-270m-it",
|
| 6 |
+
"architecture": "jump_relu",
|
| 7 |
+
"l0": 10,
|
| 8 |
+
"affine_connection": false
|
| 9 |
+
}
|
resid_post_all/layer_0_width_262k_l0_big/config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"hf_hook_point_in": "model.layers.0.output",
|
| 3 |
+
"hf_hook_point_out": "model.layers.0.output",
|
| 4 |
+
"width": 262144,
|
| 5 |
+
"model_name": "gemma-v3-270m-it",
|
| 6 |
+
"architecture": "jump_relu",
|
| 7 |
+
"l0": 60,
|
| 8 |
+
"affine_connection": false
|
| 9 |
+
}
|
resid_post_all/layer_0_width_262k_l0_small/config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"hf_hook_point_in": "model.layers.0.output",
|
| 3 |
+
"hf_hook_point_out": "model.layers.0.output",
|
| 4 |
+
"width": 262144,
|
| 5 |
+
"model_name": "gemma-v3-270m-it",
|
| 6 |
+
"architecture": "jump_relu",
|
| 7 |
+
"l0": 10,
|
| 8 |
+
"affine_connection": false
|
| 9 |
+
}
|
resid_post_all/layer_10_width_16k_l0_small/config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"hf_hook_point_in": "model.layers.10.output",
|
| 3 |
+
"hf_hook_point_out": "model.layers.10.output",
|
| 4 |
+
"width": 16384,
|
| 5 |
+
"model_name": "gemma-v3-270m-it",
|
| 6 |
+
"architecture": "jump_relu",
|
| 7 |
+
"l0": 20,
|
| 8 |
+
"affine_connection": false
|
| 9 |
+
}
|
resid_post_all/layer_10_width_262k_l0_big/config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"hf_hook_point_in": "model.layers.10.output",
|
| 3 |
+
"hf_hook_point_out": "model.layers.10.output",
|
| 4 |
+
"width": 262144,
|
| 5 |
+
"model_name": "gemma-v3-270m-it",
|
| 6 |
+
"architecture": "jump_relu",
|
| 7 |
+
"l0": 120,
|
| 8 |
+
"affine_connection": false
|
| 9 |
+
}
|
resid_post_all/layer_10_width_262k_l0_small/config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"hf_hook_point_in": "model.layers.10.output",
|
| 3 |
+
"hf_hook_point_out": "model.layers.10.output",
|
| 4 |
+
"width": 262144,
|
| 5 |
+
"model_name": "gemma-v3-270m-it",
|
| 6 |
+
"architecture": "jump_relu",
|
| 7 |
+
"l0": 20,
|
| 8 |
+
"affine_connection": false
|
| 9 |
+
}
|
resid_post_all/layer_11_width_16k_l0_big/config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"hf_hook_point_in": "model.layers.11.output",
|
| 3 |
+
"hf_hook_point_out": "model.layers.11.output",
|
| 4 |
+
"width": 16384,
|
| 5 |
+
"model_name": "gemma-v3-270m-it",
|
| 6 |
+
"architecture": "jump_relu",
|
| 7 |
+
"l0": 120,
|
| 8 |
+
"affine_connection": false
|
| 9 |
+
}
|
resid_post_all/layer_11_width_16k_l0_small/config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"hf_hook_point_in": "model.layers.11.output",
|
| 3 |
+
"hf_hook_point_out": "model.layers.11.output",
|
| 4 |
+
"width": 16384,
|
| 5 |
+
"model_name": "gemma-v3-270m-it",
|
| 6 |
+
"architecture": "jump_relu",
|
| 7 |
+
"l0": 20,
|
| 8 |
+
"affine_connection": false
|
| 9 |
+
}
|
resid_post_all/layer_11_width_262k_l0_big/config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"hf_hook_point_in": "model.layers.11.output",
|
| 3 |
+
"hf_hook_point_out": "model.layers.11.output",
|
| 4 |
+
"width": 262144,
|
| 5 |
+
"model_name": "gemma-v3-270m-it",
|
| 6 |
+
"architecture": "jump_relu",
|
| 7 |
+
"l0": 120,
|
| 8 |
+
"affine_connection": false
|
| 9 |
+
}
|
resid_post_all/layer_12_width_16k_l0_big/config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"hf_hook_point_in": "model.layers.12.output",
|
| 3 |
+
"hf_hook_point_out": "model.layers.12.output",
|
| 4 |
+
"width": 16384,
|
| 5 |
+
"model_name": "gemma-v3-270m-it",
|
| 6 |
+
"architecture": "jump_relu",
|
| 7 |
+
"l0": 120,
|
| 8 |
+
"affine_connection": false
|
| 9 |
+
}
|
resid_post_all/layer_12_width_16k_l0_small/config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"hf_hook_point_in": "model.layers.12.output",
|
| 3 |
+
"hf_hook_point_out": "model.layers.12.output",
|
| 4 |
+
"width": 16384,
|
| 5 |
+
"model_name": "gemma-v3-270m-it",
|
| 6 |
+
"architecture": "jump_relu",
|
| 7 |
+
"l0": 20,
|
| 8 |
+
"affine_connection": false
|
| 9 |
+
}
|
resid_post_all/layer_12_width_262k_l0_big/config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"hf_hook_point_in": "model.layers.12.output",
|
| 3 |
+
"hf_hook_point_out": "model.layers.12.output",
|
| 4 |
+
"width": 262144,
|
| 5 |
+
"model_name": "gemma-v3-270m-it",
|
| 6 |
+
"architecture": "jump_relu",
|
| 7 |
+
"l0": 120,
|
| 8 |
+
"affine_connection": false
|
| 9 |
+
}
|
resid_post_all/layer_12_width_262k_l0_small/config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"hf_hook_point_in": "model.layers.12.output",
|
| 3 |
+
"hf_hook_point_out": "model.layers.12.output",
|
| 4 |
+
"width": 262144,
|
| 5 |
+
"model_name": "gemma-v3-270m-it",
|
| 6 |
+
"architecture": "jump_relu",
|
| 7 |
+
"l0": 20,
|
| 8 |
+
"affine_connection": false
|
| 9 |
+
}
|
resid_post_all/layer_13_width_16k_l0_big/config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"hf_hook_point_in": "model.layers.13.output",
|
| 3 |
+
"hf_hook_point_out": "model.layers.13.output",
|
| 4 |
+
"width": 16384,
|
| 5 |
+
"model_name": "gemma-v3-270m-it",
|
| 6 |
+
"architecture": "jump_relu",
|
| 7 |
+
"l0": 120,
|
| 8 |
+
"affine_connection": false
|
| 9 |
+
}
|
resid_post_all/layer_13_width_16k_l0_small/config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"hf_hook_point_in": "model.layers.13.output",
|
| 3 |
+
"hf_hook_point_out": "model.layers.13.output",
|
| 4 |
+
"width": 16384,
|
| 5 |
+
"model_name": "gemma-v3-270m-it",
|
| 6 |
+
"architecture": "jump_relu",
|
| 7 |
+
"l0": 20,
|
| 8 |
+
"affine_connection": false
|
| 9 |
+
}
|
resid_post_all/layer_13_width_262k_l0_big/config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"hf_hook_point_in": "model.layers.13.output",
|
| 3 |
+
"hf_hook_point_out": "model.layers.13.output",
|
| 4 |
+
"width": 262144,
|
| 5 |
+
"model_name": "gemma-v3-270m-it",
|
| 6 |
+
"architecture": "jump_relu",
|
| 7 |
+
"l0": 120,
|
| 8 |
+
"affine_connection": false
|
| 9 |
+
}
|
resid_post_all/layer_13_width_262k_l0_small/config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"hf_hook_point_in": "model.layers.13.output",
|
| 3 |
+
"hf_hook_point_out": "model.layers.13.output",
|
| 4 |
+
"width": 262144,
|
| 5 |
+
"model_name": "gemma-v3-270m-it",
|
| 6 |
+
"architecture": "jump_relu",
|
| 7 |
+
"l0": 20,
|
| 8 |
+
"affine_connection": false
|
| 9 |
+
}
|
resid_post_all/layer_14_width_16k_l0_big/config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"hf_hook_point_in": "model.layers.14.output",
|
| 3 |
+
"hf_hook_point_out": "model.layers.14.output",
|
| 4 |
+
"width": 16384,
|
| 5 |
+
"model_name": "gemma-v3-270m-it",
|
| 6 |
+
"architecture": "jump_relu",
|
| 7 |
+
"l0": 120,
|
| 8 |
+
"affine_connection": false
|
| 9 |
+
}
|
resid_post_all/layer_14_width_16k_l0_small/config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"hf_hook_point_in": "model.layers.14.output",
|
| 3 |
+
"hf_hook_point_out": "model.layers.14.output",
|
| 4 |
+
"width": 16384,
|
| 5 |
+
"model_name": "gemma-v3-270m-it",
|
| 6 |
+
"architecture": "jump_relu",
|
| 7 |
+
"l0": 20,
|
| 8 |
+
"affine_connection": false
|
| 9 |
+
}
|
resid_post_all/layer_14_width_262k_l0_big/config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"hf_hook_point_in": "model.layers.14.output",
|
| 3 |
+
"hf_hook_point_out": "model.layers.14.output",
|
| 4 |
+
"width": 262144,
|
| 5 |
+
"model_name": "gemma-v3-270m-it",
|
| 6 |
+
"architecture": "jump_relu",
|
| 7 |
+
"l0": 120,
|
| 8 |
+
"affine_connection": false
|
| 9 |
+
}
|
resid_post_all/layer_14_width_262k_l0_small/config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"hf_hook_point_in": "model.layers.14.output",
|
| 3 |
+
"hf_hook_point_out": "model.layers.14.output",
|
| 4 |
+
"width": 262144,
|
| 5 |
+
"model_name": "gemma-v3-270m-it",
|
| 6 |
+
"architecture": "jump_relu",
|
| 7 |
+
"l0": 20,
|
| 8 |
+
"affine_connection": false
|
| 9 |
+
}
|
resid_post_all/layer_15_width_16k_l0_big/config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"hf_hook_point_in": "model.layers.15.output",
|
| 3 |
+
"hf_hook_point_out": "model.layers.15.output",
|
| 4 |
+
"width": 16384,
|
| 5 |
+
"model_name": "gemma-v3-270m-it",
|
| 6 |
+
"architecture": "jump_relu",
|
| 7 |
+
"l0": 120,
|
| 8 |
+
"affine_connection": false
|
| 9 |
+
}
|
resid_post_all/layer_15_width_16k_l0_small/config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"hf_hook_point_in": "model.layers.15.output",
|
| 3 |
+
"hf_hook_point_out": "model.layers.15.output",
|
| 4 |
+
"width": 16384,
|
| 5 |
+
"model_name": "gemma-v3-270m-it",
|
| 6 |
+
"architecture": "jump_relu",
|
| 7 |
+
"l0": 20,
|
| 8 |
+
"affine_connection": false
|
| 9 |
+
}
|
resid_post_all/layer_15_width_262k_l0_big/config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"hf_hook_point_in": "model.layers.15.output",
|
| 3 |
+
"hf_hook_point_out": "model.layers.15.output",
|
| 4 |
+
"width": 262144,
|
| 5 |
+
"model_name": "gemma-v3-270m-it",
|
| 6 |
+
"architecture": "jump_relu",
|
| 7 |
+
"l0": 120,
|
| 8 |
+
"affine_connection": false
|
| 9 |
+
}
|
resid_post_all/layer_15_width_262k_l0_small/config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"hf_hook_point_in": "model.layers.15.output",
|
| 3 |
+
"hf_hook_point_out": "model.layers.15.output",
|
| 4 |
+
"width": 262144,
|
| 5 |
+
"model_name": "gemma-v3-270m-it",
|
| 6 |
+
"architecture": "jump_relu",
|
| 7 |
+
"l0": 20,
|
| 8 |
+
"affine_connection": false
|
| 9 |
+
}
|