diff --git a/attn_out/layer_12_width_16k_l0_big/config.json b/attn_out/layer_12_width_16k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a1d1314c8e7d156b83b10b145fff3cc75f2dc725 --- /dev/null +++ b/attn_out/layer_12_width_16k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.12.self_attn.o_proj.input", + "hf_hook_point_out": "model.layers.12.self_attn.o_proj.input", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 150, + "affine_connection": false +} \ No newline at end of file diff --git a/attn_out/layer_12_width_16k_l0_small/config.json b/attn_out/layer_12_width_16k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..451276b588a740b897000a6570654372204d4fe3 --- /dev/null +++ b/attn_out/layer_12_width_16k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.12.self_attn.o_proj.input", + "hf_hook_point_out": "model.layers.12.self_attn.o_proj.input", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": false +} \ No newline at end of file diff --git a/attn_out/layer_12_width_262k_l0_big/config.json b/attn_out/layer_12_width_262k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8d23d5aa25ee2f9b0ffe2fe9b403c944a52aeb05 --- /dev/null +++ b/attn_out/layer_12_width_262k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.12.self_attn.o_proj.input", + "hf_hook_point_out": "model.layers.12.self_attn.o_proj.input", + "width": 262144, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 150, + "affine_connection": false +} \ No newline at end of file diff --git a/attn_out/layer_12_width_262k_l0_small/config.json b/attn_out/layer_12_width_262k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..741fb3dc840a023a549d7ba4556198f2d475fa5d --- /dev/null +++ b/attn_out/layer_12_width_262k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.12.self_attn.o_proj.input", + "hf_hook_point_out": "model.layers.12.self_attn.o_proj.input", + "width": 262144, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": false +} \ No newline at end of file diff --git a/attn_out/layer_12_width_65k_l0_big/config.json b/attn_out/layer_12_width_65k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0702595f9c217b6009c22e61799b631f19de1f1b --- /dev/null +++ b/attn_out/layer_12_width_65k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.12.self_attn.o_proj.input", + "hf_hook_point_out": "model.layers.12.self_attn.o_proj.input", + "width": 65536, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 150, + "affine_connection": false +} \ No newline at end of file diff --git a/attn_out/layer_15_width_16k_l0_medium/config.json b/attn_out/layer_15_width_16k_l0_medium/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6f40db29c4ddbf409b291444766347841f9d94b3 --- /dev/null +++ b/attn_out/layer_15_width_16k_l0_medium/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.15.self_attn.o_proj.input", + "hf_hook_point_out": "model.layers.15.self_attn.o_proj.input", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 60, + "affine_connection": false +} \ No newline at end of file diff --git a/attn_out/layer_15_width_16k_l0_small/config.json b/attn_out/layer_15_width_16k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e0690184806bca6fe8d8b4596c4d85e29c3b5172 --- /dev/null +++ b/attn_out/layer_15_width_16k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.15.self_attn.o_proj.input", + "hf_hook_point_out": "model.layers.15.self_attn.o_proj.input", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": false +} \ No newline at end of file diff --git a/attn_out/layer_15_width_262k_l0_medium/config.json b/attn_out/layer_15_width_262k_l0_medium/config.json new file mode 100644 index 0000000000000000000000000000000000000000..48de0de1377f404d3d0aac4126c3a1dd55083713 --- /dev/null +++ b/attn_out/layer_15_width_262k_l0_medium/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.15.self_attn.o_proj.input", + "hf_hook_point_out": "model.layers.15.self_attn.o_proj.input", + "width": 262144, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 60, + "affine_connection": false +} \ No newline at end of file diff --git a/attn_out/layer_15_width_262k_l0_small/config.json b/attn_out/layer_15_width_262k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b6455588e9365da4c520afd98452d761e870d6de --- /dev/null +++ b/attn_out/layer_15_width_262k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.15.self_attn.o_proj.input", + "hf_hook_point_out": "model.layers.15.self_attn.o_proj.input", + "width": 262144, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": false +} \ No newline at end of file diff --git a/attn_out/layer_15_width_65k_l0_small/config.json b/attn_out/layer_15_width_65k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fb353d6f64ca22f2fb78027e39e9c50351530e23 --- /dev/null +++ b/attn_out/layer_15_width_65k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.15.self_attn.o_proj.input", + "hf_hook_point_out": "model.layers.15.self_attn.o_proj.input", + "width": 65536, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": false +} \ No newline at end of file diff --git a/attn_out/layer_9_width_16k_l0_big/config.json b/attn_out/layer_9_width_16k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e2be54d160a43837f5d6e56d9939d7215223c8ec --- /dev/null +++ b/attn_out/layer_9_width_16k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.9.self_attn.o_proj.input", + "hf_hook_point_out": "model.layers.9.self_attn.o_proj.input", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 150, + "affine_connection": false +} \ No newline at end of file diff --git a/attn_out/layer_9_width_262k_l0_small/config.json b/attn_out/layer_9_width_262k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d95c406aef894f2f53481ddfcb0215cdd65214ea --- /dev/null +++ b/attn_out/layer_9_width_262k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.9.self_attn.o_proj.input", + "hf_hook_point_out": "model.layers.9.self_attn.o_proj.input", + "width": 262144, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_0_width_16k_l0_big/config.json b/transcoder_all/layer_0_width_16k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..bfb5494200b715f6b5485eb82b9c57d01adf4cf8 --- /dev/null +++ b/transcoder_all/layer_0_width_16k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.0.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.0.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 60, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_0_width_262k_l0_big_affine/config.json b/transcoder_all/layer_0_width_262k_l0_big_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c61b4e0265ea7c886da3848f3b38affce4cae5b0 --- /dev/null +++ b/transcoder_all/layer_0_width_262k_l0_big_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.0.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.0.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 60, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_0_width_262k_l0_small/config.json b/transcoder_all/layer_0_width_262k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..95cbb2c3ace8947e3c232ccb64b9f68e87e492f2 --- /dev/null +++ b/transcoder_all/layer_0_width_262k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.0.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.0.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 10, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_10_width_16k_l0_big/config.json b/transcoder_all/layer_10_width_16k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ebb20520fb3c46e530456245ad47a5d943196983 --- /dev/null +++ b/transcoder_all/layer_10_width_16k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.10.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.10.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_10_width_16k_l0_small/config.json b/transcoder_all/layer_10_width_16k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ed003058b8ba6c078df31b4be4059e4ab6cf9b59 --- /dev/null +++ b/transcoder_all/layer_10_width_16k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.10.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.10.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_10_width_16k_l0_small_affine/config.json b/transcoder_all/layer_10_width_16k_l0_small_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d8b75cc37a65ba851bf0d4466fbd369b6943ad9e --- /dev/null +++ b/transcoder_all/layer_10_width_16k_l0_small_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.10.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.10.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_10_width_262k_l0_big_affine/config.json b/transcoder_all/layer_10_width_262k_l0_big_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d06210222132d43cc76b782810350f1494063064 --- /dev/null +++ b/transcoder_all/layer_10_width_262k_l0_big_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.10.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.10.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_11_width_16k_l0_big/config.json b/transcoder_all/layer_11_width_16k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..be6c1adcd8169f015eea10ddb757667a6beb1557 --- /dev/null +++ b/transcoder_all/layer_11_width_16k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.11.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.11.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_11_width_16k_l0_big_affine/config.json b/transcoder_all/layer_11_width_16k_l0_big_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ee7ff18d553cb58c92447323e2b71c94f8e5f1f0 --- /dev/null +++ b/transcoder_all/layer_11_width_16k_l0_big_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.11.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.11.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_11_width_16k_l0_small/config.json b/transcoder_all/layer_11_width_16k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..cd4c70ade12c57ff138ea11c98da45dcef6f39c2 --- /dev/null +++ b/transcoder_all/layer_11_width_16k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.11.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.11.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_11_width_262k_l0_small/config.json b/transcoder_all/layer_11_width_262k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6002baeac7f0af73750d97b4107120583ec7902f --- /dev/null +++ b/transcoder_all/layer_11_width_262k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.11.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.11.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_11_width_262k_l0_small_affine/config.json b/transcoder_all/layer_11_width_262k_l0_small_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c88bd4e24fa08f3a7bf704cf0588323c8ca05ab5 --- /dev/null +++ b/transcoder_all/layer_11_width_262k_l0_small_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.11.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.11.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_12_width_16k_l0_big_affine/config.json b/transcoder_all/layer_12_width_16k_l0_big_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e0db464fa728930a074a163276cb2df6e8c532a8 --- /dev/null +++ b/transcoder_all/layer_12_width_16k_l0_big_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.12.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.12.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_13_width_16k_l0_big_affine/config.json b/transcoder_all/layer_13_width_16k_l0_big_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3256f801a534297b85dd6aa7c390fd6d520f718a --- /dev/null +++ b/transcoder_all/layer_13_width_16k_l0_big_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.13.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.13.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_13_width_16k_l0_small_affine/config.json b/transcoder_all/layer_13_width_16k_l0_small_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7ca4249a3fbec246a4afa8a7214bde9831d5cc42 --- /dev/null +++ b/transcoder_all/layer_13_width_16k_l0_small_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.13.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.13.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_13_width_262k_l0_big_affine/config.json b/transcoder_all/layer_13_width_262k_l0_big_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d580a6473e80df7b8b5b621b9373e02bb63ede00 --- /dev/null +++ b/transcoder_all/layer_13_width_262k_l0_big_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.13.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.13.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_13_width_262k_l0_small_affine/config.json b/transcoder_all/layer_13_width_262k_l0_small_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c888789ece6b992b7aa396a1d1ef5dee7ea3679c --- /dev/null +++ b/transcoder_all/layer_13_width_262k_l0_small_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.13.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.13.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_14_width_16k_l0_big/config.json b/transcoder_all/layer_14_width_16k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c0cab431f0881f87e0aaf42717d52ca9bf78b077 --- /dev/null +++ b/transcoder_all/layer_14_width_16k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.14.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.14.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_14_width_16k_l0_big_affine/config.json b/transcoder_all/layer_14_width_16k_l0_big_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b616b2309704597a6eaea6d00c98faa84b3802cc --- /dev/null +++ b/transcoder_all/layer_14_width_16k_l0_big_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.14.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.14.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_15_width_16k_l0_big_affine/config.json b/transcoder_all/layer_15_width_16k_l0_big_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..87a99f5eb488bdca5c82c29904087fb8482d6dce --- /dev/null +++ b/transcoder_all/layer_15_width_16k_l0_big_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.15.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.15.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_15_width_262k_l0_big_affine/config.json b/transcoder_all/layer_15_width_262k_l0_big_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1d4b82f47cd72259203f5e9587ab1fbe54fb481f --- /dev/null +++ b/transcoder_all/layer_15_width_262k_l0_big_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.15.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.15.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_16_width_262k_l0_big_affine/config.json b/transcoder_all/layer_16_width_262k_l0_big_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..31b22c5b0a0efbea125b323f3d639db56173d91d --- /dev/null +++ b/transcoder_all/layer_16_width_262k_l0_big_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.16.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.16.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_16_width_262k_l0_small/config.json b/transcoder_all/layer_16_width_262k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8c9fe88292e5c55d1f9c9ca36794c0cf7e01c0f3 --- /dev/null +++ b/transcoder_all/layer_16_width_262k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.16.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.16.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_16_width_262k_l0_small_affine/config.json b/transcoder_all/layer_16_width_262k_l0_small_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d92c14ed876f570b8cfe9bdc2041bc4066ccca2c --- /dev/null +++ b/transcoder_all/layer_16_width_262k_l0_small_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.16.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.16.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_17_width_16k_l0_big/config.json b/transcoder_all/layer_17_width_16k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6b7c0a04ae3c37424c7e6e1fe8e6d609b1f15a4a --- /dev/null +++ b/transcoder_all/layer_17_width_16k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.17.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.17.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_17_width_16k_l0_small/config.json b/transcoder_all/layer_17_width_16k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1a33e1200d3c5c4127f7f9fe1222c53e031ff0c0 --- /dev/null +++ b/transcoder_all/layer_17_width_16k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.17.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.17.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_17_width_262k_l0_big/config.json b/transcoder_all/layer_17_width_262k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..941fcfc4f71b0c5e167d5616c9cc125ed41df97e --- /dev/null +++ b/transcoder_all/layer_17_width_262k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.17.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.17.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_17_width_262k_l0_big_affine/config.json b/transcoder_all/layer_17_width_262k_l0_big_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2a5b0cfa7d130bebd75ef04451af6dd21d2976be --- /dev/null +++ b/transcoder_all/layer_17_width_262k_l0_big_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.17.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.17.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_17_width_262k_l0_small/config.json b/transcoder_all/layer_17_width_262k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8b8e9c4767ec151d0da8ffda060d90576b2c6fa --- /dev/null +++ b/transcoder_all/layer_17_width_262k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.17.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.17.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_1_width_262k_l0_small_affine/config.json b/transcoder_all/layer_1_width_262k_l0_small_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..77c0db3d07fcf6264ed0b00e3feba33186e2d9da --- /dev/null +++ b/transcoder_all/layer_1_width_262k_l0_small_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.1.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.1.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 11, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_2_width_16k_l0_big/config.json b/transcoder_all/layer_2_width_16k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e7fbca418d3099cee790318907ba131bf576b45 --- /dev/null +++ b/transcoder_all/layer_2_width_16k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.2.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.2.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 80, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_2_width_16k_l0_small/config.json b/transcoder_all/layer_2_width_16k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d189e7d50c05029d0bffacce943d3600d33a629f --- /dev/null +++ b/transcoder_all/layer_2_width_16k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.2.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.2.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 13, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_2_width_16k_l0_small_affine/config.json b/transcoder_all/layer_2_width_16k_l0_small_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3cc4af8c4408d2df86ebfa571243b4ee82f268a8 --- /dev/null +++ b/transcoder_all/layer_2_width_16k_l0_small_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.2.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.2.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 13, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_2_width_262k_l0_big/config.json b/transcoder_all/layer_2_width_262k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2cac582fa63a4bcfa44182778af8499588f4222c --- /dev/null +++ b/transcoder_all/layer_2_width_262k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.2.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.2.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 80, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_2_width_262k_l0_small_affine/config.json b/transcoder_all/layer_2_width_262k_l0_small_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..01120267e6e5cab244a329f49c04781322753643 --- /dev/null +++ b/transcoder_all/layer_2_width_262k_l0_small_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.2.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.2.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 13, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_3_width_16k_l0_big/config.json b/transcoder_all/layer_3_width_16k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..919c04048c090e3e14f91f76feff2b32dd65f6bf --- /dev/null +++ b/transcoder_all/layer_3_width_16k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.3.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.3.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 90, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_3_width_16k_l0_small_affine/config.json b/transcoder_all/layer_3_width_16k_l0_small_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..44c821fa4dcb1ec7fc9daffc3187b1a6fc6996ba --- /dev/null +++ b/transcoder_all/layer_3_width_16k_l0_small_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.3.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.3.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 15, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_3_width_262k_l0_big_affine/config.json b/transcoder_all/layer_3_width_262k_l0_big_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..47c72ee1ba248febd2b461a6fafcccc618cf6e3a --- /dev/null +++ b/transcoder_all/layer_3_width_262k_l0_big_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.3.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.3.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 90, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_3_width_262k_l0_small/config.json b/transcoder_all/layer_3_width_262k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..693bd173af8113336cffa1910143c6b3f69334ae --- /dev/null +++ b/transcoder_all/layer_3_width_262k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.3.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.3.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 15, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_3_width_262k_l0_small_affine/config.json b/transcoder_all/layer_3_width_262k_l0_small_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2045ae57d9bfbd03cad865cb17766720f37b525d --- /dev/null +++ b/transcoder_all/layer_3_width_262k_l0_small_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.3.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.3.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 15, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_4_width_16k_l0_small/config.json b/transcoder_all/layer_4_width_16k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..79d385f59fc574f83284068dc12cd64215d555d9 --- /dev/null +++ b/transcoder_all/layer_4_width_16k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.4.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.4.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 16, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_4_width_262k_l0_small_affine/config.json b/transcoder_all/layer_4_width_262k_l0_small_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1a3460dc093827776e0ce13f10727dec4c0552cf --- /dev/null +++ b/transcoder_all/layer_4_width_262k_l0_small_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.4.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.4.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 16, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_5_width_16k_l0_big/config.json b/transcoder_all/layer_5_width_16k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..09dda6e574c1d537fd570d17f7ebb82a7c94eef2 --- /dev/null +++ b/transcoder_all/layer_5_width_16k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.5.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.5.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 110, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_5_width_16k_l0_small/config.json b/transcoder_all/layer_5_width_16k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..433eb32b49037ba116932e436aff6444bc560293 --- /dev/null +++ b/transcoder_all/layer_5_width_16k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.5.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.5.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 18, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_5_width_16k_l0_small_affine/config.json b/transcoder_all/layer_5_width_16k_l0_small_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4455a1472ec311214577a5501991bc225fa36e3e --- /dev/null +++ b/transcoder_all/layer_5_width_16k_l0_small_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.5.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.5.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 18, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_5_width_262k_l0_small/config.json b/transcoder_all/layer_5_width_262k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..51a308ea45bf2d7f922722a504eec8ec275daad7 --- /dev/null +++ b/transcoder_all/layer_5_width_262k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.5.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.5.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 18, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_5_width_262k_l0_small_affine/config.json b/transcoder_all/layer_5_width_262k_l0_small_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..bc25118bd55a2b71e9c50ca8ffe9272520ef2d72 --- /dev/null +++ b/transcoder_all/layer_5_width_262k_l0_small_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.5.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.5.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 18, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_6_width_16k_l0_big_affine/config.json b/transcoder_all/layer_6_width_16k_l0_big_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b199bdace0cba2c0bb96f59276623f6c9e62ead9 --- /dev/null +++ b/transcoder_all/layer_6_width_16k_l0_big_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.6.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.6.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_6_width_16k_l0_small/config.json b/transcoder_all/layer_6_width_16k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..398ddc6d4125f015fc52ee8704c3e6b25a210c69 --- /dev/null +++ b/transcoder_all/layer_6_width_16k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.6.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.6.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_7_width_16k_l0_big/config.json b/transcoder_all/layer_7_width_16k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e04502d3b14d1eece77307b34de1de90cec2d86b --- /dev/null +++ b/transcoder_all/layer_7_width_16k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.7.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.7.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_7_width_16k_l0_big_affine/config.json b/transcoder_all/layer_7_width_16k_l0_big_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..37401bfe5a584fe4ef7ad256addb300734b29bc1 --- /dev/null +++ b/transcoder_all/layer_7_width_16k_l0_big_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.7.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.7.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_7_width_16k_l0_small/config.json b/transcoder_all/layer_7_width_16k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f425e7528c5df9d91b8866e6796cfe9b663d2602 --- /dev/null +++ b/transcoder_all/layer_7_width_16k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.7.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.7.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_7_width_16k_l0_small_affine/config.json b/transcoder_all/layer_7_width_16k_l0_small_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7cec02f803c70771951243e8bc6923860e0f0b72 --- /dev/null +++ b/transcoder_all/layer_7_width_16k_l0_small_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.7.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.7.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_7_width_262k_l0_big/config.json b/transcoder_all/layer_7_width_262k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3740df3afdf9bec13dffcb9427e4c3893c75529a --- /dev/null +++ b/transcoder_all/layer_7_width_262k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.7.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.7.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_8_width_16k_l0_big/config.json b/transcoder_all/layer_8_width_16k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3776cc60cc65e022dd59b8a35b7711e3e869525b --- /dev/null +++ b/transcoder_all/layer_8_width_16k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.8.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.8.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_8_width_16k_l0_small_affine/config.json b/transcoder_all/layer_8_width_16k_l0_small_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e6c4c3d7b8211ded5f2d74420b80c0f242828127 --- /dev/null +++ b/transcoder_all/layer_8_width_16k_l0_small_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.8.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.8.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_8_width_262k_l0_big/config.json b/transcoder_all/layer_8_width_262k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..86b64c16f62cc53e32adfb7fe1cc3f7eb851aac6 --- /dev/null +++ b/transcoder_all/layer_8_width_262k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.8.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.8.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_8_width_262k_l0_big_affine/config.json b/transcoder_all/layer_8_width_262k_l0_big_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1bed4eeb8f9ed9da412fc28b9f84b650bcce75af --- /dev/null +++ b/transcoder_all/layer_8_width_262k_l0_big_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.8.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.8.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_8_width_262k_l0_small/config.json b/transcoder_all/layer_8_width_262k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d87019e706c4bb1872b3fc649afcce7755cf4f46 --- /dev/null +++ b/transcoder_all/layer_8_width_262k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.8.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.8.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_9_width_16k_l0_big_affine/config.json b/transcoder_all/layer_9_width_16k_l0_big_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4b9667fcc447a3099ca0b1ba675649549df262a4 --- /dev/null +++ b/transcoder_all/layer_9_width_16k_l0_big_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.9.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.9.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_9_width_16k_l0_small_affine/config.json b/transcoder_all/layer_9_width_16k_l0_small_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4ea0330788c25238d6244aa55b4c0a08f9389aab --- /dev/null +++ b/transcoder_all/layer_9_width_16k_l0_small_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.9.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.9.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_9_width_262k_l0_big/config.json b/transcoder_all/layer_9_width_262k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..66374ea8e78050636add4c79034b389b0078f254 --- /dev/null +++ b/transcoder_all/layer_9_width_262k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.9.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.9.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_9_width_262k_l0_small_affine/config.json b/transcoder_all/layer_9_width_262k_l0_small_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d8e18e8d79d6ea5a843faa77f0d01b254da5549a --- /dev/null +++ b/transcoder_all/layer_9_width_262k_l0_small_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.9.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.9.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-270m-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": true +} \ No newline at end of file