diff --git a/transcoder/layer_16_width_16k_l0_big/config.json b/transcoder/layer_16_width_16k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e06f4bbbb1c1b04875c2e1f96f11f3966cccb5b9 --- /dev/null +++ b/transcoder/layer_16_width_16k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.16.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.16.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 129, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder/layer_16_width_262k_l0_medium_affine/config.json b/transcoder/layer_16_width_262k_l0_medium_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..81518065771d3d224c3192e3a151b52f2dab86ff --- /dev/null +++ b/transcoder/layer_16_width_262k_l0_medium_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.16.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.16.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 53, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder/layer_16_width_65k_l0_small/config.json b/transcoder/layer_16_width_65k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d789b5682025908c1ffb40ef73e1e2e69498bb15 --- /dev/null +++ b/transcoder/layer_16_width_65k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.16.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.16.post_feedforward_layernorm.output", + "width": 65536, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 17, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder/layer_31_width_16k_l0_small/config.json b/transcoder/layer_31_width_16k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..def1f209b4138eed98d5c84f6ee4da79498a94cf --- /dev/null +++ b/transcoder/layer_31_width_16k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.31.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.31.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder/layer_31_width_16k_l0_small_affine/config.json b/transcoder/layer_31_width_16k_l0_small_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..819afbb625ce0a2f16d96710e46d50d77b14ef0f --- /dev/null +++ b/transcoder/layer_31_width_16k_l0_small_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.31.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.31.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder/layer_31_width_262k_l0_big/config.json b/transcoder/layer_31_width_262k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8fcd8a647eeb7c6dcdca8fb2e5624c1e606dd1df --- /dev/null +++ b/transcoder/layer_31_width_262k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.31.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.31.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 150, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder/layer_31_width_262k_l0_big_affine/config.json b/transcoder/layer_31_width_262k_l0_big_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..89401316579b7d3c88c4c9e39d3151a23a8a320a --- /dev/null +++ b/transcoder/layer_31_width_262k_l0_big_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.31.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.31.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 150, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder/layer_31_width_262k_l0_medium_affine/config.json b/transcoder/layer_31_width_262k_l0_medium_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..db7fca213b517bde13e7831c3b174f8cc69f5597 --- /dev/null +++ b/transcoder/layer_31_width_262k_l0_medium_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.31.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.31.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 60, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder/layer_31_width_262k_l0_small/config.json b/transcoder/layer_31_width_262k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7a29d4ef7f68a7797de44b4106492d753f2732ed --- /dev/null +++ b/transcoder/layer_31_width_262k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.31.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.31.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder/layer_31_width_262k_l0_small_affine/config.json b/transcoder/layer_31_width_262k_l0_small_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fe7047b6862c4bda85f8775ed464104b6114bce6 --- /dev/null +++ b/transcoder/layer_31_width_262k_l0_small_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.31.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.31.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder/layer_31_width_65k_l0_big/config.json b/transcoder/layer_31_width_65k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8e06f58e5409a486ae6592a495713bbbbff14a87 --- /dev/null +++ b/transcoder/layer_31_width_65k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.31.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.31.post_feedforward_layernorm.output", + "width": 65536, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 150, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder/layer_40_width_16k_l0_big_affine/config.json b/transcoder/layer_40_width_16k_l0_big_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ea6fb3582e5a12c32e34d4970c5969375f9fe474 --- /dev/null +++ b/transcoder/layer_40_width_16k_l0_big_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.40.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.40.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 150, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder/layer_40_width_16k_l0_medium/config.json b/transcoder/layer_40_width_16k_l0_medium/config.json new file mode 100644 index 0000000000000000000000000000000000000000..47671fd186a77b519f09f034807b19b35497c2b7 --- /dev/null +++ b/transcoder/layer_40_width_16k_l0_medium/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.40.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.40.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 60, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder/layer_40_width_262k_l0_medium/config.json b/transcoder/layer_40_width_262k_l0_medium/config.json new file mode 100644 index 0000000000000000000000000000000000000000..474b4e1fb76a4a90c0e85eeed7df0e5e383ca83f --- /dev/null +++ b/transcoder/layer_40_width_262k_l0_medium/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.40.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.40.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 60, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder/layer_40_width_262k_l0_medium_affine/config.json b/transcoder/layer_40_width_262k_l0_medium_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a2a96aadb8438ac74748fc613e85dbf9460869e4 --- /dev/null +++ b/transcoder/layer_40_width_262k_l0_medium_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.40.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.40.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 60, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder/layer_53_width_16k_l0_big/config.json b/transcoder/layer_53_width_16k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d6c7afbb016a53ebaa47b9574eec7ca4b69151a0 --- /dev/null +++ b/transcoder/layer_53_width_16k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.53.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.53.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 150, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder/layer_53_width_16k_l0_medium/config.json b/transcoder/layer_53_width_16k_l0_medium/config.json new file mode 100644 index 0000000000000000000000000000000000000000..cd63805ba49f9372a138f21a93cf94117b4d9aa1 --- /dev/null +++ b/transcoder/layer_53_width_16k_l0_medium/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.53.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.53.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 60, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder/layer_53_width_16k_l0_small_affine/config.json b/transcoder/layer_53_width_16k_l0_small_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2b7f6719362f02c3d29d951e7bf9a92250a6025a --- /dev/null +++ b/transcoder/layer_53_width_16k_l0_small_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.53.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.53.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder/layer_53_width_262k_l0_big/config.json b/transcoder/layer_53_width_262k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e6140ce4d03660a5f0c7f7099f0664fb2dd71be9 --- /dev/null +++ b/transcoder/layer_53_width_262k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.53.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.53.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 150, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder/layer_53_width_65k_l0_big/config.json b/transcoder/layer_53_width_65k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..aa79d8717eb4f3833468398854fcdf01f9341679 --- /dev/null +++ b/transcoder/layer_53_width_65k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.53.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.53.post_feedforward_layernorm.output", + "width": 65536, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 150, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder/layer_53_width_65k_l0_medium_affine/config.json b/transcoder/layer_53_width_65k_l0_medium_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8d4eaafcd2a76fcf0dd1c40eb4acff2715818896 --- /dev/null +++ b/transcoder/layer_53_width_65k_l0_medium_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.53.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.53.post_feedforward_layernorm.output", + "width": 65536, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 60, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder/layer_53_width_65k_l0_small/config.json b/transcoder/layer_53_width_65k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fcc7bfda2611c87240a3e98245724f140071c17d --- /dev/null +++ b/transcoder/layer_53_width_65k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.53.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.53.post_feedforward_layernorm.output", + "width": 65536, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_0_width_16k_l0_small/config.json b/transcoder_all/layer_0_width_16k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..324a8cf1447098932b2227d986fda9798d0d6bd9 --- /dev/null +++ b/transcoder_all/layer_0_width_16k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.0.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.0.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 10, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_11_width_16k_l0_big_affine/config.json b/transcoder_all/layer_11_width_16k_l0_big_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6f5d8faa43394a99342f3b80d87e08b750d6196f --- /dev/null +++ b/transcoder_all/layer_11_width_16k_l0_big_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.11.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.11.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 91, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_11_width_262k_l0_small_affine/config.json b/transcoder_all/layer_11_width_262k_l0_small_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d72146ef2488a0b789c342efcd2db3a7f676e9b5 --- /dev/null +++ b/transcoder_all/layer_11_width_262k_l0_small_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.11.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.11.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 15, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_12_width_262k_l0_big/config.json b/transcoder_all/layer_12_width_262k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8f632ae4e875056c6713272a97a1983a45e8f7d6 --- /dev/null +++ b/transcoder_all/layer_12_width_262k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.12.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.12.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 94, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_13_width_262k_l0_small_affine/config.json b/transcoder_all/layer_13_width_262k_l0_small_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..825a622a43d3994b0797a5221c6482db44b9ca37 --- /dev/null +++ b/transcoder_all/layer_13_width_262k_l0_small_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.13.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.13.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 16, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_14_width_16k_l0_small/config.json b/transcoder_all/layer_14_width_16k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..016664d6c7bb41e7a72571001ce22bf91e5efaf3 --- /dev/null +++ b/transcoder_all/layer_14_width_16k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.14.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.14.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 16, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_14_width_16k_l0_small_affine/config.json b/transcoder_all/layer_14_width_16k_l0_small_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c0e93f5ed2ddcbd69475d0ea9d843872a2e65c6d --- /dev/null +++ b/transcoder_all/layer_14_width_16k_l0_small_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.14.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.14.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 16, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_18_width_262k_l0_big/config.json b/transcoder_all/layer_18_width_262k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8f29482b778603b949021e03aa499b7c94d987f0 --- /dev/null +++ b/transcoder_all/layer_18_width_262k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.18.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.18.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 112, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_1_width_16k_l0_big/config.json b/transcoder_all/layer_1_width_16k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..091b4be694a38d9cbe55e95b27df7539ed584842 --- /dev/null +++ b/transcoder_all/layer_1_width_16k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.1.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.1.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 62, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_1_width_262k_l0_small/config.json b/transcoder_all/layer_1_width_262k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d00be62decbe8ef036cbd70b59a2f59ec01758bb --- /dev/null +++ b/transcoder_all/layer_1_width_262k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.1.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.1.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 10, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_22_width_16k_l0_small/config.json b/transcoder_all/layer_22_width_16k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..95eee77bba6d7b980c6e4bc771838c0f6952eb07 --- /dev/null +++ b/transcoder_all/layer_22_width_16k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.22.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.22.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_22_width_16k_l0_small_affine/config.json b/transcoder_all/layer_22_width_16k_l0_small_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ec3fefd9798e56a17effd590001b69b0fd6f41b2 --- /dev/null +++ b/transcoder_all/layer_22_width_16k_l0_small_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.22.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.22.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_28_width_262k_l0_big_affine/config.json b/transcoder_all/layer_28_width_262k_l0_big_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f9bf444b77b157653c18fef0a34a53275b2dd9c1 --- /dev/null +++ b/transcoder_all/layer_28_width_262k_l0_big_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.28.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.28.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_29_width_262k_l0_small_affine/config.json b/transcoder_all/layer_29_width_262k_l0_small_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fb921a552a12d98dcca3dc83e772445d5acfce51 --- /dev/null +++ b/transcoder_all/layer_29_width_262k_l0_small_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.29.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.29.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_31_width_16k_l0_big/config.json b/transcoder_all/layer_31_width_16k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b2f4cce1f949cfb0be2be653791b5a102e697b12 --- /dev/null +++ b/transcoder_all/layer_31_width_16k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.31.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.31.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_32_width_16k_l0_small_affine/config.json b/transcoder_all/layer_32_width_16k_l0_small_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4a7855ceb8df9059f76ef39d0385fedf8eb2f021 --- /dev/null +++ b/transcoder_all/layer_32_width_16k_l0_small_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.32.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.32.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_35_width_262k_l0_small_affine/config.json b/transcoder_all/layer_35_width_262k_l0_small_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1e45b84fd1bcbe8665478fe0ed6e42b4987b9838 --- /dev/null +++ b/transcoder_all/layer_35_width_262k_l0_small_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.35.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.35.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_36_width_16k_l0_small_affine/config.json b/transcoder_all/layer_36_width_16k_l0_small_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..613254f77400e85b4cf0de257d05b3cae1d6864a --- /dev/null +++ b/transcoder_all/layer_36_width_16k_l0_small_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.36.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.36.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_36_width_262k_l0_small/config.json b/transcoder_all/layer_36_width_262k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..aadda7fbbdf56e388a38c305110aefb67d9818fc --- /dev/null +++ b/transcoder_all/layer_36_width_262k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.36.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.36.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_3_width_16k_l0_big/config.json b/transcoder_all/layer_3_width_16k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..66b6503a19eea6c7bb0660e7f59fa50db3c21961 --- /dev/null +++ b/transcoder_all/layer_3_width_16k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.3.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.3.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 68, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_40_width_16k_l0_small/config.json b/transcoder_all/layer_40_width_16k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..956cc88153e2c70c69b69df706839c223727c507 --- /dev/null +++ b/transcoder_all/layer_40_width_16k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.40.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.40.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_40_width_16k_l0_small_affine/config.json b/transcoder_all/layer_40_width_16k_l0_small_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5adf97fc746aa68315a5341d5412cffadf5a5814 --- /dev/null +++ b/transcoder_all/layer_40_width_16k_l0_small_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.40.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.40.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_40_width_262k_l0_big/config.json b/transcoder_all/layer_40_width_262k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..84340c048e007b7621f3c3feaf7f38156ef9c415 --- /dev/null +++ b/transcoder_all/layer_40_width_262k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.40.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.40.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_41_width_262k_l0_small/config.json b/transcoder_all/layer_41_width_262k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fc92ac39aa217d1a4c6724365aea403573cd835a --- /dev/null +++ b/transcoder_all/layer_41_width_262k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.41.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.41.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_42_width_16k_l0_small/config.json b/transcoder_all/layer_42_width_16k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ddaeda1e8cbff6014f31e140e6ad629d52ec4929 --- /dev/null +++ b/transcoder_all/layer_42_width_16k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.42.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.42.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_44_width_262k_l0_big_affine/config.json b/transcoder_all/layer_44_width_262k_l0_big_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..80b6b3bc39193c7499988714df46766ecf484092 --- /dev/null +++ b/transcoder_all/layer_44_width_262k_l0_big_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.44.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.44.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_45_width_16k_l0_big_affine/config.json b/transcoder_all/layer_45_width_16k_l0_big_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b0d944fac33267b8ed72bf90ef1878086c15a0a8 --- /dev/null +++ b/transcoder_all/layer_45_width_16k_l0_big_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.45.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.45.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_46_width_16k_l0_big/config.json b/transcoder_all/layer_46_width_16k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..346d0b5e7ad21e5c69dd6458a758f004e1742535 --- /dev/null +++ b/transcoder_all/layer_46_width_16k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.46.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.46.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_46_width_16k_l0_big_affine/config.json b/transcoder_all/layer_46_width_16k_l0_big_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..324c475af09eec7db6a43e769c9b3634c5ef2906 --- /dev/null +++ b/transcoder_all/layer_46_width_16k_l0_big_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.46.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.46.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_46_width_16k_l0_small_affine/config.json b/transcoder_all/layer_46_width_16k_l0_small_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1d1b6fdb1c40c09a78632a58c2cad4911c3f4169 --- /dev/null +++ b/transcoder_all/layer_46_width_16k_l0_small_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.46.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.46.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_46_width_262k_l0_small/config.json b/transcoder_all/layer_46_width_262k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a43f4b0ede299c56cee05d1213fc2a2ff7ffeb6e --- /dev/null +++ b/transcoder_all/layer_46_width_262k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.46.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.46.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_49_width_16k_l0_big/config.json b/transcoder_all/layer_49_width_16k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0fa6bbdde6754ab3ad1f8dca4b02302ca4b853ad --- /dev/null +++ b/transcoder_all/layer_49_width_16k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.49.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.49.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_50_width_262k_l0_big_affine/config.json b/transcoder_all/layer_50_width_262k_l0_big_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..24f34f9c756187d2570fa0beef7ad4e4a31a4425 --- /dev/null +++ b/transcoder_all/layer_50_width_262k_l0_big_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.50.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.50.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_51_width_16k_l0_small/config.json b/transcoder_all/layer_51_width_16k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..572b5bd0cbee48eb8bd220b343392b928a01fde8 --- /dev/null +++ b/transcoder_all/layer_51_width_16k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.51.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.51.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_51_width_262k_l0_big_affine/config.json b/transcoder_all/layer_51_width_262k_l0_big_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..89acd293c80f8094b6d00d3696b8bb31b9d7653c --- /dev/null +++ b/transcoder_all/layer_51_width_262k_l0_big_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.51.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.51.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_51_width_262k_l0_small_affine/config.json b/transcoder_all/layer_51_width_262k_l0_small_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5cd18580a118e9496e165732e78561c8ec1f281f --- /dev/null +++ b/transcoder_all/layer_51_width_262k_l0_small_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.51.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.51.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_52_width_262k_l0_big/config.json b/transcoder_all/layer_52_width_262k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0f47bba09c967717485e3d3c90f4378d624a77eb --- /dev/null +++ b/transcoder_all/layer_52_width_262k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.52.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.52.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_53_width_16k_l0_big_affine/config.json b/transcoder_all/layer_53_width_16k_l0_big_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e57aeafff11664702463732f88872b79c7cb149c --- /dev/null +++ b/transcoder_all/layer_53_width_16k_l0_big_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.53.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.53.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_54_width_262k_l0_big/config.json b/transcoder_all/layer_54_width_262k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f10b0af60a53a6fcad0efd0c6bd9f27b1593d1b4 --- /dev/null +++ b/transcoder_all/layer_54_width_262k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.54.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.54.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_56_width_16k_l0_small/config.json b/transcoder_all/layer_56_width_16k_l0_small/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3e92e0569eb323655b9d5d05635bc69c83a9855d --- /dev/null +++ b/transcoder_all/layer_56_width_16k_l0_small/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.56.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.56.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_59_width_16k_l0_big_affine/config.json b/transcoder_all/layer_59_width_16k_l0_big_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8cec1a45c2e9e4493e4890901e75867b52b5c4 --- /dev/null +++ b/transcoder_all/layer_59_width_16k_l0_big_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.59.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.59.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_59_width_262k_l0_small_affine/config.json b/transcoder_all/layer_59_width_262k_l0_small_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e70cdbc779d20ae9e2fb56e319576c808514ab2d --- /dev/null +++ b/transcoder_all/layer_59_width_262k_l0_small_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.59.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.59.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_5_width_16k_l0_small_affine/config.json b/transcoder_all/layer_5_width_16k_l0_small_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2bc805c793c75def5fe202f23f47b0871e17e96f --- /dev/null +++ b/transcoder_all/layer_5_width_16k_l0_small_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.5.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.5.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 12, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_60_width_262k_l0_big/config.json b/transcoder_all/layer_60_width_262k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..35ed2ed0cbbc03b2c7350dcc5c30a703af1cbb08 --- /dev/null +++ b/transcoder_all/layer_60_width_262k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.60.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.60.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_60_width_262k_l0_big/params.safetensors b/transcoder_all/layer_60_width_262k_l0_big/params.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/transcoder_all/layer_61_width_262k_l0_big_affine/config.json b/transcoder_all/layer_61_width_262k_l0_big_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2051a9e79d0da6aabb4ac0326bc2fbc4056dbbf0 --- /dev/null +++ b/transcoder_all/layer_61_width_262k_l0_big_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.61.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.61.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 120, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_61_width_262k_l0_big_affine/params.safetensors b/transcoder_all/layer_61_width_262k_l0_big_affine/params.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/transcoder_all/layer_61_width_262k_l0_small_affine/config.json b/transcoder_all/layer_61_width_262k_l0_small_affine/config.json new file mode 100644 index 0000000000000000000000000000000000000000..78939884b54f726f41232d1346dac2d412c0ae7a --- /dev/null +++ b/transcoder_all/layer_61_width_262k_l0_small_affine/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.61.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.61.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 20, + "affine_connection": true +} \ No newline at end of file diff --git a/transcoder_all/layer_61_width_262k_l0_small_affine/params.safetensors b/transcoder_all/layer_61_width_262k_l0_small_affine/params.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/transcoder_all/layer_9_width_16k_l0_big/config.json b/transcoder_all/layer_9_width_16k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..dd9f83c48e85af954cf34fe7ef420585ddc4b1c2 --- /dev/null +++ b/transcoder_all/layer_9_width_16k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.9.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.9.post_feedforward_layernorm.output", + "width": 16384, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 86, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_9_width_16k_l0_big/params.safetensors b/transcoder_all/layer_9_width_16k_l0_big/params.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/transcoder_all/layer_9_width_262k_l0_big/config.json b/transcoder_all/layer_9_width_262k_l0_big/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4e650ca6d67c37bba0bd908191fe9f3d7d628587 --- /dev/null +++ b/transcoder_all/layer_9_width_262k_l0_big/config.json @@ -0,0 +1,9 @@ +{ + "hf_hook_point_in": "model.layers.9.pre_feedforward_layernorm.output", + "hf_hook_point_out": "model.layers.9.post_feedforward_layernorm.output", + "width": 262144, + "model_name": "gemma-v3-27b-it", + "architecture": "jump_relu", + "l0": 86, + "affine_connection": false +} \ No newline at end of file diff --git a/transcoder_all/layer_9_width_262k_l0_big/params.safetensors b/transcoder_all/layer_9_width_262k_l0_big/params.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391