michaelwaves committed on 8 days ago

Commit

b9e1f68

verified ·

1 Parent(s): 1f792da

Add files using upload-large-folder tool

Browse files

Files changed (50) hide show

resid_post_layer_14/trainer_0/ae.pt +3 -0
resid_post_layer_14/trainer_0/config.json +53 -0
resid_post_layer_14/trainer_0/eval_results.json +1 -0
resid_post_layer_14/trainer_1/config.json +53 -0
resid_post_layer_14/trainer_1/eval_results.json +1 -0
resid_post_layer_14/trainer_2/config.json +53 -0
resid_post_layer_14/trainer_2/eval_results.json +1 -0
resid_post_layer_14/trainer_3/config.json +53 -0
resid_post_layer_14/trainer_3/eval_results.json +1 -0
resid_post_layer_21/trainer_0/ae.pt +3 -0
resid_post_layer_21/trainer_0/checkpoints/ae_0.pt +3 -0
resid_post_layer_21/trainer_0/checkpoints/ae_122.pt +3 -0
resid_post_layer_21/trainer_0/checkpoints/ae_1220.pt +3 -0
resid_post_layer_21/trainer_0/checkpoints/ae_12207.pt +3 -0
resid_post_layer_21/trainer_0/checkpoints/ae_386.pt +3 -0
resid_post_layer_21/trainer_0/checkpoints/ae_3860.pt +3 -0
resid_post_layer_21/trainer_0/checkpoints/ae_38601.pt +3 -0
resid_post_layer_21/trainer_0/config.json +53 -0
resid_post_layer_21/trainer_0/eval_results.json +1 -0
resid_post_layer_21/trainer_1/ae.pt +3 -0
resid_post_layer_21/trainer_1/checkpoints/ae_0.pt +3 -0
resid_post_layer_21/trainer_1/checkpoints/ae_122.pt +3 -0
resid_post_layer_21/trainer_1/checkpoints/ae_1220.pt +3 -0
resid_post_layer_21/trainer_1/checkpoints/ae_12207.pt +3 -0
resid_post_layer_21/trainer_1/checkpoints/ae_3860.pt +3 -0
resid_post_layer_21/trainer_1/checkpoints/ae_38601.pt +3 -0
resid_post_layer_21/trainer_1/config.json +53 -0
resid_post_layer_21/trainer_1/eval_results.json +1 -0
resid_post_layer_21/trainer_2/ae.pt +3 -0
resid_post_layer_21/trainer_2/checkpoints/ae_386.pt +3 -0
resid_post_layer_21/trainer_2/config.json +53 -0
resid_post_layer_21/trainer_2/eval_results.json +1 -0
resid_post_layer_21/trainer_3/ae.pt +3 -0
resid_post_layer_21/trainer_3/checkpoints/ae_0.pt +3 -0
resid_post_layer_21/trainer_3/checkpoints/ae_12207.pt +3 -0
resid_post_layer_21/trainer_3/checkpoints/ae_386.pt +3 -0
resid_post_layer_21/trainer_3/config.json +53 -0
resid_post_layer_21/trainer_3/eval_results.json +1 -0
resid_post_layer_7/trainer_0/ae.pt +3 -0
resid_post_layer_7/trainer_0/config.json +53 -0
resid_post_layer_7/trainer_0/eval_results.json +1 -0
resid_post_layer_7/trainer_1/ae.pt +3 -0
resid_post_layer_7/trainer_1/config.json +53 -0
resid_post_layer_7/trainer_1/eval_results.json +1 -0
resid_post_layer_7/trainer_2/ae.pt +3 -0
resid_post_layer_7/trainer_2/config.json +53 -0
resid_post_layer_7/trainer_2/eval_results.json +1 -0
resid_post_layer_7/trainer_3/ae.pt +3 -0
resid_post_layer_7/trainer_3/config.json +53 -0
resid_post_layer_7/trainer_3/eval_results.json +1 -0

resid_post_layer_14/trainer_0/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e972e49426cc4ad5b47e819305b259747544401e9eab036c195e5925a037589
+size 335622413

resid_post_layer_14/trainer_0/config.json ADDED Viewed

	@@ -0,0 +1,53 @@

+{
+    "trainer": {
+        "trainer_class": "MatryoshkaBatchTopKTrainer",
+        "dict_class": "MatryoshkaBatchTopKSAE",
+        "lr": 5e-05,
+        "steps": 122070,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 97656,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 1280,
+        "seed": 0,
+        "activation_dim": 2560,
+        "dict_size": 16384,
+        "group_fractions": [
+            0.03125,
+            0.0625,
+            0.125,
+            0.25,
+            0.53125
+        ],
+        "group_weights": [
+            0.2,
+            0.2,
+            0.2,
+            0.2,
+            0.2
+        ],
+        "group_sizes": [
+            512,
+            1024,
+            2048,
+            4096,
+            8704
+        ],
+        "k": 80,
+        "device": "cuda:0",
+        "layer": 14,
+        "lm_name": "google/gemma-4-E4B",
+        "wandb_name": "MatryoshkaBatchTopKTrainer-google/gemma-4-E4B-resid_post_layer_14_trainer_0",
+        "submodule_name": "resid_post_layer_14"
+    },
+    "buffer": {
+        "d_submodule": 2560,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 16,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

resid_post_layer_14/trainer_0/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 33.73863636363637, "l1_loss": 367.6060606060606, "l0": 86.32287019671816, "frac_variance_explained": 0.7295217803030303, "cossim": 0.94140625, "l2_ratio": 0.949514678030303, "relative_reconstruction_bias": 1.01171875, "frac_alive": 0.99676513671875, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

resid_post_layer_14/trainer_1/config.json ADDED Viewed

	@@ -0,0 +1,53 @@

+{
+    "trainer": {
+        "trainer_class": "MatryoshkaBatchTopKTrainer",
+        "dict_class": "MatryoshkaBatchTopKSAE",
+        "lr": 5e-05,
+        "steps": 122070,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 97656,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 1280,
+        "seed": 0,
+        "activation_dim": 2560,
+        "dict_size": 16384,
+        "group_fractions": [
+            0.03125,
+            0.0625,
+            0.125,
+            0.25,
+            0.53125
+        ],
+        "group_weights": [
+            0.2,
+            0.2,
+            0.2,
+            0.2,
+            0.2
+        ],
+        "group_sizes": [
+            512,
+            1024,
+            2048,
+            4096,
+            8704
+        ],
+        "k": 160,
+        "device": "cuda:0",
+        "layer": 14,
+        "lm_name": "google/gemma-4-E4B",
+        "wandb_name": "MatryoshkaBatchTopKTrainer-google/gemma-4-E4B-resid_post_layer_14_trainer_1",
+        "submodule_name": "resid_post_layer_14"
+    },
+    "buffer": {
+        "d_submodule": 2560,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 16,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

resid_post_layer_14/trainer_1/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 30.748106060606062, "l1_loss": 656.2424242424242, "l0": 178.7910262599136, "frac_variance_explained": 0.775094696969697, "cossim": 0.952829071969697, "l2_ratio": 0.9609375, "relative_reconstruction_bias": 1.011600378787879, "frac_alive": 0.99554443359375, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

resid_post_layer_14/trainer_2/config.json ADDED Viewed

	@@ -0,0 +1,53 @@

+{
+    "trainer": {
+        "trainer_class": "MatryoshkaBatchTopKTrainer",
+        "dict_class": "MatryoshkaBatchTopKSAE",
+        "lr": 5e-05,
+        "steps": 122070,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 97656,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 1280,
+        "seed": 0,
+        "activation_dim": 2560,
+        "dict_size": 65536,
+        "group_fractions": [
+            0.03125,
+            0.0625,
+            0.125,
+            0.25,
+            0.53125
+        ],
+        "group_weights": [
+            0.2,
+            0.2,
+            0.2,
+            0.2,
+            0.2
+        ],
+        "group_sizes": [
+            2048,
+            4096,
+            8192,
+            16384,
+            34816
+        ],
+        "k": 80,
+        "device": "cuda:0",
+        "layer": 14,
+        "lm_name": "google/gemma-4-E4B",
+        "wandb_name": "MatryoshkaBatchTopKTrainer-google/gemma-4-E4B-resid_post_layer_14_trainer_2",
+        "submodule_name": "resid_post_layer_14"
+    },
+    "buffer": {
+        "d_submodule": 2560,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 16,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

resid_post_layer_14/trainer_2/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 31.926136363636363, "l1_loss": 346.90909090909093, "l0": 87.18260990489613, "frac_variance_explained": 0.7577533143939394, "cossim": 0.9484493371212122, "l2_ratio": 0.9568536931818182, "relative_reconstruction_bias": 1.0106534090909092, "frac_alive": 0.83673095703125, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

resid_post_layer_14/trainer_3/config.json ADDED Viewed

	@@ -0,0 +1,53 @@

+{
+    "trainer": {
+        "trainer_class": "MatryoshkaBatchTopKTrainer",
+        "dict_class": "MatryoshkaBatchTopKSAE",
+        "lr": 5e-05,
+        "steps": 122070,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 97656,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 1280,
+        "seed": 0,
+        "activation_dim": 2560,
+        "dict_size": 65536,
+        "group_fractions": [
+            0.03125,
+            0.0625,
+            0.125,
+            0.25,
+            0.53125
+        ],
+        "group_weights": [
+            0.2,
+            0.2,
+            0.2,
+            0.2,
+            0.2
+        ],
+        "group_sizes": [
+            2048,
+            4096,
+            8192,
+            16384,
+            34816
+        ],
+        "k": 160,
+        "device": "cuda:0",
+        "layer": 14,
+        "lm_name": "google/gemma-4-E4B",
+        "wandb_name": "MatryoshkaBatchTopKTrainer-google/gemma-4-E4B-resid_post_layer_14_trainer_3",
+        "submodule_name": "resid_post_layer_14"
+    },
+    "buffer": {
+        "d_submodule": 2560,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 16,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

resid_post_layer_14/trainer_3/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 28.75, "l1_loss": 598.8484848484849, "l0": 179.98043915719697, "frac_variance_explained": 0.8037405303030303, "cossim": 0.95703125, "l2_ratio": 0.96484375, "relative_reconstruction_bias": 1.0095880681818181, "frac_alive": 0.770263671875, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

resid_post_layer_21/trainer_0/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:165e90ed2f6253da543c03a24825726264d22e8ad92176ec4ad4dbdb3945270a
+size 335622413

resid_post_layer_21/trainer_0/checkpoints/ae_0.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d5c244af1bb6fdc5168d5a0fba14e47f0d7966ef4d7cec1675ee697750f66f27
+size 335622563

resid_post_layer_21/trainer_0/checkpoints/ae_122.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:945e387769f7beda9be7b0c9cdd696b2e2abefecece8392a1b4c2e80084442bf
+size 335622585

resid_post_layer_21/trainer_0/checkpoints/ae_1220.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:39e6bc706f6b8e964840095580947f17a6bcede000021a3c33cf2aa3cee1936e
+size 335622596

resid_post_layer_21/trainer_0/checkpoints/ae_12207.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f5c625417e79276c56a1f143346a90a28a0435662bcb997db3b56bebbc398265
+size 335622863

resid_post_layer_21/trainer_0/checkpoints/ae_386.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a7a2b19e91d0bd6e2289554404ee0fb86d26fc506de8b4e8a2ec3a3e7d79c9f1
+size 335622585

resid_post_layer_21/trainer_0/checkpoints/ae_3860.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:391cd1f61d02cec575cfbdef45413d669a62933edec176af68033e87692dd893
+size 335622596

resid_post_layer_21/trainer_0/checkpoints/ae_38601.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b8b619b9fd567aedb2aa09fb46afcf1a641e6220be5d2deb316ebe807ddf1dfd
+size 335622863

resid_post_layer_21/trainer_0/config.json ADDED Viewed

	@@ -0,0 +1,53 @@

+{
+    "trainer": {
+        "trainer_class": "MatryoshkaBatchTopKTrainer",
+        "dict_class": "MatryoshkaBatchTopKSAE",
+        "lr": 5e-05,
+        "steps": 122070,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 97656,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 1280,
+        "seed": 0,
+        "activation_dim": 2560,
+        "dict_size": 16384,
+        "group_fractions": [
+            0.03125,
+            0.0625,
+            0.125,
+            0.25,
+            0.53125
+        ],
+        "group_weights": [
+            0.2,
+            0.2,
+            0.2,
+            0.2,
+            0.2
+        ],
+        "group_sizes": [
+            512,
+            1024,
+            2048,
+            4096,
+            8704
+        ],
+        "k": 80,
+        "device": "cuda:0",
+        "layer": 21,
+        "lm_name": "google/gemma-4-E4B",
+        "wandb_name": "MatryoshkaBatchTopKTrainer-google/gemma-4-E4B-resid_post_layer_21_trainer_0",
+        "submodule_name": "resid_post_layer_21"
+    },
+    "buffer": {
+        "d_submodule": 2560,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 16,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

resid_post_layer_21/trainer_0/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 29.035984848484848, "l1_loss": 389.1212121212121, "l0": 86.1897815357555, "frac_variance_explained": 0.8267045454545454, "cossim": 0.9356060606060606, "l2_ratio": 0.9453125, "relative_reconstruction_bias": 1.0080492424242424, "frac_alive": 0.8565673828125, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

resid_post_layer_21/trainer_1/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:10a23c9b3a00ddd46fb78590c3d00ea4c61ef652f192a73f741b9c590251d5b1
+size 335622413

resid_post_layer_21/trainer_1/checkpoints/ae_0.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:691f9ddf883175ebdcf92281a5788e1d56f11403bfd15152a0d8fa77ecf2ab1c
+size 335622563

resid_post_layer_21/trainer_1/checkpoints/ae_122.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:249fb4fd697ed16d6fc7899de500c9ffedc26d111999ec7cb55a1bd4f7a6967c
+size 335622585

resid_post_layer_21/trainer_1/checkpoints/ae_1220.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c82d644ef2719a67149ff1a159f8ad31044dcbcd85b7f6cb541e56bfc5d2d2b8
+size 335622596

resid_post_layer_21/trainer_1/checkpoints/ae_12207.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c3c433a364c0e7c8971a486bd3128c869bb4b2195dbb95fea4d536b0a73dc324
+size 335622863

resid_post_layer_21/trainer_1/checkpoints/ae_3860.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1120734e71d54fb0babac9283086f3f8b898adfb9944a023fb75017d453b4190
+size 335622596

resid_post_layer_21/trainer_1/checkpoints/ae_38601.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e3b816102b2ae8f3d93b3f98fb1bf0f5c40c9b80c838df83c1c0df35492b2ebc
+size 335622863

resid_post_layer_21/trainer_1/config.json ADDED Viewed

	@@ -0,0 +1,53 @@

+{
+    "trainer": {
+        "trainer_class": "MatryoshkaBatchTopKTrainer",
+        "dict_class": "MatryoshkaBatchTopKSAE",
+        "lr": 5e-05,
+        "steps": 122070,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 97656,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 1280,
+        "seed": 0,
+        "activation_dim": 2560,
+        "dict_size": 16384,
+        "group_fractions": [
+            0.03125,
+            0.0625,
+            0.125,
+            0.25,
+            0.53125
+        ],
+        "group_weights": [
+            0.2,
+            0.2,
+            0.2,
+            0.2,
+            0.2
+        ],
+        "group_sizes": [
+            512,
+            1024,
+            2048,
+            4096,
+            8704
+        ],
+        "k": 160,
+        "device": "cuda:0",
+        "layer": 21,
+        "lm_name": "google/gemma-4-E4B",
+        "wandb_name": "MatryoshkaBatchTopKTrainer-google/gemma-4-E4B-resid_post_layer_21_trainer_1",
+        "submodule_name": "resid_post_layer_21"
+    },
+    "buffer": {
+        "d_submodule": 2560,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 16,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

resid_post_layer_21/trainer_1/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 25.890151515151516, "l1_loss": 713.7575757575758, "l0": 172.76931184710878, "frac_variance_explained": 0.8618016098484849, "cossim": 0.94921875, "l2_ratio": 0.95703125, "relative_reconstruction_bias": 1.0078125, "frac_alive": 0.82403564453125, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

resid_post_layer_21/trainer_2/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4dd3747bb0722cf29b80d44490af87fcebe5b400c9d4e855a9c67eb3b4bf9888
+size 1342451981

resid_post_layer_21/trainer_2/checkpoints/ae_386.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:93c79e12e7bc1bccdb7033bc45f1e92f6f3f607c86ac462002106c8318347b7f
+size 1342452153

resid_post_layer_21/trainer_2/config.json ADDED Viewed

	@@ -0,0 +1,53 @@

+{
+    "trainer": {
+        "trainer_class": "MatryoshkaBatchTopKTrainer",
+        "dict_class": "MatryoshkaBatchTopKSAE",
+        "lr": 5e-05,
+        "steps": 122070,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 97656,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 1280,
+        "seed": 0,
+        "activation_dim": 2560,
+        "dict_size": 65536,
+        "group_fractions": [
+            0.03125,
+            0.0625,
+            0.125,
+            0.25,
+            0.53125
+        ],
+        "group_weights": [
+            0.2,
+            0.2,
+            0.2,
+            0.2,
+            0.2
+        ],
+        "group_sizes": [
+            2048,
+            4096,
+            8192,
+            16384,
+            34816
+        ],
+        "k": 80,
+        "device": "cuda:0",
+        "layer": 21,
+        "lm_name": "google/gemma-4-E4B",
+        "wandb_name": "MatryoshkaBatchTopKTrainer-google/gemma-4-E4B-resid_post_layer_21_trainer_2",
+        "submodule_name": "resid_post_layer_21"
+    },
+    "buffer": {
+        "d_submodule": 2560,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 16,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

resid_post_layer_21/trainer_2/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 27.90340909090909, "l1_loss": 354.72727272727275, "l0": 86.3529703544848, "frac_variance_explained": 0.8391335227272727, "cossim": 0.94140625, "l2_ratio": 0.9491595643939394, "relative_reconstruction_bias": 1.0078125, "frac_alive": 0.7160797119140625, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

resid_post_layer_21/trainer_3/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0823518391306a0c4b7854174b77165f825a8c4bc0244b554a4b56df0395dd06
+size 1342451981

resid_post_layer_21/trainer_3/checkpoints/ae_0.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:125f43f916a01a1c9fd3ae8bd241c185076396658210163cfa52396424584ccd
+size 1342452131

resid_post_layer_21/trainer_3/checkpoints/ae_12207.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:94eadcff4ef5be95da7aae55a5c422da6108b63161e37bd828159e2c12b6cc3c
+size 1342452431

resid_post_layer_21/trainer_3/checkpoints/ae_386.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fad19132d5b66d0c5359ee4a5bcf115726d4a4775034c346c428a8b03dc2a074
+size 1342452153

resid_post_layer_21/trainer_3/config.json ADDED Viewed

	@@ -0,0 +1,53 @@

+{
+    "trainer": {
+        "trainer_class": "MatryoshkaBatchTopKTrainer",
+        "dict_class": "MatryoshkaBatchTopKSAE",
+        "lr": 5e-05,
+        "steps": 122070,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 97656,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 1280,
+        "seed": 0,
+        "activation_dim": 2560,
+        "dict_size": 65536,
+        "group_fractions": [
+            0.03125,
+            0.0625,
+            0.125,
+            0.25,
+            0.53125
+        ],
+        "group_weights": [
+            0.2,
+            0.2,
+            0.2,
+            0.2,
+            0.2
+        ],
+        "group_sizes": [
+            2048,
+            4096,
+            8192,
+            16384,
+            34816
+        ],
+        "k": 160,
+        "device": "cuda:0",
+        "layer": 21,
+        "lm_name": "google/gemma-4-E4B",
+        "wandb_name": "MatryoshkaBatchTopKTrainer-google/gemma-4-E4B-resid_post_layer_21_trainer_3",
+        "submodule_name": "resid_post_layer_21"
+    },
+    "buffer": {
+        "d_submodule": 2560,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 16,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

resid_post_layer_21/trainer_3/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 24.477272727272727, "l1_loss": 605.030303030303, "l0": 172.87360659512606, "frac_variance_explained": 0.8756510416666666, "cossim": 0.9533617424242424, "l2_ratio": 0.9609375, "relative_reconstruction_bias": 1.0078125, "frac_alive": 0.713165283203125, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

resid_post_layer_7/trainer_0/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:33d9a3668cb511c09f22665184e6558d6e8504501d3f4f12b2e96486a51cac5f
+size 335622413

resid_post_layer_7/trainer_0/config.json ADDED Viewed

	@@ -0,0 +1,53 @@

+{
+    "trainer": {
+        "trainer_class": "MatryoshkaBatchTopKTrainer",
+        "dict_class": "MatryoshkaBatchTopKSAE",
+        "lr": 5e-05,
+        "steps": 122070,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 97656,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 1280,
+        "seed": 0,
+        "activation_dim": 2560,
+        "dict_size": 16384,
+        "group_fractions": [
+            0.03125,
+            0.0625,
+            0.125,
+            0.25,
+            0.53125
+        ],
+        "group_weights": [
+            0.2,
+            0.2,
+            0.2,
+            0.2,
+            0.2
+        ],
+        "group_sizes": [
+            512,
+            1024,
+            2048,
+            4096,
+            8704
+        ],
+        "k": 80,
+        "device": "cuda:0",
+        "layer": 7,
+        "lm_name": "google/gemma-4-E4B",
+        "wandb_name": "MatryoshkaBatchTopKTrainer-google/gemma-4-E4B-resid_post_layer_7_trainer_0",
+        "submodule_name": "resid_post_layer_7"
+    },
+    "buffer": {
+        "d_submodule": 2560,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 16,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

resid_post_layer_7/trainer_0/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 24.009469696969695, "l1_loss": 307.54545454545456, "l0": 92.451904296875, "frac_variance_explained": 0.7856889204545454, "cossim": 0.9562618371212122, "l2_ratio": 0.96484375, "relative_reconstruction_bias": 1.007930871212121, "frac_alive": 0.98370361328125, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

resid_post_layer_7/trainer_1/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:21984cbf97693658675bddcdaec65426aa20d49bd7926914339f13f5e53d2827
+size 335622413

resid_post_layer_7/trainer_1/config.json ADDED Viewed

	@@ -0,0 +1,53 @@

+{
+    "trainer": {
+        "trainer_class": "MatryoshkaBatchTopKTrainer",
+        "dict_class": "MatryoshkaBatchTopKSAE",
+        "lr": 5e-05,
+        "steps": 122070,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 97656,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 1280,
+        "seed": 0,
+        "activation_dim": 2560,
+        "dict_size": 16384,
+        "group_fractions": [
+            0.03125,
+            0.0625,
+            0.125,
+            0.25,
+            0.53125
+        ],
+        "group_weights": [
+            0.2,
+            0.2,
+            0.2,
+            0.2,
+            0.2
+        ],
+        "group_sizes": [
+            512,
+            1024,
+            2048,
+            4096,
+            8704
+        ],
+        "k": 160,
+        "device": "cuda:0",
+        "layer": 7,
+        "lm_name": "google/gemma-4-E4B",
+        "wandb_name": "MatryoshkaBatchTopKTrainer-google/gemma-4-E4B-resid_post_layer_7_trainer_1",
+        "submodule_name": "resid_post_layer_7"
+    },
+    "buffer": {
+        "d_submodule": 2560,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 16,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

resid_post_layer_7/trainer_1/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 21.954545454545453, "l1_loss": 509.8181818181818, "l0": 185.27754905007103, "frac_variance_explained": 0.8208451704545454, "cossim": 0.9637784090909091, "l2_ratio": 0.97265625, "relative_reconstruction_bias": 1.0078125, "frac_alive": 0.9376220703125, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

resid_post_layer_7/trainer_2/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:89b5ea3fe385ce68e9e80924aa94b1f17f248bf0f5aa76e58c491fdd5d82a289
+size 1342451981

resid_post_layer_7/trainer_2/config.json ADDED Viewed

	@@ -0,0 +1,53 @@

+{
+    "trainer": {
+        "trainer_class": "MatryoshkaBatchTopKTrainer",
+        "dict_class": "MatryoshkaBatchTopKSAE",
+        "lr": 5e-05,
+        "steps": 122070,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 97656,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 1280,
+        "seed": 0,
+        "activation_dim": 2560,
+        "dict_size": 65536,
+        "group_fractions": [
+            0.03125,
+            0.0625,
+            0.125,
+            0.25,
+            0.53125
+        ],
+        "group_weights": [
+            0.2,
+            0.2,
+            0.2,
+            0.2,
+            0.2
+        ],
+        "group_sizes": [
+            2048,
+            4096,
+            8192,
+            16384,
+            34816
+        ],
+        "k": 80,
+        "device": "cuda:0",
+        "layer": 7,
+        "lm_name": "google/gemma-4-E4B",
+        "wandb_name": "MatryoshkaBatchTopKTrainer-google/gemma-4-E4B-resid_post_layer_7_trainer_2",
+        "submodule_name": "resid_post_layer_7"
+    },
+    "buffer": {
+        "d_submodule": 2560,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 16,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

resid_post_layer_7/trainer_2/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 22.15530303030303, "l1_loss": 281.45454545454544, "l0": 93.40012440536961, "frac_variance_explained": 0.8163470643939394, "cossim": 0.9609375, "l2_ratio": 0.96875, "relative_reconstruction_bias": 1.0078125, "frac_alive": 0.830413818359375, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}

resid_post_layer_7/trainer_3/ae.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cd7c4889199febcdc335303bd392d5e416345715f6535f462bbd85db7ef69366
+size 1342451981

resid_post_layer_7/trainer_3/config.json ADDED Viewed

	@@ -0,0 +1,53 @@

+{
+    "trainer": {
+        "trainer_class": "MatryoshkaBatchTopKTrainer",
+        "dict_class": "MatryoshkaBatchTopKSAE",
+        "lr": 5e-05,
+        "steps": 122070,
+        "auxk_alpha": 0.03125,
+        "warmup_steps": 1000,
+        "decay_start": 97656,
+        "threshold_beta": 0.999,
+        "threshold_start_step": 1000,
+        "top_k_aux": 1280,
+        "seed": 0,
+        "activation_dim": 2560,
+        "dict_size": 65536,
+        "group_fractions": [
+            0.03125,
+            0.0625,
+            0.125,
+            0.25,
+            0.53125
+        ],
+        "group_weights": [
+            0.2,
+            0.2,
+            0.2,
+            0.2,
+            0.2
+        ],
+        "group_sizes": [
+            2048,
+            4096,
+            8192,
+            16384,
+            34816
+        ],
+        "k": 160,
+        "device": "cuda:0",
+        "layer": 7,
+        "lm_name": "google/gemma-4-E4B",
+        "wandb_name": "MatryoshkaBatchTopKTrainer-google/gemma-4-E4B-resid_post_layer_7_trainer_3",
+        "submodule_name": "resid_post_layer_7"
+    },
+    "buffer": {
+        "d_submodule": 2560,
+        "io": "out",
+        "n_ctxs": 122,
+        "ctx_len": 2048,
+        "refresh_batch_size": 16,
+        "out_batch_size": 2048,
+        "device": "cuda:0"
+    }
+}

resid_post_layer_7/trainer_3/eval_results.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"l2_loss": 19.914772727272727, "l1_loss": 427.3939393939394, "l0": 188.28258190733013, "frac_variance_explained": 0.8515625, "cossim": 0.96875, "l2_ratio": 0.9765625, "relative_reconstruction_bias": 1.0078125, "frac_alive": 0.8144989013671875, "hyperparameters": {"n_inputs": 200, "context_length": 2048}}