{ "release": "parameter-trajectory-crosscoders", "paper": "Learning to Read Out: Unembedding Dynamics in Language Model Pretraining", "code": "https://github.com/hematteo/learning-to-read-out", "loading_recipe": "from safetensors import safe_open; f = safe_open(path, framework='pt'); W_D = f.get_tensor('W_D'); thr = f.get_tensor('activation_function.log_jumprelu_threshold').exp() # or use src.crosscoder.checkpoints.load_checkpoint from the code repo", "checkpoints": [ { "path": "olmo-2-7b/W_U/cross-snapshot-32/d32768/seed0.safetensors", "model_name": "allenai/OLMo-2-1124-7B", "model": "olmo-2-7b", "matrix": "W_U", "kind": "cross-snapshot-32", "d_sae": 32768, "seed": 0, "n_snapshots": 32, "arch": "jumprelu-crosscoder", "public_label": "OLMo-2-7B W_U crosscoder, 32 snapshots, d_sae=32768, seed 0", "quality": { "explained_variance": 0.8526035831389717, "mean_l0": 557.2795322963169, "dead_rate": 0.0 }, "training": { "lr": 1e-05, "l1_coefficient": 0.3, "batch_size": 1024, "n_epochs": 300, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_E/cross-snapshot-32/d24576/seed0.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_E", "kind": "cross-snapshot-32", "d_sae": 24576, "seed": 0, "n_snapshots": 32, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_E crosscoder, 32 snapshots, d_sae=24576, seed 0", "quality": { "explained_variance": 0.8305442370487662, "mean_l0": 117.50901977539063, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 1024, "n_epochs": 300, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_E/cross-snapshot-32/d8192/seed0.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_E", "kind": "cross-snapshot-32", "d_sae": 8192, "seed": 0, "n_snapshots": 32, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_E crosscoder, 32 snapshots, d_sae=8192, seed 0", "quality": { "explained_variance": 
0.5807971126242004, "mean_l0": 82.08435302734375 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 1024, "n_epochs": 300, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_E/cross-snapshot-32/d8192/seed1.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_E", "kind": "cross-snapshot-32", "d_sae": 8192, "seed": 1, "n_snapshots": 32, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_E crosscoder, 32 snapshots, d_sae=8192, seed 1", "quality": { "explained_variance": 0.580104140268709, "mean_l0": 82.01199279785156, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 1024, "n_epochs": 300, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_E/cross-snapshot-32/d8192/seed2.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_E", "kind": "cross-snapshot-32", "d_sae": 8192, "seed": 2, "n_snapshots": 32, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_E crosscoder, 32 snapshots, d_sae=8192, seed 2", "quality": { "explained_variance": 0.5815380601296635, "mean_l0": 82.31076721191407, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 1024, "n_epochs": 300, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_E/cross-snapshot-32/d8192/seed3.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_E", "kind": "cross-snapshot-32", "d_sae": 8192, "seed": 3, "n_snapshots": 32, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_E crosscoder, 32 snapshots, d_sae=8192, seed 3", "quality": { "explained_variance": 0.5805984141666259, "mean_l0": 82.13521545410157, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 1024, "n_epochs": 300, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_E/cross-snapshot-32/d8192/seed4.safetensors", "model_name": "EleutherAI/pythia-160m", 
"model": "pythia-160m", "matrix": "W_E", "kind": "cross-snapshot-32", "d_sae": 8192, "seed": 4, "n_snapshots": 32, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_E crosscoder, 32 snapshots, d_sae=8192, seed 4", "quality": { "explained_variance": 0.5825219599393371, "mean_l0": 82.5084307861328, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 1024, "n_epochs": 300, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/architecture-comparison/d8192/batchtopk/seed0.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "architecture-comparison/d8192", "d_sae": 8192, "seed": 0, "n_snapshots": 32, "arch": "batchtopk", "public_label": "Pythia-160M W_U crosscoder (batchtopk activation), 32 snapshots, d_sae=8192, seed 0", "quality": { "explained_variance": 0.7249162974116965, "mean_l0": 203.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 1024, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/architecture-comparison/d8192/gated/seed0.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "architecture-comparison/d8192", "d_sae": 8192, "seed": 0, "n_snapshots": 32, "arch": "gated", "public_label": "Pythia-160M W_U crosscoder (gated activation), 32 snapshots, d_sae=8192, seed 0", "quality": { "explained_variance": 0.21442979106557436, "mean_l0": 11.51853515625 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 1024, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/architecture-comparison/d8192/gated-retuned/seed0.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "architecture-comparison/d8192", "d_sae": 8192, "seed": 0, "n_snapshots": 32, "arch": "gated", "public_label": "Pythia-160M W_U crosscoder (Gated activation, L1=0.05), 32 snapshots, d_sae=8192, seed 0", 
"quality": { "explained_variance": 0.827462127951851, "mean_l0": 653.64568359375 }, "training": { "lr": 5e-05, "l1_coefficient": 0.05, "batch_size": 1024, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/cross-snapshot-16/d8192/seed0.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "cross-snapshot-16", "d_sae": 8192, "seed": 0, "n_snapshots": 16, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U crosscoder, 16-snapshot downsample, d_sae=8192, seed 0", "quality": { "explained_variance": 0.7734329300485889, "mean_l0": 215.73649780273436, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 1024, "n_epochs": 300, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/cross-snapshot-32/d16384/seed0.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "cross-snapshot-32", "d_sae": 16384, "seed": 0, "n_snapshots": 32, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U crosscoder, 32 snapshots, d_sae=16384, seed 0", "quality": { "explained_variance": 0.7802076016978367, "mean_l0": 103.0708544921875 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 1024, "n_epochs": 300, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/cross-snapshot-32/d24576/seed0.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "cross-snapshot-32", "d_sae": 24576, "seed": 0, "n_snapshots": 32, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U crosscoder, 32 snapshots, d_sae=24576, seed 0", "quality": { "explained_variance": 0.9196719747097695, "mean_l0": 285.9946594238281, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 1024, "n_epochs": 300, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/cross-snapshot-32/d24576/seed1.safetensors", 
"model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "cross-snapshot-32", "d_sae": 24576, "seed": 1, "n_snapshots": 32, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U crosscoder, 32 snapshots, d_sae=24576, seed 1", "quality": { "explained_variance": 0.9195844996740984, "mean_l0": 286.09990661621094, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 1024, "n_epochs": 300, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/cross-snapshot-32/d24576/seed2.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "cross-snapshot-32", "d_sae": 24576, "seed": 2, "n_snapshots": 32, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U crosscoder, 32 snapshots, d_sae=24576, seed 2", "quality": { "explained_variance": 0.9196765967343927, "mean_l0": 286.3446789550781, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 1024, "n_epochs": 300, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/cross-snapshot-32/d8192/seed0.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "cross-snapshot-32", "d_sae": 8192, "seed": 0, "n_snapshots": 32, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U crosscoder, 32 snapshots, d_sae=8192, seed 0", "quality": { "explained_variance": 0.7763740924044573, "mean_l0": 203.14466369628906 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 1024, "n_epochs": 300, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/cross-snapshot-32/d8192/seed1.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "cross-snapshot-32", "d_sae": 8192, "seed": 1, "n_snapshots": 32, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U crosscoder, 32 snapshots, d_sae=8192, seed 1", "quality": { "explained_variance": 
0.7763600972452054, "mean_l0": 203.26546752929687 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 1024, "n_epochs": 300, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/cross-snapshot-32/d8192/seed2.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "cross-snapshot-32", "d_sae": 8192, "seed": 2, "n_snapshots": 32, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U crosscoder, 32 snapshots, d_sae=8192, seed 2", "quality": { "explained_variance": 0.7764523546944131, "mean_l0": 203.14273071289062 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 1024, "n_epochs": 300, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/cross-snapshot-32/d8192/seed3.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "cross-snapshot-32", "d_sae": 8192, "seed": 3, "n_snapshots": 32, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U crosscoder, 32 snapshots, d_sae=8192, seed 3", "quality": { "explained_variance": 0.7762290236129058, "mean_l0": 203.09911560058595 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 1024, "n_epochs": 300, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/cross-snapshot-32/d8192/seed4.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "cross-snapshot-32", "d_sae": 8192, "seed": 4, "n_snapshots": 32, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U crosscoder, 32 snapshots, d_sae=8192, seed 4", "quality": { "explained_variance": 0.7765168883427946, "mean_l0": 203.4711474609375 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 1024, "n_epochs": 300, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/final-snapshot-saes/d16384.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": 
"final-snapshot-saes", "d_sae": 16384, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U final-snapshot SAE (step 143000 only), d_sae=16384", "quality": { "explained_variance": 0.8703331309369664, "mean_l0": 1913.29353515625 }, "training": { "lr": 5e-05, "l1_coefficient": 0.05, "batch_size": 1024, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/final-snapshot-saes/d32768.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "final-snapshot-saes", "d_sae": 32768, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U final-snapshot SAE (step 143000 only), d_sae=32768", "quality": { "explained_variance": 0.926354609914279, "mean_l0": 3410.36810546875 }, "training": { "lr": 5e-05, "l1_coefficient": 0.05, "batch_size": 1024, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/final-snapshot-saes/d6144.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "final-snapshot-saes", "d_sae": 6144, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U final-snapshot SAE (step 143000 only), d_sae=6144", "quality": { "explained_variance": 0.7649826125102704, "mean_l0": 861.8197265625 }, "training": { "lr": 5e-05, "l1_coefficient": 0.05, "batch_size": 1024, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/final-snapshot-saes/d65536.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "final-snapshot-saes", "d_sae": 65536, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U final-snapshot SAE (step 143000 only), d_sae=65536", "quality": { "explained_variance": 0.9642323011052749, "mean_l0": 5943.19078125 }, "training": { "lr": 5e-05, "l1_coefficient": 0.05, "batch_size": 
1024, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/final-snapshot-saes/d8192.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "final-snapshot-saes", "d_sae": 8192, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U final-snapshot SAE (step 143000 only), d_sae=8192", "quality": { "explained_variance": 0.7989936757858394, "mean_l0": 1084.4333984375 }, "training": { "lr": 5e-05, "l1_coefficient": 0.05, "batch_size": 1024, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/lambda-sweep/d8192/lam0p40_seed0.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "lambda-sweep", "d_sae": 8192, "seed": 0, "n_snapshots": 32, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U crosscoder, \u03bb-sweep iso-L0 (\u03bb=0.4), 32 snapshots, d_sae=8192, seed 0", "quality": { "explained_variance": 0.7479858952216084, "mean_l0": 160.0768243408203, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.4, "batch_size": 1024, "n_epochs": 300, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/lambda-sweep/d8192/lam1p00_seed0.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "lambda-sweep", "d_sae": 8192, "seed": 0, "n_snapshots": 32, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U crosscoder, \u03bb-sweep iso-L0 (\u03bb=1.0), 32 snapshots, d_sae=8192, seed 0", "quality": { "explained_variance": 0.6323049070911215, "mean_l0": 58.49865539550781, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 1.0, "batch_size": 1024, "n_epochs": 300, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/lambda-sweep/d8192/lam1p20_seed0.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "lambda-sweep", 
"d_sae": 8192, "seed": 0, "n_snapshots": 32, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U crosscoder, \u03bb-sweep iso-L0 (\u03bb=1.2), 32 snapshots, d_sae=8192, seed 0", "quality": { "explained_variance": 0.6027639222075235, "mean_l0": 45.41338317871094, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 1.2, "batch_size": 1024, "n_epochs": 300, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/lambda-sweep/d8192/lam1p35_seed0.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "lambda-sweep", "d_sae": 8192, "seed": 0, "n_snapshots": 32, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U crosscoder, \u03bb-sweep iso-L0 (\u03bb=1.35), 32 snapshots, d_sae=8192, seed 0", "quality": { "explained_variance": 0.5819729868726617, "mean_l0": 37.971342163085936, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 1.35, "batch_size": 1024, "n_epochs": 300, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/lambda-sweep/d8192/lam1p35_seed1.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "lambda-sweep", "d_sae": 8192, "seed": 1, "n_snapshots": 32, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U crosscoder, \u03bb-sweep iso-L0 (\u03bb=1.35), 32 snapshots, d_sae=8192, seed 1", "quality": { "explained_variance": 0.581652004193779, "mean_l0": 37.79685302734375, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 1.35, "batch_size": 1024, "n_epochs": 300, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/lambda-sweep/d8192/lam1p35_seed2.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "lambda-sweep", "d_sae": 8192, "seed": 2, "n_snapshots": 32, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U crosscoder, \u03bb-sweep iso-L0 (\u03bb=1.35), 32 snapshots, d_sae=8192, seed 2", 
"quality": { "explained_variance": 0.582366028895355, "mean_l0": 37.980972900390626, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 1.35, "batch_size": 1024, "n_epochs": 300, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/lambda-sweep/d8192/lam1p80_seed0.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "lambda-sweep", "d_sae": 8192, "seed": 0, "n_snapshots": 32, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U crosscoder, \u03bb-sweep iso-L0 (\u03bb=1.8), 32 snapshots, d_sae=8192, seed 0", "quality": { "explained_variance": 0.527703732469858, "mean_l0": 23.033436279296875, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 1.8, "batch_size": 1024, "n_epochs": 300, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step0.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "per-snapshot-saes", "d_sae": 8192, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U per-snapshot SAE at training step 0, d_sae=8192", "quality": { "explained_variance": 0.7288661622516968, "mean_l0": 1150.079375, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 2048, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step1.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "per-snapshot-saes", "d_sae": 8192, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U per-snapshot SAE at training step 1, d_sae=8192", "quality": { "explained_variance": 0.7288661622516968, "mean_l0": 1150.079375, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 2048, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": 
"pythia-160m/W_U/per-snapshot-saes/d8192/step1000.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "per-snapshot-saes", "d_sae": 8192, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U per-snapshot SAE at training step 1000, d_sae=8192", "quality": { "explained_variance": 0.7862491472291859, "mean_l0": 996.89369140625, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 2048, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step102000.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "per-snapshot-saes", "d_sae": 8192, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U per-snapshot SAE at training step 102000, d_sae=8192", "quality": { "explained_variance": 0.7998127382951936, "mean_l0": 983.041015625, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 2048, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step116000.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "per-snapshot-saes", "d_sae": 8192, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U per-snapshot SAE at training step 116000, d_sae=8192", "quality": { "explained_variance": 0.8120686966847851, "mean_l0": 957.6841577148438, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 2048, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step128.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "per-snapshot-saes", "d_sae": 8192, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": 
"Pythia-160M W_U per-snapshot SAE at training step 128, d_sae=8192", "quality": { "explained_variance": 0.7290331109474104, "mean_l0": 1149.5578076171876, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 2048, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step130000.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "per-snapshot-saes", "d_sae": 8192, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U per-snapshot SAE at training step 130000, d_sae=8192", "quality": { "explained_variance": 0.8195671283611181, "mean_l0": 940.0407983398437, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 2048, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step14000.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "per-snapshot-saes", "d_sae": 8192, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U per-snapshot SAE at training step 14000, d_sae=8192", "quality": { "explained_variance": 0.7912227686750525, "mean_l0": 996.466455078125, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 2048, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step143000.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "per-snapshot-saes", "d_sae": 8192, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U per-snapshot SAE at training step 143000, d_sae=8192", "quality": { "explained_variance": 0.8244679775563315, "mean_l0": 924.0657104492187, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 2048, "n_epochs": 
100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step16.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "per-snapshot-saes", "d_sae": 8192, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U per-snapshot SAE at training step 16, d_sae=8192", "quality": { "explained_variance": 0.7288671871864183, "mean_l0": 1150.0855908203125, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 2048, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step2.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "per-snapshot-saes", "d_sae": 8192, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U per-snapshot SAE at training step 2, d_sae=8192", "quality": { "explained_variance": 0.72886850366392, "mean_l0": 1150.0986572265624, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 2048, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step2000.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "per-snapshot-saes", "d_sae": 8192, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U per-snapshot SAE at training step 2000, d_sae=8192", "quality": { "explained_variance": 0.7992791212506758, "mean_l0": 969.1734448242188, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 2048, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step21000.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "per-snapshot-saes", "d_sae": 8192, "seed": 0, "n_snapshots": 1, 
"arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U per-snapshot SAE at training step 21000, d_sae=8192", "quality": { "explained_variance": 0.7907924441773572, "mean_l0": 998.4695678710938, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 2048, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step256.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "per-snapshot-saes", "d_sae": 8192, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U per-snapshot SAE at training step 256, d_sae=8192", "quality": { "explained_variance": 0.7317386038736777, "mean_l0": 1142.36794921875, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 2048, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step27000.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "per-snapshot-saes", "d_sae": 8192, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U per-snapshot SAE at training step 27000, d_sae=8192", "quality": { "explained_variance": 0.7907578681617007, "mean_l0": 999.3178100585938, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 2048, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step3000.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "per-snapshot-saes", "d_sae": 8192, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U per-snapshot SAE at training step 3000, d_sae=8192", "quality": { "explained_variance": 0.7993412040992963, "mean_l0": 972.4029614257812, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 
0.3, "batch_size": 2048, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step32.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "per-snapshot-saes", "d_sae": 8192, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U per-snapshot SAE at training step 32, d_sae=8192", "quality": { "explained_variance": 0.7288681571269175, "mean_l0": 1150.0745947265625, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 2048, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step34000.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "per-snapshot-saes", "d_sae": 8192, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U per-snapshot SAE at training step 34000, d_sae=8192", "quality": { "explained_variance": 0.7906181564574442, "mean_l0": 999.5078881835938, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 2048, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step4.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "per-snapshot-saes", "d_sae": 8192, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U per-snapshot SAE at training step 4, d_sae=8192", "quality": { "explained_variance": 0.7288676993559831, "mean_l0": 1150.087177734375, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 2048, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step4000.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "per-snapshot-saes", "d_sae": 
8192, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U per-snapshot SAE at training step 4000, d_sae=8192", "quality": { "explained_variance": 0.7977749784323522, "mean_l0": 977.3629370117187, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 2048, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step47000.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "per-snapshot-saes", "d_sae": 8192, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U per-snapshot SAE at training step 47000, d_sae=8192", "quality": { "explained_variance": 0.790620026518176, "mean_l0": 1000.2795385742188, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 2048, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step5000.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "per-snapshot-saes", "d_sae": 8192, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U per-snapshot SAE at training step 5000, d_sae=8192", "quality": { "explained_variance": 0.7960998419245482, "mean_l0": 981.8336376953125, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 2048, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step512.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "per-snapshot-saes", "d_sae": 8192, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U per-snapshot SAE at training step 512, d_sae=8192", "quality": { "explained_variance": 0.7542853862655656, "mean_l0": 1086.609990234375, "dead_rate": 0.0 }, "training": 
{ "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 2048, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step6000.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "per-snapshot-saes", "d_sae": 8192, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U per-snapshot SAE at training step 6000, d_sae=8192", "quality": { "explained_variance": 0.7947783648659816, "mean_l0": 985.470498046875, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 2048, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step61000.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "per-snapshot-saes", "d_sae": 8192, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U per-snapshot SAE at training step 61000, d_sae=8192", "quality": { "explained_variance": 0.7904375219119095, "mean_l0": 1001.8048901367188, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 2048, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step64.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "per-snapshot-saes", "d_sae": 8192, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U per-snapshot SAE at training step 64, d_sae=8192", "quality": { "explained_variance": 0.728867046997959, "mean_l0": 1150.0748681640625, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 2048, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step7000.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", 
"kind": "per-snapshot-saes", "d_sae": 8192, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U per-snapshot SAE at training step 7000, d_sae=8192", "quality": { "explained_variance": 0.7938394341974695, "mean_l0": 988.107509765625, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 2048, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step75000.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "per-snapshot-saes", "d_sae": 8192, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U per-snapshot SAE at training step 75000, d_sae=8192", "quality": { "explained_variance": 0.790395516481567, "mean_l0": 1004.148408203125, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 2048, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step8.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "per-snapshot-saes", "d_sae": 8192, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U per-snapshot SAE at training step 8, d_sae=8192", "quality": { "explained_variance": 0.7288677026395104, "mean_l0": 1150.0938623046875, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 2048, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step8000.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "per-snapshot-saes", "d_sae": 8192, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U per-snapshot SAE at training step 8000, d_sae=8192", "quality": { "explained_variance": 0.7931325520036884, "mean_l0": 
990.1047631835937, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 2048, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step89000.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "per-snapshot-saes", "d_sae": 8192, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U per-snapshot SAE at training step 89000, d_sae=8192", "quality": { "explained_variance": 0.7924073402250866, "mean_l0": 1001.9916259765625, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 2048, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step9000.safetensors", "model_name": "EleutherAI/pythia-160m", "model": "pythia-160m", "matrix": "W_U", "kind": "per-snapshot-saes", "d_sae": 8192, "seed": 0, "n_snapshots": 1, "arch": "jumprelu-crosscoder", "public_label": "Pythia-160M W_U per-snapshot SAE at training step 9000, d_sae=8192", "quality": { "explained_variance": 0.7924251809723794, "mean_l0": 992.0130297851563, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 2048, "n_epochs": 100, "input_preprocess": "center_scale" } }, { "path": "pythia-1b/W_U/cross-snapshot-32/d16384/seed0.safetensors", "model_name": "EleutherAI/pythia-1b", "model": "pythia-1b", "matrix": "W_U", "kind": "cross-snapshot-32", "d_sae": 16384, "seed": 0, "n_snapshots": 32, "arch": "jumprelu-crosscoder", "public_label": "Pythia-1B W_U crosscoder, 32 snapshots, d_sae=16384, seed 0", "quality": { "explained_variance": 0.7812400637275599, "mean_l0": 499.1909545898437, "dead_rate": 0.0 }, "training": { "lr": 2e-05, "l1_coefficient": 0.3, "batch_size": 1024, "n_epochs": 300, "input_preprocess": "center_scale" } }, { "path": "pythia-1b/W_U/cross-snapshot-32/d24576/seed0.safetensors", "model_name": "EleutherAI/pythia-1b", 
"model": "pythia-1b", "matrix": "W_U", "kind": "cross-snapshot-32", "d_sae": 24576, "seed": 0, "n_snapshots": 32, "arch": "jumprelu-crosscoder", "public_label": "Pythia-1B W_U crosscoder, 32 snapshots, d_sae=24576, seed 0", "quality": { "explained_variance": 0.861217602606241, "mean_l0": 517.0205554199218, "dead_rate": 0.0 }, "training": { "lr": 2e-05, "l1_coefficient": 0.3, "batch_size": 1024, "n_epochs": 300, "input_preprocess": "center_scale" } }, { "path": "pythia-1b/W_U/cross-snapshot-32/d8192/seed0.safetensors", "model_name": "EleutherAI/pythia-1b", "model": "pythia-1b", "matrix": "W_U", "kind": "cross-snapshot-32", "d_sae": 8192, "seed": 0, "n_snapshots": 32, "arch": "jumprelu-crosscoder", "public_label": "Pythia-1B W_U crosscoder, 32 snapshots, d_sae=8192, seed 0", "quality": { "explained_variance": 0.628030172619609, "mean_l0": 374.1408770751953, "dead_rate": 0.0 }, "training": { "lr": 2e-05, "l1_coefficient": 0.3, "batch_size": 1024, "n_epochs": 300, "input_preprocess": "center_scale" } }, { "path": "pythia-1b/W_U/cross-snapshot-32-matched-window/d24576/seed0.safetensors", "model_name": "EleutherAI/pythia-1b", "model": "pythia-1b", "matrix": "W_U", "kind": "cross-snapshot-32-matched-window", "d_sae": 24576, "seed": 0, "n_snapshots": 32, "arch": "jumprelu-crosscoder", "public_label": "Pythia-1B W_U crosscoder, OLMo-matched late-start 32-snapshot window, d_sae=24576, seed 0", "quality": { "explained_variance": 0.8844067825487556, "mean_l0": 263.98009787989025, "dead_rate": 0.0 }, "training": { "lr": 5e-05, "l1_coefficient": 0.3, "batch_size": 1024, "n_epochs": 300, "input_preprocess": "center_scale" } }, { "path": "pythia-6.9b/W_U/cross-snapshot-32/d32768/seed0-sparse.safetensors", "model_name": "EleutherAI/pythia-6.9b", "model": "pythia-6.9b", "matrix": "W_U", "kind": "cross-snapshot-32", "d_sae": 32768, "seed": 0, "n_snapshots": 32, "arch": "jumprelu-crosscoder", "public_label": "Pythia-6.9B W_U crosscoder, 32 snapshots, d_sae=32768, seed 0, selected 
sparse run (lambda=0.6)", "quality": { "explained_variance": 0.8080785315044828, "mean_l0": 741.8141662287833, "dead_rate": 0.0 }, "training": { "lr": 1e-05, "l1_coefficient": 0.6, "batch_size": 1024, "n_epochs": 300, "input_preprocess": "center_scale" } }, { "path": "pythia-6.9b/W_U/cross-snapshot-32/d32768/seed0.safetensors", "model_name": "EleutherAI/pythia-6.9b", "model": "pythia-6.9b", "matrix": "W_U", "kind": "cross-snapshot-32", "d_sae": 32768, "seed": 0, "n_snapshots": 32, "arch": "jumprelu-crosscoder", "public_label": "Pythia-6.9B W_U crosscoder, 32 snapshots, d_sae=32768, seed 0", "quality": { "explained_variance": 0.8331843988901371, "mean_l0": 1956.59887943171, "dead_rate": 0.0 }, "training": { "lr": 1e-05, "l1_coefficient": 0.3, "batch_size": 1024, "n_epochs": 300, "input_preprocess": "center_scale" } } ], "auxiliary": { "derived/aggregates": [ "aggregates_dsae16384_seed0.pt", "aggregates_dsae24576_seed0.pt", "aggregates_dsae8192_seed0.pt", "aggregates_pythia-160m_d24576_seed0.pt", "aggregates_pythia-1b_d16384_seed0.pt", "aggregates_pythia-1b_d24576_seed0.pt", "aggregates_pythia-1b_d8192_seed0.pt", "aggregates_pythia-6.9b_d32768_seed0.pt" ], "derived/rates": [ "we-d24576/we_cc_dsae24576_seed0_norms.npy", "we-d24576/we_rates_dsae24576_seed0.pt", "we-d8192-multiseed/we_cc_dsae8192_seed0_norms.npy", "we-d8192-multiseed/we_cc_dsae8192_seed1_norms.npy", "we-d8192-multiseed/we_cc_dsae8192_seed2_norms.npy", "we-d8192-multiseed/we_cc_dsae8192_seed3_norms.npy", "we-d8192-multiseed/we_cc_dsae8192_seed4_norms.npy", "we-d8192-multiseed/we_rates_dsae8192_seed0.pt", "we-d8192-multiseed/we_rates_dsae8192_seed1.pt", "we-d8192-multiseed/we_rates_dsae8192_seed2.pt", "we-d8192-multiseed/we_rates_dsae8192_seed3.pt", "we-d8192-multiseed/we_rates_dsae8192_seed4.pt", "wu-1b-d24576/decoder_norms_dsae24576_seed0.npy", "wu-d24576-multiseed/decoder_norms_dsae24576_seed0.npy", "wu-d24576-multiseed/decoder_norms_dsae24576_seed1.npy", 
"wu-d24576-multiseed/decoder_norms_dsae24576_seed2.npy", "wu-d24576-multiseed/wu_cc_dsae24576_seed0_norms.npy", "wu-d24576-multiseed/wu_cc_dsae24576_seed1_norms.npy", "wu-d24576-multiseed/wu_cc_dsae24576_seed2_norms.npy", "wu-d24576-multiseed/wu_rates_dsae24576_seed0.pt", "wu-d24576-multiseed/wu_rates_dsae24576_seed1.pt", "wu-d24576-multiseed/wu_rates_dsae24576_seed2.pt", "wu-d8192-cs16/wu_cc_dsae8192_seed0_norms.npy", "wu-d8192-cs16/wu_rates_dsae8192_seed0.pt", "wu-d8192-multiseed/decoder_norms_all_seeds.npy", "wu-d8192-multiseed/firing_rates_all_seeds.npy", "wu-d8192-persnap/train.log" ], "evaluation": [ "eval-corpus/README.md", "eval-corpus/eval_tokens.pt" ], "attribution": [ "pythia-160m/induction-results.pt", "pythia-160m/induction-verdict.json", "pythia-160m/ioi-results.pt", "pythia-160m/ioi-verdict.json", "pythia-160m/sva-results.pt", "pythia-160m/sva-verdict.json" ] } }