| { |
| "release": "parameter-trajectory-crosscoders", |
| "paper": "Learning to Read Out: Unembedding Dynamics in Language Model Pretraining", |
| "code": "https://github.com/hematteo/learning-to-read-out", |
| "loading_recipe": "from safetensors import safe_open; f = safe_open(path, framework='pt'); W_D = f.get_tensor('W_D'); thr = f.get_tensor('activation_function.log_jumprelu_threshold').exp() # or use src.crosscoder.checkpoints.load_checkpoint from the code repo", |
| "checkpoints": [ |
| { |
| "path": "olmo-2-7b/W_U/cross-snapshot-32/d32768/seed0.safetensors", |
| "model_name": "allenai/OLMo-2-1124-7B", |
| "model": "olmo-2-7b", |
| "matrix": "W_U", |
| "kind": "cross-snapshot-32", |
| "d_sae": 32768, |
| "seed": 0, |
| "n_snapshots": 32, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "OLMo-2-7B W_U crosscoder, 32 snapshots, d_sae=32768, seed 0", |
| "quality": { |
| "explained_variance": 0.8526035831389717, |
| "mean_l0": 557.2795322963169, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 1e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 1024, |
| "n_epochs": 300, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_E/cross-snapshot-32/d24576/seed0.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_E", |
| "kind": "cross-snapshot-32", |
| "d_sae": 24576, |
| "seed": 0, |
| "n_snapshots": 32, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_E crosscoder, 32 snapshots, d_sae=24576, seed 0", |
| "quality": { |
| "explained_variance": 0.8305442370487662, |
| "mean_l0": 117.50901977539063, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 1024, |
| "n_epochs": 300, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_E/cross-snapshot-32/d8192/seed0.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_E", |
| "kind": "cross-snapshot-32", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 32, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_E crosscoder, 32 snapshots, d_sae=8192, seed 0", |
| "quality": { |
| "explained_variance": 0.5807971126242004, |
| "mean_l0": 82.08435302734375 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 1024, |
| "n_epochs": 300, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_E/cross-snapshot-32/d8192/seed1.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_E", |
| "kind": "cross-snapshot-32", |
| "d_sae": 8192, |
| "seed": 1, |
| "n_snapshots": 32, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_E crosscoder, 32 snapshots, d_sae=8192, seed 1", |
| "quality": { |
| "explained_variance": 0.580104140268709, |
| "mean_l0": 82.01199279785156, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 1024, |
| "n_epochs": 300, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_E/cross-snapshot-32/d8192/seed2.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_E", |
| "kind": "cross-snapshot-32", |
| "d_sae": 8192, |
| "seed": 2, |
| "n_snapshots": 32, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_E crosscoder, 32 snapshots, d_sae=8192, seed 2", |
| "quality": { |
| "explained_variance": 0.5815380601296635, |
| "mean_l0": 82.31076721191407, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 1024, |
| "n_epochs": 300, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_E/cross-snapshot-32/d8192/seed3.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_E", |
| "kind": "cross-snapshot-32", |
| "d_sae": 8192, |
| "seed": 3, |
| "n_snapshots": 32, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_E crosscoder, 32 snapshots, d_sae=8192, seed 3", |
| "quality": { |
| "explained_variance": 0.5805984141666259, |
| "mean_l0": 82.13521545410157, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 1024, |
| "n_epochs": 300, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_E/cross-snapshot-32/d8192/seed4.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_E", |
| "kind": "cross-snapshot-32", |
| "d_sae": 8192, |
| "seed": 4, |
| "n_snapshots": 32, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_E crosscoder, 32 snapshots, d_sae=8192, seed 4", |
| "quality": { |
| "explained_variance": 0.5825219599393371, |
| "mean_l0": 82.5084307861328, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 1024, |
| "n_epochs": 300, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/architecture-comparison/d8192/batchtopk/seed0.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "architecture-comparison/d8192", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 32, |
| "arch": "batchtopk", |
| "public_label": "Pythia-160M W_U crosscoder (batchtopk activation), 32 snapshots, d_sae=8192, seed 0", |
| "quality": { |
| "explained_variance": 0.7249162974116965, |
| "mean_l0": 203.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 1024, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/architecture-comparison/d8192/gated/seed0.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "architecture-comparison/d8192", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 32, |
| "arch": "gated", |
| "public_label": "Pythia-160M W_U crosscoder (gated activation), 32 snapshots, d_sae=8192, seed 0", |
| "quality": { |
| "explained_variance": 0.21442979106557436, |
| "mean_l0": 11.51853515625 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 1024, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/architecture-comparison/d8192/gated-retuned/seed0.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "architecture-comparison/d8192", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 32, |
| "arch": "gated", |
| "public_label": "Pythia-160M W_U crosscoder (gated activation, L1=0.05), 32 snapshots, d_sae=8192, seed 0", |
| "quality": { |
| "explained_variance": 0.827462127951851, |
| "mean_l0": 653.64568359375 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.05, |
| "batch_size": 1024, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/cross-snapshot-16/d8192/seed0.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "cross-snapshot-16", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 16, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U crosscoder, 16-snapshot downsample, d_sae=8192, seed 0", |
| "quality": { |
| "explained_variance": 0.7734329300485889, |
| "mean_l0": 215.73649780273436, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 1024, |
| "n_epochs": 300, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/cross-snapshot-32/d16384/seed0.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "cross-snapshot-32", |
| "d_sae": 16384, |
| "seed": 0, |
| "n_snapshots": 32, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U crosscoder, 32 snapshots, d_sae=16384, seed 0", |
| "quality": { |
| "explained_variance": 0.7802076016978367, |
| "mean_l0": 103.0708544921875 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 1024, |
| "n_epochs": 300, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/cross-snapshot-32/d24576/seed0.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "cross-snapshot-32", |
| "d_sae": 24576, |
| "seed": 0, |
| "n_snapshots": 32, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U crosscoder, 32 snapshots, d_sae=24576, seed 0", |
| "quality": { |
| "explained_variance": 0.9196719747097695, |
| "mean_l0": 285.9946594238281, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 1024, |
| "n_epochs": 300, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/cross-snapshot-32/d24576/seed1.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "cross-snapshot-32", |
| "d_sae": 24576, |
| "seed": 1, |
| "n_snapshots": 32, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U crosscoder, 32 snapshots, d_sae=24576, seed 1", |
| "quality": { |
| "explained_variance": 0.9195844996740984, |
| "mean_l0": 286.09990661621094, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 1024, |
| "n_epochs": 300, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/cross-snapshot-32/d24576/seed2.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "cross-snapshot-32", |
| "d_sae": 24576, |
| "seed": 2, |
| "n_snapshots": 32, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U crosscoder, 32 snapshots, d_sae=24576, seed 2", |
| "quality": { |
| "explained_variance": 0.9196765967343927, |
| "mean_l0": 286.3446789550781, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 1024, |
| "n_epochs": 300, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/cross-snapshot-32/d8192/seed0.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "cross-snapshot-32", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 32, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U crosscoder, 32 snapshots, d_sae=8192, seed 0", |
| "quality": { |
| "explained_variance": 0.7763740924044573, |
| "mean_l0": 203.14466369628906 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 1024, |
| "n_epochs": 300, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/cross-snapshot-32/d8192/seed1.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "cross-snapshot-32", |
| "d_sae": 8192, |
| "seed": 1, |
| "n_snapshots": 32, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U crosscoder, 32 snapshots, d_sae=8192, seed 1", |
| "quality": { |
| "explained_variance": 0.7763600972452054, |
| "mean_l0": 203.26546752929687 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 1024, |
| "n_epochs": 300, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/cross-snapshot-32/d8192/seed2.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "cross-snapshot-32", |
| "d_sae": 8192, |
| "seed": 2, |
| "n_snapshots": 32, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U crosscoder, 32 snapshots, d_sae=8192, seed 2", |
| "quality": { |
| "explained_variance": 0.7764523546944131, |
| "mean_l0": 203.14273071289062 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 1024, |
| "n_epochs": 300, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/cross-snapshot-32/d8192/seed3.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "cross-snapshot-32", |
| "d_sae": 8192, |
| "seed": 3, |
| "n_snapshots": 32, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U crosscoder, 32 snapshots, d_sae=8192, seed 3", |
| "quality": { |
| "explained_variance": 0.7762290236129058, |
| "mean_l0": 203.09911560058595 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 1024, |
| "n_epochs": 300, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/cross-snapshot-32/d8192/seed4.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "cross-snapshot-32", |
| "d_sae": 8192, |
| "seed": 4, |
| "n_snapshots": 32, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U crosscoder, 32 snapshots, d_sae=8192, seed 4", |
| "quality": { |
| "explained_variance": 0.7765168883427946, |
| "mean_l0": 203.4711474609375 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 1024, |
| "n_epochs": 300, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/final-snapshot-saes/d16384.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "final-snapshot-saes", |
| "d_sae": 16384, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U final-snapshot SAE (step 143000 only), d_sae=16384", |
| "quality": { |
| "explained_variance": 0.8703331309369664, |
| "mean_l0": 1913.29353515625 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.05, |
| "batch_size": 1024, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/final-snapshot-saes/d32768.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "final-snapshot-saes", |
| "d_sae": 32768, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U final-snapshot SAE (step 143000 only), d_sae=32768", |
| "quality": { |
| "explained_variance": 0.926354609914279, |
| "mean_l0": 3410.36810546875 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.05, |
| "batch_size": 1024, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/final-snapshot-saes/d6144.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "final-snapshot-saes", |
| "d_sae": 6144, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U final-snapshot SAE (step 143000 only), d_sae=6144", |
| "quality": { |
| "explained_variance": 0.7649826125102704, |
| "mean_l0": 861.8197265625 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.05, |
| "batch_size": 1024, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/final-snapshot-saes/d65536.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "final-snapshot-saes", |
| "d_sae": 65536, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U final-snapshot SAE (step 143000 only), d_sae=65536", |
| "quality": { |
| "explained_variance": 0.9642323011052749, |
| "mean_l0": 5943.19078125 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.05, |
| "batch_size": 1024, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/final-snapshot-saes/d8192.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "final-snapshot-saes", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U final-snapshot SAE (step 143000 only), d_sae=8192", |
| "quality": { |
| "explained_variance": 0.7989936757858394, |
| "mean_l0": 1084.4333984375 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.05, |
| "batch_size": 1024, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/lambda-sweep/d8192/lam0p40_seed0.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "lambda-sweep", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 32, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U crosscoder, \u03bb-sweep iso-L0 (\u03bb=0.4), 32 snapshots, d_sae=8192, seed 0", |
| "quality": { |
| "explained_variance": 0.7479858952216084, |
| "mean_l0": 160.0768243408203, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.4, |
| "batch_size": 1024, |
| "n_epochs": 300, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/lambda-sweep/d8192/lam1p00_seed0.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "lambda-sweep", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 32, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U crosscoder, \u03bb-sweep iso-L0 (\u03bb=1.0), 32 snapshots, d_sae=8192, seed 0", |
| "quality": { |
| "explained_variance": 0.6323049070911215, |
| "mean_l0": 58.49865539550781, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 1.0, |
| "batch_size": 1024, |
| "n_epochs": 300, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/lambda-sweep/d8192/lam1p20_seed0.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "lambda-sweep", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 32, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U crosscoder, \u03bb-sweep iso-L0 (\u03bb=1.2), 32 snapshots, d_sae=8192, seed 0", |
| "quality": { |
| "explained_variance": 0.6027639222075235, |
| "mean_l0": 45.41338317871094, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 1.2, |
| "batch_size": 1024, |
| "n_epochs": 300, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/lambda-sweep/d8192/lam1p35_seed0.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "lambda-sweep", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 32, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U crosscoder, \u03bb-sweep iso-L0 (\u03bb=1.35), 32 snapshots, d_sae=8192, seed 0", |
| "quality": { |
| "explained_variance": 0.5819729868726617, |
| "mean_l0": 37.971342163085936, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 1.35, |
| "batch_size": 1024, |
| "n_epochs": 300, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/lambda-sweep/d8192/lam1p35_seed1.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "lambda-sweep", |
| "d_sae": 8192, |
| "seed": 1, |
| "n_snapshots": 32, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U crosscoder, \u03bb-sweep iso-L0 (\u03bb=1.35), 32 snapshots, d_sae=8192, seed 1", |
| "quality": { |
| "explained_variance": 0.581652004193779, |
| "mean_l0": 37.79685302734375, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 1.35, |
| "batch_size": 1024, |
| "n_epochs": 300, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/lambda-sweep/d8192/lam1p35_seed2.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "lambda-sweep", |
| "d_sae": 8192, |
| "seed": 2, |
| "n_snapshots": 32, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U crosscoder, \u03bb-sweep iso-L0 (\u03bb=1.35), 32 snapshots, d_sae=8192, seed 2", |
| "quality": { |
| "explained_variance": 0.582366028895355, |
| "mean_l0": 37.980972900390626, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 1.35, |
| "batch_size": 1024, |
| "n_epochs": 300, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/lambda-sweep/d8192/lam1p80_seed0.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "lambda-sweep", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 32, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U crosscoder, \u03bb-sweep iso-L0 (\u03bb=1.8), 32 snapshots, d_sae=8192, seed 0", |
| "quality": { |
| "explained_variance": 0.527703732469858, |
| "mean_l0": 23.033436279296875, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 1.8, |
| "batch_size": 1024, |
| "n_epochs": 300, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step0.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "per-snapshot-saes", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U per-snapshot SAE at training step 0, d_sae=8192", |
| "quality": { |
| "explained_variance": 0.7288661622516968, |
| "mean_l0": 1150.079375, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 2048, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step1.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "per-snapshot-saes", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U per-snapshot SAE at training step 1, d_sae=8192", |
| "quality": { |
| "explained_variance": 0.7288661622516968, |
| "mean_l0": 1150.079375, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 2048, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step1000.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "per-snapshot-saes", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U per-snapshot SAE at training step 1000, d_sae=8192", |
| "quality": { |
| "explained_variance": 0.7862491472291859, |
| "mean_l0": 996.89369140625, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 2048, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step102000.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "per-snapshot-saes", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U per-snapshot SAE at training step 102000, d_sae=8192", |
| "quality": { |
| "explained_variance": 0.7998127382951936, |
| "mean_l0": 983.041015625, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 2048, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step116000.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "per-snapshot-saes", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U per-snapshot SAE at training step 116000, d_sae=8192", |
| "quality": { |
| "explained_variance": 0.8120686966847851, |
| "mean_l0": 957.6841577148438, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 2048, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step128.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "per-snapshot-saes", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U per-snapshot SAE at training step 128, d_sae=8192", |
| "quality": { |
| "explained_variance": 0.7290331109474104, |
| "mean_l0": 1149.5578076171876, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 2048, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step130000.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "per-snapshot-saes", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U per-snapshot SAE at training step 130000, d_sae=8192", |
| "quality": { |
| "explained_variance": 0.8195671283611181, |
| "mean_l0": 940.0407983398437, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 2048, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step14000.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "per-snapshot-saes", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U per-snapshot SAE at training step 14000, d_sae=8192", |
| "quality": { |
| "explained_variance": 0.7912227686750525, |
| "mean_l0": 996.466455078125, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 2048, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step143000.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "per-snapshot-saes", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U per-snapshot SAE at training step 143000, d_sae=8192", |
| "quality": { |
| "explained_variance": 0.8244679775563315, |
| "mean_l0": 924.0657104492187, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 2048, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step16.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "per-snapshot-saes", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U per-snapshot SAE at training step 16, d_sae=8192", |
| "quality": { |
| "explained_variance": 0.7288671871864183, |
| "mean_l0": 1150.0855908203125, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 2048, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step2.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "per-snapshot-saes", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U per-snapshot SAE at training step 2, d_sae=8192", |
| "quality": { |
| "explained_variance": 0.72886850366392, |
| "mean_l0": 1150.0986572265624, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 2048, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step2000.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "per-snapshot-saes", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U per-snapshot SAE at training step 2000, d_sae=8192", |
| "quality": { |
| "explained_variance": 0.7992791212506758, |
| "mean_l0": 969.1734448242188, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 2048, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step21000.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "per-snapshot-saes", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U per-snapshot SAE at training step 21000, d_sae=8192", |
| "quality": { |
| "explained_variance": 0.7907924441773572, |
| "mean_l0": 998.4695678710938, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 2048, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step256.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "per-snapshot-saes", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U per-snapshot SAE at training step 256, d_sae=8192", |
| "quality": { |
| "explained_variance": 0.7317386038736777, |
| "mean_l0": 1142.36794921875, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 2048, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step27000.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "per-snapshot-saes", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U per-snapshot SAE at training step 27000, d_sae=8192", |
| "quality": { |
| "explained_variance": 0.7907578681617007, |
| "mean_l0": 999.3178100585938, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 2048, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step3000.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "per-snapshot-saes", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U per-snapshot SAE at training step 3000, d_sae=8192", |
| "quality": { |
| "explained_variance": 0.7993412040992963, |
| "mean_l0": 972.4029614257812, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 2048, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step32.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "per-snapshot-saes", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U per-snapshot SAE at training step 32, d_sae=8192", |
| "quality": { |
| "explained_variance": 0.7288681571269175, |
| "mean_l0": 1150.0745947265625, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 2048, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step34000.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "per-snapshot-saes", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U per-snapshot SAE at training step 34000, d_sae=8192", |
| "quality": { |
| "explained_variance": 0.7906181564574442, |
| "mean_l0": 999.5078881835938, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 2048, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step4.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "per-snapshot-saes", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U per-snapshot SAE at training step 4, d_sae=8192", |
| "quality": { |
| "explained_variance": 0.7288676993559831, |
| "mean_l0": 1150.087177734375, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 2048, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step4000.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "per-snapshot-saes", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U per-snapshot SAE at training step 4000, d_sae=8192", |
| "quality": { |
| "explained_variance": 0.7977749784323522, |
| "mean_l0": 977.3629370117187, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 2048, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step47000.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "per-snapshot-saes", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U per-snapshot SAE at training step 47000, d_sae=8192", |
| "quality": { |
| "explained_variance": 0.790620026518176, |
| "mean_l0": 1000.2795385742188, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 2048, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step5000.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "per-snapshot-saes", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U per-snapshot SAE at training step 5000, d_sae=8192", |
| "quality": { |
| "explained_variance": 0.7960998419245482, |
| "mean_l0": 981.8336376953125, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 2048, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step512.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "per-snapshot-saes", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U per-snapshot SAE at training step 512, d_sae=8192", |
| "quality": { |
| "explained_variance": 0.7542853862655656, |
| "mean_l0": 1086.609990234375, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 2048, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step6000.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "per-snapshot-saes", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U per-snapshot SAE at training step 6000, d_sae=8192", |
| "quality": { |
| "explained_variance": 0.7947783648659816, |
| "mean_l0": 985.470498046875, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 2048, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step61000.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "per-snapshot-saes", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U per-snapshot SAE at training step 61000, d_sae=8192", |
| "quality": { |
| "explained_variance": 0.7904375219119095, |
| "mean_l0": 1001.8048901367188, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 2048, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step64.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "per-snapshot-saes", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U per-snapshot SAE at training step 64, d_sae=8192", |
| "quality": { |
| "explained_variance": 0.728867046997959, |
| "mean_l0": 1150.0748681640625, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 2048, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step7000.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "per-snapshot-saes", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U per-snapshot SAE at training step 7000, d_sae=8192", |
| "quality": { |
| "explained_variance": 0.7938394341974695, |
| "mean_l0": 988.107509765625, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 2048, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step75000.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "per-snapshot-saes", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U per-snapshot SAE at training step 75000, d_sae=8192", |
| "quality": { |
| "explained_variance": 0.790395516481567, |
| "mean_l0": 1004.148408203125, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 2048, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step8.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "per-snapshot-saes", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U per-snapshot SAE at training step 8, d_sae=8192", |
| "quality": { |
| "explained_variance": 0.7288677026395104, |
| "mean_l0": 1150.0938623046875, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 2048, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step8000.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "per-snapshot-saes", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U per-snapshot SAE at training step 8000, d_sae=8192", |
| "quality": { |
| "explained_variance": 0.7931325520036884, |
| "mean_l0": 990.1047631835937, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 2048, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step89000.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "per-snapshot-saes", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U per-snapshot SAE at training step 89000, d_sae=8192", |
| "quality": { |
| "explained_variance": 0.7924073402250866, |
| "mean_l0": 1001.9916259765625, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 2048, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-160m/W_U/per-snapshot-saes/d8192/step9000.safetensors", |
| "model_name": "EleutherAI/pythia-160m", |
| "model": "pythia-160m", |
| "matrix": "W_U", |
| "kind": "per-snapshot-saes", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 1, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-160M W_U per-snapshot SAE at training step 9000, d_sae=8192", |
| "quality": { |
| "explained_variance": 0.7924251809723794, |
| "mean_l0": 992.0130297851563, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 2048, |
| "n_epochs": 100, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-1b/W_U/cross-snapshot-32/d16384/seed0.safetensors", |
| "model_name": "EleutherAI/pythia-1b", |
| "model": "pythia-1b", |
| "matrix": "W_U", |
| "kind": "cross-snapshot-32", |
| "d_sae": 16384, |
| "seed": 0, |
| "n_snapshots": 32, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-1B W_U crosscoder, 32 snapshots, d_sae=16384, seed 0", |
| "quality": { |
| "explained_variance": 0.7812400637275599, |
| "mean_l0": 499.1909545898437, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 2e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 1024, |
| "n_epochs": 300, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-1b/W_U/cross-snapshot-32/d24576/seed0.safetensors", |
| "model_name": "EleutherAI/pythia-1b", |
| "model": "pythia-1b", |
| "matrix": "W_U", |
| "kind": "cross-snapshot-32", |
| "d_sae": 24576, |
| "seed": 0, |
| "n_snapshots": 32, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-1B W_U crosscoder, 32 snapshots, d_sae=24576, seed 0", |
| "quality": { |
| "explained_variance": 0.861217602606241, |
| "mean_l0": 517.0205554199218, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 2e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 1024, |
| "n_epochs": 300, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-1b/W_U/cross-snapshot-32/d8192/seed0.safetensors", |
| "model_name": "EleutherAI/pythia-1b", |
| "model": "pythia-1b", |
| "matrix": "W_U", |
| "kind": "cross-snapshot-32", |
| "d_sae": 8192, |
| "seed": 0, |
| "n_snapshots": 32, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-1B W_U crosscoder, 32 snapshots, d_sae=8192, seed 0", |
| "quality": { |
| "explained_variance": 0.628030172619609, |
| "mean_l0": 374.1408770751953, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 2e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 1024, |
| "n_epochs": 300, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-1b/W_U/cross-snapshot-32-matched-window/d24576/seed0.safetensors", |
| "model_name": "EleutherAI/pythia-1b", |
| "model": "pythia-1b", |
| "matrix": "W_U", |
| "kind": "cross-snapshot-32-matched-window", |
| "d_sae": 24576, |
| "seed": 0, |
| "n_snapshots": 32, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-1B W_U crosscoder, OLMo-matched late-start 32-snapshot window, d_sae=24576, seed 0", |
| "quality": { |
| "explained_variance": 0.8844067825487556, |
| "mean_l0": 263.98009787989025, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 5e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 1024, |
| "n_epochs": 300, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-6.9b/W_U/cross-snapshot-32/d32768/seed0-sparse.safetensors", |
| "model_name": "EleutherAI/pythia-6.9b", |
| "model": "pythia-6.9b", |
| "matrix": "W_U", |
| "kind": "cross-snapshot-32", |
| "d_sae": 32768, |
| "seed": 0, |
| "n_snapshots": 32, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-6.9B W_U crosscoder, 32 snapshots, d_sae=32768, seed 0, selected sparse run (lambda=0.6)", |
| "quality": { |
| "explained_variance": 0.8080785315044828, |
| "mean_l0": 741.8141662287833, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 1e-05, |
| "l1_coefficient": 0.6, |
| "batch_size": 1024, |
| "n_epochs": 300, |
| "input_preprocess": "center_scale" |
| } |
| }, |
| { |
| "path": "pythia-6.9b/W_U/cross-snapshot-32/d32768/seed0.safetensors", |
| "model_name": "EleutherAI/pythia-6.9b", |
| "model": "pythia-6.9b", |
| "matrix": "W_U", |
| "kind": "cross-snapshot-32", |
| "d_sae": 32768, |
| "seed": 0, |
| "n_snapshots": 32, |
| "arch": "jumprelu-crosscoder", |
| "public_label": "Pythia-6.9B W_U crosscoder, 32 snapshots, d_sae=32768, seed 0", |
| "quality": { |
| "explained_variance": 0.8331843988901371, |
| "mean_l0": 1956.59887943171, |
| "dead_rate": 0.0 |
| }, |
| "training": { |
| "lr": 1e-05, |
| "l1_coefficient": 0.3, |
| "batch_size": 1024, |
| "n_epochs": 300, |
| "input_preprocess": "center_scale" |
| } |
| } |
| ], |
| "auxiliary": { |
| "derived/aggregates": [ |
| "aggregates_dsae16384_seed0.pt", |
| "aggregates_dsae24576_seed0.pt", |
| "aggregates_dsae8192_seed0.pt", |
| "aggregates_pythia-160m_d24576_seed0.pt", |
| "aggregates_pythia-1b_d16384_seed0.pt", |
| "aggregates_pythia-1b_d24576_seed0.pt", |
| "aggregates_pythia-1b_d8192_seed0.pt", |
| "aggregates_pythia-6.9b_d32768_seed0.pt" |
| ], |
| "derived/rates": [ |
| "we-d24576/we_cc_dsae24576_seed0_norms.npy", |
| "we-d24576/we_rates_dsae24576_seed0.pt", |
| "we-d8192-multiseed/we_cc_dsae8192_seed0_norms.npy", |
| "we-d8192-multiseed/we_cc_dsae8192_seed1_norms.npy", |
| "we-d8192-multiseed/we_cc_dsae8192_seed2_norms.npy", |
| "we-d8192-multiseed/we_cc_dsae8192_seed3_norms.npy", |
| "we-d8192-multiseed/we_cc_dsae8192_seed4_norms.npy", |
| "we-d8192-multiseed/we_rates_dsae8192_seed0.pt", |
| "we-d8192-multiseed/we_rates_dsae8192_seed1.pt", |
| "we-d8192-multiseed/we_rates_dsae8192_seed2.pt", |
| "we-d8192-multiseed/we_rates_dsae8192_seed3.pt", |
| "we-d8192-multiseed/we_rates_dsae8192_seed4.pt", |
| "wu-1b-d24576/decoder_norms_dsae24576_seed0.npy", |
| "wu-d24576-multiseed/decoder_norms_dsae24576_seed0.npy", |
| "wu-d24576-multiseed/decoder_norms_dsae24576_seed1.npy", |
| "wu-d24576-multiseed/decoder_norms_dsae24576_seed2.npy", |
| "wu-d24576-multiseed/wu_cc_dsae24576_seed0_norms.npy", |
| "wu-d24576-multiseed/wu_cc_dsae24576_seed1_norms.npy", |
| "wu-d24576-multiseed/wu_cc_dsae24576_seed2_norms.npy", |
| "wu-d24576-multiseed/wu_rates_dsae24576_seed0.pt", |
| "wu-d24576-multiseed/wu_rates_dsae24576_seed1.pt", |
| "wu-d24576-multiseed/wu_rates_dsae24576_seed2.pt", |
| "wu-d8192-cs16/wu_cc_dsae8192_seed0_norms.npy", |
| "wu-d8192-cs16/wu_rates_dsae8192_seed0.pt", |
| "wu-d8192-multiseed/decoder_norms_all_seeds.npy", |
| "wu-d8192-multiseed/firing_rates_all_seeds.npy", |
| "wu-d8192-persnap/train.log" |
| ], |
| "evaluation": [ |
| "eval-corpus/README.md", |
| "eval-corpus/eval_tokens.pt" |
| ], |
| "attribution": [ |
| "pythia-160m/induction-results.pt", |
| "pythia-160m/induction-verdict.json", |
| "pythia-160m/ioi-results.pt", |
| "pythia-160m/ioi-verdict.json", |
| "pythia-160m/sva-results.pt", |
| "pythia-160m/sva-verdict.json" |
| ] |
| } |
| } |
|
|