Matteo He
Add files using upload-large-folder tool
a95a860 verified
Raw
History Blame Contribute Delete
48.3 kB
{
"release": "parameter-trajectory-crosscoders",
"paper": "Learning to Read Out: Unembedding Dynamics in Language Model Pretraining",
"code": "https://github.com/hematteo/learning-to-read-out",
"loading_recipe": "from safetensors.torch import safe_open; f = safe_open(path, framework='pt'); W_D = f.get_tensor('W_D'); thr = f.get_tensor('activation_function.log_jumprelu_threshold').exp() # or use src.crosscoder.checkpoints.load_checkpoint from the code repo",
"checkpoints": [
{
"path": "olmo-2-7b/W_U/cross-snapshot-32/d32768/seed0.safetensors",
"model_name": "allenai/OLMo-2-1124-7B",
"model": "olmo-2-7b",
"matrix": "W_U",
"kind": "cross-snapshot-32",
"d_sae": 32768,
"seed": 0,
"n_snapshots": 32,
"arch": "jumprelu-crosscoder",
"public_label": "OLMo-2-7B W_U crosscoder, 32 snapshots, d_sae=32768, seed 0",
"quality": {
"explained_variance": 0.8526035831389717,
"mean_l0": 557.2795322963169,
"dead_rate": 0.0
},
"training": {
"lr": 1e-05,
"l1_coefficient": 0.3,
"batch_size": 1024,
"n_epochs": 300,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_E/cross-snapshot-32/d24576/seed0.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_E",
"kind": "cross-snapshot-32",
"d_sae": 24576,
"seed": 0,
"n_snapshots": 32,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_E crosscoder, 32 snapshots, d_sae=24576, seed 0",
"quality": {
"explained_variance": 0.8305442370487662,
"mean_l0": 117.50901977539063,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 1024,
"n_epochs": 300,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_E/cross-snapshot-32/d8192/seed0.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_E",
"kind": "cross-snapshot-32",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 32,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_E crosscoder, 32 snapshots, d_sae=8192, seed 0",
"quality": {
"explained_variance": 0.5807971126242004,
"mean_l0": 82.08435302734375
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 1024,
"n_epochs": 300,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_E/cross-snapshot-32/d8192/seed1.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_E",
"kind": "cross-snapshot-32",
"d_sae": 8192,
"seed": 1,
"n_snapshots": 32,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_E crosscoder, 32 snapshots, d_sae=8192, seed 1",
"quality": {
"explained_variance": 0.580104140268709,
"mean_l0": 82.01199279785156,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 1024,
"n_epochs": 300,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_E/cross-snapshot-32/d8192/seed2.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_E",
"kind": "cross-snapshot-32",
"d_sae": 8192,
"seed": 2,
"n_snapshots": 32,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_E crosscoder, 32 snapshots, d_sae=8192, seed 2",
"quality": {
"explained_variance": 0.5815380601296635,
"mean_l0": 82.31076721191407,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 1024,
"n_epochs": 300,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_E/cross-snapshot-32/d8192/seed3.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_E",
"kind": "cross-snapshot-32",
"d_sae": 8192,
"seed": 3,
"n_snapshots": 32,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_E crosscoder, 32 snapshots, d_sae=8192, seed 3",
"quality": {
"explained_variance": 0.5805984141666259,
"mean_l0": 82.13521545410157,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 1024,
"n_epochs": 300,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_E/cross-snapshot-32/d8192/seed4.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_E",
"kind": "cross-snapshot-32",
"d_sae": 8192,
"seed": 4,
"n_snapshots": 32,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_E crosscoder, 32 snapshots, d_sae=8192, seed 4",
"quality": {
"explained_variance": 0.5825219599393371,
"mean_l0": 82.5084307861328,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 1024,
"n_epochs": 300,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/architecture-comparison/d8192/batchtopk/seed0.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "architecture-comparison/d8192",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 32,
"arch": "batchtopk",
"public_label": "Pythia-160M W_U crosscoder (batchtopk activation), 32 snapshots, d_sae=8192, seed 0",
"quality": {
"explained_variance": 0.7249162974116965,
"mean_l0": 203.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 1024,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/architecture-comparison/d8192/gated/seed0.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "architecture-comparison/d8192",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 32,
"arch": "gated",
"public_label": "Pythia-160M W_U crosscoder (gated activation), 32 snapshots, d_sae=8192, seed 0",
"quality": {
"explained_variance": 0.21442979106557436,
"mean_l0": 11.51853515625
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 1024,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/architecture-comparison/d8192/gated-retuned/seed0.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "architecture-comparison/d8192",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 32,
"arch": "gated",
"public_label": "Pythia-160M W_U crosscoder (Gated activation, L1=0.05), 32 snapshots, d_sae=8192, seed 0",
"quality": {
"explained_variance": 0.827462127951851,
"mean_l0": 653.64568359375
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 1024,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/cross-snapshot-16/d8192/seed0.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "cross-snapshot-16",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 16,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U crosscoder, 16-snapshot downsample, d_sae=8192, seed 0",
"quality": {
"explained_variance": 0.7734329300485889,
"mean_l0": 215.73649780273436,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 1024,
"n_epochs": 300,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/cross-snapshot-32/d16384/seed0.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "cross-snapshot-32",
"d_sae": 16384,
"seed": 0,
"n_snapshots": 32,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U crosscoder, 32 snapshots, d_sae=16384, seed 0",
"quality": {
"explained_variance": 0.7802076016978367,
"mean_l0": 103.0708544921875
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 1024,
"n_epochs": 300,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/cross-snapshot-32/d24576/seed0.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "cross-snapshot-32",
"d_sae": 24576,
"seed": 0,
"n_snapshots": 32,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U crosscoder, 32 snapshots, d_sae=24576, seed 0",
"quality": {
"explained_variance": 0.9196719747097695,
"mean_l0": 285.9946594238281,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 1024,
"n_epochs": 300,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/cross-snapshot-32/d24576/seed1.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "cross-snapshot-32",
"d_sae": 24576,
"seed": 1,
"n_snapshots": 32,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U crosscoder, 32 snapshots, d_sae=24576, seed 1",
"quality": {
"explained_variance": 0.9195844996740984,
"mean_l0": 286.09990661621094,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 1024,
"n_epochs": 300,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/cross-snapshot-32/d24576/seed2.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "cross-snapshot-32",
"d_sae": 24576,
"seed": 2,
"n_snapshots": 32,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U crosscoder, 32 snapshots, d_sae=24576, seed 2",
"quality": {
"explained_variance": 0.9196765967343927,
"mean_l0": 286.3446789550781,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 1024,
"n_epochs": 300,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/cross-snapshot-32/d8192/seed0.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "cross-snapshot-32",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 32,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U crosscoder, 32 snapshots, d_sae=8192, seed 0",
"quality": {
"explained_variance": 0.7763740924044573,
"mean_l0": 203.14466369628906
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 1024,
"n_epochs": 300,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/cross-snapshot-32/d8192/seed1.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "cross-snapshot-32",
"d_sae": 8192,
"seed": 1,
"n_snapshots": 32,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U crosscoder, 32 snapshots, d_sae=8192, seed 1",
"quality": {
"explained_variance": 0.7763600972452054,
"mean_l0": 203.26546752929687
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 1024,
"n_epochs": 300,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/cross-snapshot-32/d8192/seed2.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "cross-snapshot-32",
"d_sae": 8192,
"seed": 2,
"n_snapshots": 32,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U crosscoder, 32 snapshots, d_sae=8192, seed 2",
"quality": {
"explained_variance": 0.7764523546944131,
"mean_l0": 203.14273071289062
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 1024,
"n_epochs": 300,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/cross-snapshot-32/d8192/seed3.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "cross-snapshot-32",
"d_sae": 8192,
"seed": 3,
"n_snapshots": 32,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U crosscoder, 32 snapshots, d_sae=8192, seed 3",
"quality": {
"explained_variance": 0.7762290236129058,
"mean_l0": 203.09911560058595
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 1024,
"n_epochs": 300,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/cross-snapshot-32/d8192/seed4.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "cross-snapshot-32",
"d_sae": 8192,
"seed": 4,
"n_snapshots": 32,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U crosscoder, 32 snapshots, d_sae=8192, seed 4",
"quality": {
"explained_variance": 0.7765168883427946,
"mean_l0": 203.4711474609375
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 1024,
"n_epochs": 300,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/final-snapshot-saes/d16384.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "final-snapshot-saes",
"d_sae": 16384,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U final-snapshot SAE (step 143000 only), d_sae=16384",
"quality": {
"explained_variance": 0.8703331309369664,
"mean_l0": 1913.29353515625
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.05,
"batch_size": 1024,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/final-snapshot-saes/d32768.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "final-snapshot-saes",
"d_sae": 32768,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U final-snapshot SAE (step 143000 only), d_sae=32768",
"quality": {
"explained_variance": 0.926354609914279,
"mean_l0": 3410.36810546875
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.05,
"batch_size": 1024,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/final-snapshot-saes/d6144.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "final-snapshot-saes",
"d_sae": 6144,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U final-snapshot SAE (step 143000 only), d_sae=6144",
"quality": {
"explained_variance": 0.7649826125102704,
"mean_l0": 861.8197265625
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.05,
"batch_size": 1024,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/final-snapshot-saes/d65536.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "final-snapshot-saes",
"d_sae": 65536,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U final-snapshot SAE (step 143000 only), d_sae=65536",
"quality": {
"explained_variance": 0.9642323011052749,
"mean_l0": 5943.19078125
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.05,
"batch_size": 1024,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/final-snapshot-saes/d8192.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "final-snapshot-saes",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U final-snapshot SAE (step 143000 only), d_sae=8192",
"quality": {
"explained_variance": 0.7989936757858394,
"mean_l0": 1084.4333984375
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.05,
"batch_size": 1024,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/lambda-sweep/d8192/lam0p40_seed0.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "lambda-sweep",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 32,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U crosscoder, \u03bb-sweep iso-L0 (\u03bb=0.4), 32 snapshots, d_sae=8192, seed 0",
"quality": {
"explained_variance": 0.7479858952216084,
"mean_l0": 160.0768243408203,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.4,
"batch_size": 1024,
"n_epochs": 300,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/lambda-sweep/d8192/lam1p00_seed0.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "lambda-sweep",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 32,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U crosscoder, \u03bb-sweep iso-L0 (\u03bb=1.0), 32 snapshots, d_sae=8192, seed 0",
"quality": {
"explained_variance": 0.6323049070911215,
"mean_l0": 58.49865539550781,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 1.0,
"batch_size": 1024,
"n_epochs": 300,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/lambda-sweep/d8192/lam1p20_seed0.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "lambda-sweep",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 32,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U crosscoder, \u03bb-sweep iso-L0 (\u03bb=1.2), 32 snapshots, d_sae=8192, seed 0",
"quality": {
"explained_variance": 0.6027639222075235,
"mean_l0": 45.41338317871094,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 1.2,
"batch_size": 1024,
"n_epochs": 300,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/lambda-sweep/d8192/lam1p35_seed0.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "lambda-sweep",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 32,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U crosscoder, \u03bb-sweep iso-L0 (\u03bb=1.35), 32 snapshots, d_sae=8192, seed 0",
"quality": {
"explained_variance": 0.5819729868726617,
"mean_l0": 37.971342163085936,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 1.35,
"batch_size": 1024,
"n_epochs": 300,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/lambda-sweep/d8192/lam1p35_seed1.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "lambda-sweep",
"d_sae": 8192,
"seed": 1,
"n_snapshots": 32,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U crosscoder, \u03bb-sweep iso-L0 (\u03bb=1.35), 32 snapshots, d_sae=8192, seed 1",
"quality": {
"explained_variance": 0.581652004193779,
"mean_l0": 37.79685302734375,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 1.35,
"batch_size": 1024,
"n_epochs": 300,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/lambda-sweep/d8192/lam1p35_seed2.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "lambda-sweep",
"d_sae": 8192,
"seed": 2,
"n_snapshots": 32,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U crosscoder, \u03bb-sweep iso-L0 (\u03bb=1.35), 32 snapshots, d_sae=8192, seed 2",
"quality": {
"explained_variance": 0.582366028895355,
"mean_l0": 37.980972900390626,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 1.35,
"batch_size": 1024,
"n_epochs": 300,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/lambda-sweep/d8192/lam1p80_seed0.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "lambda-sweep",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 32,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U crosscoder, \u03bb-sweep iso-L0 (\u03bb=1.8), 32 snapshots, d_sae=8192, seed 0",
"quality": {
"explained_variance": 0.527703732469858,
"mean_l0": 23.033436279296875,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 1.8,
"batch_size": 1024,
"n_epochs": 300,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/per-snapshot-saes/d8192/step0.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "per-snapshot-saes",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U per-snapshot SAE at training step 0, d_sae=8192",
"quality": {
"explained_variance": 0.7288661622516968,
"mean_l0": 1150.079375,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 2048,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/per-snapshot-saes/d8192/step1.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "per-snapshot-saes",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U per-snapshot SAE at training step 1, d_sae=8192",
"quality": {
"explained_variance": 0.7288661622516968,
"mean_l0": 1150.079375,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 2048,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/per-snapshot-saes/d8192/step1000.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "per-snapshot-saes",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U per-snapshot SAE at training step 1000, d_sae=8192",
"quality": {
"explained_variance": 0.7862491472291859,
"mean_l0": 996.89369140625,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 2048,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/per-snapshot-saes/d8192/step102000.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "per-snapshot-saes",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U per-snapshot SAE at training step 102000, d_sae=8192",
"quality": {
"explained_variance": 0.7998127382951936,
"mean_l0": 983.041015625,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 2048,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/per-snapshot-saes/d8192/step116000.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "per-snapshot-saes",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U per-snapshot SAE at training step 116000, d_sae=8192",
"quality": {
"explained_variance": 0.8120686966847851,
"mean_l0": 957.6841577148438,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 2048,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/per-snapshot-saes/d8192/step128.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "per-snapshot-saes",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U per-snapshot SAE at training step 128, d_sae=8192",
"quality": {
"explained_variance": 0.7290331109474104,
"mean_l0": 1149.5578076171876,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 2048,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/per-snapshot-saes/d8192/step130000.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "per-snapshot-saes",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U per-snapshot SAE at training step 130000, d_sae=8192",
"quality": {
"explained_variance": 0.8195671283611181,
"mean_l0": 940.0407983398437,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 2048,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/per-snapshot-saes/d8192/step14000.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "per-snapshot-saes",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U per-snapshot SAE at training step 14000, d_sae=8192",
"quality": {
"explained_variance": 0.7912227686750525,
"mean_l0": 996.466455078125,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 2048,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/per-snapshot-saes/d8192/step143000.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "per-snapshot-saes",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U per-snapshot SAE at training step 143000, d_sae=8192",
"quality": {
"explained_variance": 0.8244679775563315,
"mean_l0": 924.0657104492187,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 2048,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/per-snapshot-saes/d8192/step16.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "per-snapshot-saes",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U per-snapshot SAE at training step 16, d_sae=8192",
"quality": {
"explained_variance": 0.7288671871864183,
"mean_l0": 1150.0855908203125,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 2048,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/per-snapshot-saes/d8192/step2.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "per-snapshot-saes",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U per-snapshot SAE at training step 2, d_sae=8192",
"quality": {
"explained_variance": 0.72886850366392,
"mean_l0": 1150.0986572265624,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 2048,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/per-snapshot-saes/d8192/step2000.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "per-snapshot-saes",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U per-snapshot SAE at training step 2000, d_sae=8192",
"quality": {
"explained_variance": 0.7992791212506758,
"mean_l0": 969.1734448242188,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 2048,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/per-snapshot-saes/d8192/step21000.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "per-snapshot-saes",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U per-snapshot SAE at training step 21000, d_sae=8192",
"quality": {
"explained_variance": 0.7907924441773572,
"mean_l0": 998.4695678710938,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 2048,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/per-snapshot-saes/d8192/step256.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "per-snapshot-saes",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U per-snapshot SAE at training step 256, d_sae=8192",
"quality": {
"explained_variance": 0.7317386038736777,
"mean_l0": 1142.36794921875,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 2048,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/per-snapshot-saes/d8192/step27000.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "per-snapshot-saes",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U per-snapshot SAE at training step 27000, d_sae=8192",
"quality": {
"explained_variance": 0.7907578681617007,
"mean_l0": 999.3178100585938,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 2048,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/per-snapshot-saes/d8192/step3000.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "per-snapshot-saes",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U per-snapshot SAE at training step 3000, d_sae=8192",
"quality": {
"explained_variance": 0.7993412040992963,
"mean_l0": 972.4029614257812,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 2048,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/per-snapshot-saes/d8192/step32.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "per-snapshot-saes",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U per-snapshot SAE at training step 32, d_sae=8192",
"quality": {
"explained_variance": 0.7288681571269175,
"mean_l0": 1150.0745947265625,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 2048,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/per-snapshot-saes/d8192/step34000.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "per-snapshot-saes",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U per-snapshot SAE at training step 34000, d_sae=8192",
"quality": {
"explained_variance": 0.7906181564574442,
"mean_l0": 999.5078881835938,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 2048,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/per-snapshot-saes/d8192/step4.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "per-snapshot-saes",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U per-snapshot SAE at training step 4, d_sae=8192",
"quality": {
"explained_variance": 0.7288676993559831,
"mean_l0": 1150.087177734375,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 2048,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/per-snapshot-saes/d8192/step4000.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "per-snapshot-saes",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U per-snapshot SAE at training step 4000, d_sae=8192",
"quality": {
"explained_variance": 0.7977749784323522,
"mean_l0": 977.3629370117187,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 2048,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/per-snapshot-saes/d8192/step47000.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "per-snapshot-saes",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U per-snapshot SAE at training step 47000, d_sae=8192",
"quality": {
"explained_variance": 0.790620026518176,
"mean_l0": 1000.2795385742188,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 2048,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/per-snapshot-saes/d8192/step5000.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "per-snapshot-saes",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U per-snapshot SAE at training step 5000, d_sae=8192",
"quality": {
"explained_variance": 0.7960998419245482,
"mean_l0": 981.8336376953125,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 2048,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/per-snapshot-saes/d8192/step512.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "per-snapshot-saes",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U per-snapshot SAE at training step 512, d_sae=8192",
"quality": {
"explained_variance": 0.7542853862655656,
"mean_l0": 1086.609990234375,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 2048,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/per-snapshot-saes/d8192/step6000.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "per-snapshot-saes",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U per-snapshot SAE at training step 6000, d_sae=8192",
"quality": {
"explained_variance": 0.7947783648659816,
"mean_l0": 985.470498046875,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 2048,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/per-snapshot-saes/d8192/step61000.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "per-snapshot-saes",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U per-snapshot SAE at training step 61000, d_sae=8192",
"quality": {
"explained_variance": 0.7904375219119095,
"mean_l0": 1001.8048901367188,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 2048,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/per-snapshot-saes/d8192/step64.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "per-snapshot-saes",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U per-snapshot SAE at training step 64, d_sae=8192",
"quality": {
"explained_variance": 0.728867046997959,
"mean_l0": 1150.0748681640625,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 2048,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/per-snapshot-saes/d8192/step7000.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "per-snapshot-saes",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U per-snapshot SAE at training step 7000, d_sae=8192",
"quality": {
"explained_variance": 0.7938394341974695,
"mean_l0": 988.107509765625,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 2048,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/per-snapshot-saes/d8192/step75000.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "per-snapshot-saes",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U per-snapshot SAE at training step 75000, d_sae=8192",
"quality": {
"explained_variance": 0.790395516481567,
"mean_l0": 1004.148408203125,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 2048,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/per-snapshot-saes/d8192/step8.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "per-snapshot-saes",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U per-snapshot SAE at training step 8, d_sae=8192",
"quality": {
"explained_variance": 0.7288677026395104,
"mean_l0": 1150.0938623046875,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 2048,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/per-snapshot-saes/d8192/step8000.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "per-snapshot-saes",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U per-snapshot SAE at training step 8000, d_sae=8192",
"quality": {
"explained_variance": 0.7931325520036884,
"mean_l0": 990.1047631835937,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 2048,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/per-snapshot-saes/d8192/step89000.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "per-snapshot-saes",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U per-snapshot SAE at training step 89000, d_sae=8192",
"quality": {
"explained_variance": 0.7924073402250866,
"mean_l0": 1001.9916259765625,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 2048,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-160m/W_U/per-snapshot-saes/d8192/step9000.safetensors",
"model_name": "EleutherAI/pythia-160m",
"model": "pythia-160m",
"matrix": "W_U",
"kind": "per-snapshot-saes",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 1,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-160M W_U per-snapshot SAE at training step 9000, d_sae=8192",
"quality": {
"explained_variance": 0.7924251809723794,
"mean_l0": 992.0130297851563,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 2048,
"n_epochs": 100,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-1b/W_U/cross-snapshot-32/d16384/seed0.safetensors",
"model_name": "EleutherAI/pythia-1b",
"model": "pythia-1b",
"matrix": "W_U",
"kind": "cross-snapshot-32",
"d_sae": 16384,
"seed": 0,
"n_snapshots": 32,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-1B W_U crosscoder, 32 snapshots, d_sae=16384, seed 0",
"quality": {
"explained_variance": 0.7812400637275599,
"mean_l0": 499.1909545898437,
"dead_rate": 0.0
},
"training": {
"lr": 2e-05,
"l1_coefficient": 0.3,
"batch_size": 1024,
"n_epochs": 300,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-1b/W_U/cross-snapshot-32/d24576/seed0.safetensors",
"model_name": "EleutherAI/pythia-1b",
"model": "pythia-1b",
"matrix": "W_U",
"kind": "cross-snapshot-32",
"d_sae": 24576,
"seed": 0,
"n_snapshots": 32,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-1B W_U crosscoder, 32 snapshots, d_sae=24576, seed 0",
"quality": {
"explained_variance": 0.861217602606241,
"mean_l0": 517.0205554199218,
"dead_rate": 0.0
},
"training": {
"lr": 2e-05,
"l1_coefficient": 0.3,
"batch_size": 1024,
"n_epochs": 300,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-1b/W_U/cross-snapshot-32/d8192/seed0.safetensors",
"model_name": "EleutherAI/pythia-1b",
"model": "pythia-1b",
"matrix": "W_U",
"kind": "cross-snapshot-32",
"d_sae": 8192,
"seed": 0,
"n_snapshots": 32,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-1B W_U crosscoder, 32 snapshots, d_sae=8192, seed 0",
"quality": {
"explained_variance": 0.628030172619609,
"mean_l0": 374.1408770751953,
"dead_rate": 0.0
},
"training": {
"lr": 2e-05,
"l1_coefficient": 0.3,
"batch_size": 1024,
"n_epochs": 300,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-1b/W_U/cross-snapshot-32-matched-window/d24576/seed0.safetensors",
"model_name": "EleutherAI/pythia-1b",
"model": "pythia-1b",
"matrix": "W_U",
"kind": "cross-snapshot-32-matched-window",
"d_sae": 24576,
"seed": 0,
"n_snapshots": 32,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-1B W_U crosscoder, OLMo-matched late-start 32-snapshot window, d_sae=24576, seed 0",
"quality": {
"explained_variance": 0.8844067825487556,
"mean_l0": 263.98009787989025,
"dead_rate": 0.0
},
"training": {
"lr": 5e-05,
"l1_coefficient": 0.3,
"batch_size": 1024,
"n_epochs": 300,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-6.9b/W_U/cross-snapshot-32/d32768/seed0-sparse.safetensors",
"model_name": "EleutherAI/pythia-6.9b",
"model": "pythia-6.9b",
"matrix": "W_U",
"kind": "cross-snapshot-32",
"d_sae": 32768,
"seed": 0,
"n_snapshots": 32,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-6.9B W_U crosscoder, 32 snapshots, d_sae=32768, seed 0, selected sparse run (lambda=0.6)",
"quality": {
"explained_variance": 0.8080785315044828,
"mean_l0": 741.8141662287833,
"dead_rate": 0.0
},
"training": {
"lr": 1e-05,
"l1_coefficient": 0.6,
"batch_size": 1024,
"n_epochs": 300,
"input_preprocess": "center_scale"
}
},
{
"path": "pythia-6.9b/W_U/cross-snapshot-32/d32768/seed0.safetensors",
"model_name": "EleutherAI/pythia-6.9b",
"model": "pythia-6.9b",
"matrix": "W_U",
"kind": "cross-snapshot-32",
"d_sae": 32768,
"seed": 0,
"n_snapshots": 32,
"arch": "jumprelu-crosscoder",
"public_label": "Pythia-6.9B W_U crosscoder, 32 snapshots, d_sae=32768, seed 0",
"quality": {
"explained_variance": 0.8331843988901371,
"mean_l0": 1956.59887943171,
"dead_rate": 0.0
},
"training": {
"lr": 1e-05,
"l1_coefficient": 0.3,
"batch_size": 1024,
"n_epochs": 300,
"input_preprocess": "center_scale"
}
}
],
"auxiliary": {
"derived/aggregates": [
"aggregates_dsae16384_seed0.pt",
"aggregates_dsae24576_seed0.pt",
"aggregates_dsae8192_seed0.pt",
"aggregates_pythia-160m_d24576_seed0.pt",
"aggregates_pythia-1b_d16384_seed0.pt",
"aggregates_pythia-1b_d24576_seed0.pt",
"aggregates_pythia-1b_d8192_seed0.pt",
"aggregates_pythia-6.9b_d32768_seed0.pt"
],
"derived/rates": [
"we-d24576/we_cc_dsae24576_seed0_norms.npy",
"we-d24576/we_rates_dsae24576_seed0.pt",
"we-d8192-multiseed/we_cc_dsae8192_seed0_norms.npy",
"we-d8192-multiseed/we_cc_dsae8192_seed1_norms.npy",
"we-d8192-multiseed/we_cc_dsae8192_seed2_norms.npy",
"we-d8192-multiseed/we_cc_dsae8192_seed3_norms.npy",
"we-d8192-multiseed/we_cc_dsae8192_seed4_norms.npy",
"we-d8192-multiseed/we_rates_dsae8192_seed0.pt",
"we-d8192-multiseed/we_rates_dsae8192_seed1.pt",
"we-d8192-multiseed/we_rates_dsae8192_seed2.pt",
"we-d8192-multiseed/we_rates_dsae8192_seed3.pt",
"we-d8192-multiseed/we_rates_dsae8192_seed4.pt",
"wu-1b-d24576/decoder_norms_dsae24576_seed0.npy",
"wu-d24576-multiseed/decoder_norms_dsae24576_seed0.npy",
"wu-d24576-multiseed/decoder_norms_dsae24576_seed1.npy",
"wu-d24576-multiseed/decoder_norms_dsae24576_seed2.npy",
"wu-d24576-multiseed/wu_cc_dsae24576_seed0_norms.npy",
"wu-d24576-multiseed/wu_cc_dsae24576_seed1_norms.npy",
"wu-d24576-multiseed/wu_cc_dsae24576_seed2_norms.npy",
"wu-d24576-multiseed/wu_rates_dsae24576_seed0.pt",
"wu-d24576-multiseed/wu_rates_dsae24576_seed1.pt",
"wu-d24576-multiseed/wu_rates_dsae24576_seed2.pt",
"wu-d8192-cs16/wu_cc_dsae8192_seed0_norms.npy",
"wu-d8192-cs16/wu_rates_dsae8192_seed0.pt",
"wu-d8192-multiseed/decoder_norms_all_seeds.npy",
"wu-d8192-multiseed/firing_rates_all_seeds.npy",
"wu-d8192-persnap/train.log"
],
"evaluation": [
"eval-corpus/README.md",
"eval-corpus/eval_tokens.pt"
],
"attribution": [
"pythia-160m/induction-results.pt",
"pythia-160m/induction-verdict.json",
"pythia-160m/ioi-results.pt",
"pythia-160m/ioi-verdict.json",
"pythia-160m/sva-results.pt",
"pythia-160m/sva-verdict.json"
]
}
}