Upload folder using huggingface_hub
Browse files- pythia70m/pythia70m_test_sae/resid_post_layer_3/trainer_0/ae.pt +3 -0
- pythia70m/pythia70m_test_sae/resid_post_layer_3/trainer_0/class_accuracies.pkl +3 -0
- pythia70m/pythia70m_test_sae/resid_post_layer_3/trainer_0/config.json +26 -0
- pythia70m/pythia70m_test_sae/resid_post_layer_3/trainer_0/eval_results.json +1 -0
- pythia70m/pythia70m_test_sae/resid_post_layer_3/trainer_0/node_effects.pkl +3 -0
- pythia70m/pythia70m_test_sae/resid_post_layer_3/trainer_0/view_pkl.ipynb +0 -0
pythia70m/pythia70m_test_sae/resid_post_layer_3/trainer_0/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f599015ac81cd763977d0c99c7f1a37f484518cbe0489c1e5d22b7f49624cce9
|
| 3 |
+
size 16797480
|
pythia70m/pythia70m_test_sae/resid_post_layer_3/trainer_0/class_accuracies.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9558e587792bdff2f76d52d29b28be7f38fb499d4ac9dddf90e25b5ecff3c2ea
|
| 3 |
+
size 313
|
pythia70m/pythia70m_test_sae/resid_post_layer_3/trainer_0/config.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"dict_class": "AutoEncoder",
|
| 4 |
+
"trainer_class": "StandardTrainer",
|
| 5 |
+
"activation_dim": 512,
|
| 6 |
+
"dict_size": 4096,
|
| 7 |
+
"lr": 0.001,
|
| 8 |
+
"l1_penalty": 0.075,
|
| 9 |
+
"warmup_steps": 1000,
|
| 10 |
+
"resample_steps": null,
|
| 11 |
+
"device": "cuda:0",
|
| 12 |
+
"layer": 3,
|
| 13 |
+
"lm_name": "EleutherAI/pythia-70m-deduped",
|
| 14 |
+
"wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3",
|
| 15 |
+
"submodule_name": "resid_post_layer_3"
|
| 16 |
+
},
|
| 17 |
+
"buffer": {
|
| 18 |
+
"d_submodule": 512,
|
| 19 |
+
"io": "out",
|
| 20 |
+
"n_ctxs": 10000,
|
| 21 |
+
"ctx_len": 128,
|
| 22 |
+
"refresh_batch_size": 64,
|
| 23 |
+
"out_batch_size": 8192,
|
| 24 |
+
"device": "cuda:0"
|
| 25 |
+
}
|
| 26 |
+
}
|
pythia70m/pythia70m_test_sae/resid_post_layer_3/trainer_0/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 4.725396633148193, "l1_loss": 35.286739349365234, "l0": 80.80000305175781, "frac_alive": 0.000244140625, "frac_variance_explained": 0.9795151948928833, "cossim": 0.946246325969696, "l2_ratio": 0.90411776304245, "loss_original": 4.035101413726807, "loss_reconstructed": 4.562260150909424, "loss_zero": 12.77825927734375, "frac_recovered": 0.9397061467170715, "hyperparameters": {"n_inputs": 20, "context_length": 128}}
|
pythia70m/pythia70m_test_sae/resid_post_layer_3/trainer_0/node_effects.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:59e0257be523692398cc10926bd826337e46071a16787038b0dfb3c22159027f
|
| 3 |
+
size 50204
|
pythia70m/pythia70m_test_sae/resid_post_layer_3/trainer_0/view_pkl.ipynb
ADDED
|
File without changes
|