Upload folder using huggingface_hub
Browse files- ._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_0/ae.pt +3 -0
- ._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_0/eval_results.json +1 -0
- ._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_1/ae.pt +3 -0
- ._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_1/eval_results.json +1 -0
- ._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_2/ae.pt +3 -0
- ._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_2/eval_results.json +1 -0
- ._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_3/ae.pt +3 -0
- ._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_3/eval_results.json +1 -0
- ._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_27/trainer_0/eval_results.json +1 -1
- ._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_27/trainer_1/eval_results.json +1 -1
- ._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_27/trainer_2/eval_results.json +1 -1
- ._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_27/trainer_3/eval_results.json +1 -1
- ._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_9/trainer_0/eval_results.json +1 -1
- ._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_9/trainer_1/eval_results.json +1 -1
- ._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_9/trainer_2/eval_results.json +1 -1
- ._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_9/trainer_3/eval_results.json +1 -1
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_0/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d5a94a2d6dec84165d57928239e40b9cbdf2a91ebeb7c7de60f54e76b29db328
|
| 3 |
+
size 536954902
|
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_0/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 4.41109375, "l1_loss": 41.82125, "l0": 79.8651611328125, "frac_variance_explained": 0.898984375, "cossim": 0.8560546875, "l2_ratio": 0.8562890625, "relative_reconstruction_bias": 1.00046875, "loss_original": 2.13728515625, "loss_reconstructed": 2.3126171875, "loss_zero": 11.8125, "frac_recovered": 0.981953125, "frac_alive": 0.99542236328125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_1/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e44176411331836232fcd0181acc760d8d1afc36ef77697db7557c35beff34f9
|
| 3 |
+
size 536954902
|
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_1/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 4.0209375, "l1_loss": 62.89, "l0": 161.96763671875, "frac_variance_explained": 0.91451171875, "cossim": 0.88205078125, "l2_ratio": 0.88236328125, "relative_reconstruction_bias": 1.0012890625, "loss_original": 2.13728515625, "loss_reconstructed": 2.24765625, "loss_zero": 11.8125, "frac_recovered": 0.9882421875, "frac_alive": 0.99993896484375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_2/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b1c01e561595654381b63ce648f563880ba1c48c8f10f70cb238a1c38acd1f75
|
| 3 |
+
size 2147764246
|
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_2/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 4.121875, "l1_loss": 40.935, "l0": 80.9107861328125, "frac_variance_explained": 0.909609375, "cossim": 0.875703125, "l2_ratio": 0.873671875, "relative_reconstruction_bias": 1.00015625, "loss_original": 2.13728515625, "loss_reconstructed": 2.2828125, "loss_zero": 11.8125, "frac_recovered": 0.98490234375, "frac_alive": 0.841888427734375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_3/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:96cde24541510b6c5d2543a5627e07f7bc3bdec9c0a330d4a62dd610b2bfa18f
|
| 3 |
+
size 2147764246
|
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_18/trainer_3/eval_results.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"l2_loss": 3.695703125, "l1_loss": 57.2225, "l0": 160.3888671875, "frac_variance_explained": 0.9287890625, "cossim": 0.90158203125, "l2_ratio": 0.89947265625, "relative_reconstruction_bias": 0.9999609375, "loss_original": 2.13728515625, "loss_reconstructed": 2.223046875, "loss_zero": 11.8125, "frac_recovered": 0.99078125, "frac_alive": 0.8576812744140625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_27/trainer_0/eval_results.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"l2_loss": 64.5, "l1_loss": 158.
|
|
|
|
| 1 |
+
{"l2_loss": 64.5, "l1_loss": 158.48, "l0": 3.5237890625, "frac_variance_explained": 0.3183203125, "cossim": 0.1781396484375, "l2_ratio": 3.717890625, "relative_reconstruction_bias": 11.90765625, "loss_original": 2.13728515625, "loss_reconstructed": 10.9125, "loss_zero": 11.8125, "frac_recovered": 0.09535858154296875, "frac_alive": 0.0123291015625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_27/trainer_1/eval_results.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"l2_loss": 56.
|
|
|
|
| 1 |
+
{"l2_loss": 56.015, "l1_loss": 130.39, "l0": 6.8289013671875, "frac_variance_explained": 0.3350390625, "cossim": 0.1444677734375, "l2_ratio": 3.185, "relative_reconstruction_bias": 11.29890625, "loss_original": 2.13728515625, "loss_reconstructed": 10.41625, "loss_zero": 11.8125, "frac_recovered": 0.1462725830078125, "frac_alive": 0.04193115234375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_27/trainer_2/eval_results.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"l2_loss": 76.
|
|
|
|
| 1 |
+
{"l2_loss": 76.535, "l1_loss": 496.14, "l0": 32.3553515625, "frac_variance_explained": 0.286796875, "cossim": 0.1408740234375, "l2_ratio": 4.40625, "relative_reconstruction_bias": 17.151875, "loss_original": 2.13728515625, "loss_reconstructed": 11.72625, "loss_zero": 11.8125, "frac_recovered": 0.0098712158203125, "frac_alive": 0.0015411376953125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_27/trainer_3/eval_results.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"l2_loss": 64.
|
|
|
|
| 1 |
+
{"l2_loss": 64.4725, "l1_loss": 201.79, "l0": 8.8722314453125, "frac_variance_explained": 0.32947265625, "cossim": 0.16078125, "l2_ratio": 3.696640625, "relative_reconstruction_bias": 13.00953125, "loss_original": 2.13728515625, "loss_reconstructed": 10.1690625, "loss_zero": 11.8125, "frac_recovered": 0.17206039428710937, "frac_alive": 0.005279541015625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_9/trainer_0/eval_results.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"l2_loss": 2.
|
|
|
|
| 1 |
+
{"l2_loss": 2.593515625, "l1_loss": 24.898125, "l0": 83.339609375, "frac_variance_explained": 0.9429296875, "cossim": 0.82373046875, "l2_ratio": 0.82177734375, "relative_reconstruction_bias": 1.0005078125, "loss_original": 2.13728515625, "loss_reconstructed": 2.25451171875, "loss_zero": 11.8125, "frac_recovered": 0.987578125, "frac_alive": 0.99969482421875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_9/trainer_1/eval_results.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"l2_loss": 2.
|
|
|
|
| 1 |
+
{"l2_loss": 2.365703125, "l1_loss": 34.48625, "l0": 169.05658203125, "frac_variance_explained": 0.9555078125, "cossim": 0.85583984375, "l2_ratio": 0.8549609375, "relative_reconstruction_bias": 1.0008203125, "loss_original": 2.13728515625, "loss_reconstructed": 2.20716796875, "loss_zero": 11.8125, "frac_recovered": 0.99220703125, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_9/trainer_2/eval_results.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"l2_loss": 2.
|
|
|
|
| 1 |
+
{"l2_loss": 2.440546875, "l1_loss": 23.644375, "l0": 83.163466796875, "frac_variance_explained": 0.94892578125, "cossim": 0.84505859375, "l2_ratio": 0.8411328125, "relative_reconstruction_bias": 0.9966796875, "loss_original": 2.13728515625, "loss_reconstructed": 2.23462890625, "loss_zero": 11.8125, "frac_recovered": 0.9896875, "frac_alive": 0.940155029296875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
._mistral_8b_mistralai_Ministral-8B-Instruct-2410_batch_top_k/resid_post_layer_9/trainer_3/eval_results.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"l2_loss": 2.
|
|
|
|
| 1 |
+
{"l2_loss": 2.210234375, "l1_loss": 31.965625, "l0": 168.0044189453125, "frac_variance_explained": 0.958984375, "cossim": 0.875078125, "l2_ratio": 0.87197265625, "relative_reconstruction_bias": 0.99671875, "loss_original": 2.13728515625, "loss_reconstructed": 2.19716796875, "loss_zero": 11.8125, "frac_recovered": 0.99306640625, "frac_alive": 0.957611083984375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|