Upload folder using huggingface_hub
Browse files- .gitattributes +24 -0
- README.md +145 -0
- SAE-LlaVA-NeXT-7b-Pile/.gitattributes +58 -0
- SAE-LlaVA-NeXT-7b-Pile/README.md +2 -0
- SAE-LlaVA-NeXT-7b-Pile/cfg.json +1 -0
- SAE-LlaVA-NeXT-7b-Pile/sae_weights.safetensors +3 -0
- SAE-LlaVA-NeXT-7b-Pile/sparsity.safetensors +3 -0
- SAE-Mistral-7b-v0.2-Pile/.gitattributes +58 -0
- SAE-Mistral-7b-v0.2-Pile/README.md +2 -0
- SAE-Mistral-7b-v0.2-Pile/cfg.json +1 -0
- SAE-Mistral-7b-v0.2-Pile/sae_weights.safetensors +3 -0
- SAE-Mistral-7b-v0.2-Pile/sparsity.safetensors +3 -0
- SAEV_Anole-7b_OBELICS/.gitattributes +35 -0
- SAEV_Anole-7b_OBELICS/README.md +2 -0
- SAEV_Anole-7b_OBELICS/cfg.json +1 -0
- SAEV_Anole-7b_OBELICS/sae_weights.safetensors +3 -0
- SAEV_Anole-7b_OBELICS/sparsity.safetensors +3 -0
- SAEV_Chameleon-7b_OBELICS/.gitattributes +35 -0
- SAEV_Chameleon-7b_OBELICS/README.md +2 -0
- SAEV_Chameleon-7b_OBELICS/cfg.json +1 -0
- SAEV_Chameleon-7b_OBELICS/sae_weights.safetensors +3 -0
- SAEV_Chameleon-7b_OBELICS/sparsity.safetensors +3 -0
- SAEV_LLaVA_NeXT-7b_OBELICS/.gitattributes +58 -0
- SAEV_LLaVA_NeXT-7b_OBELICS/README.md +2 -0
- SAEV_LLaVA_NeXT-7b_OBELICS/cfg.json +1 -0
- SAEV_LLaVA_NeXT-7b_OBELICS/sae_weights.safetensors +3 -0
- SAEV_LLaVA_NeXT-7b_OBELICS/sparsity.safetensors +3 -0
.gitattributes
CHANGED
|
@@ -8,6 +8,8 @@
|
|
| 8 |
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
| 11 |
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
|
@@ -33,3 +35,25 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.lz4 filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.mds filter=lfs diff=lfs merge=lfs -text
|
| 13 |
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 14 |
*.model filter=lfs diff=lfs merge=lfs -text
|
| 15 |
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 35 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 36 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 37 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
# Audio files - uncompressed
|
| 39 |
+
*.pcm filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
*.sam filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
*.raw filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
# Audio files - compressed
|
| 43 |
+
*.aac filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
*.flac filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
*.mp3 filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
*.ogg filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
*.wav filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
# Image files - uncompressed
|
| 49 |
+
*.bmp filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
*.gif filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
*.png filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
*.tiff filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
# Image files - compressed
|
| 54 |
+
*.jpg filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
*.jpeg filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
*.webp filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
# Video files - compressed
|
| 58 |
+
*.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
*.webm filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SAE-V
|
| 2 |
+
Repository for SAE-V, including 2 sparse autoencoders (SAE) and 3 sparse autoencoders with Vision (SAE-V). See each model's folder for more information.
|
| 3 |
+
|
| 4 |
+
## 1. Training Parameters
|
| 5 |
+
|
| 6 |
+
The training parameters of all 5 models are listed below:
|
| 7 |
+
|
| 8 |
+
<table border="1" style="border-collapse: collapse;">
|
| 9 |
+
<thead>
|
| 10 |
+
<tr>
|
| 11 |
+
<th><strong>Hyper-parameters</strong></th>
|
| 12 |
+
<th><strong>SAE and SAE-V of LLaVA-NeXT/Mistral</strong></th>
|
| 13 |
+
<th><strong>SAE and SAE-V of Chameleon/Anole</strong></th>
|
| 14 |
+
</tr>
|
| 15 |
+
</thead>
|
| 16 |
+
<tbody>
|
| 17 |
+
<tr>
|
| 18 |
+
<td colspan="3" style="text-align: center; border-left: none; border-right: none;"><strong>Training Parameters</strong></td>
|
| 19 |
+
</tr>
|
| 20 |
+
<tr>
|
| 21 |
+
<td>total training steps</td>
|
| 22 |
+
<td>30000</td>
|
| 23 |
+
<td>30000</td>
|
| 24 |
+
</tr>
|
| 25 |
+
<tr>
|
| 26 |
+
<td>batch size</td>
|
| 27 |
+
<td>4096</td>
|
| 28 |
+
<td>4096</td>
|
| 29 |
+
</tr>
|
| 30 |
+
<tr>
|
| 31 |
+
<td>LR</td>
|
| 32 |
+
<td>5e-5</td>
|
| 33 |
+
<td>5e-5</td>
|
| 34 |
+
</tr>
|
| 35 |
+
<tr>
|
| 36 |
+
<td>L1 warmup steps</td>
|
| 37 |
+
<td>1500</td>
|
| 38 |
+
<td>1500</td>
|
| 39 |
+
</tr>
|
| 40 |
+
<tr>
|
| 41 |
+
<td>LR decay steps</td>
|
| 42 |
+
<td>6000</td>
|
| 43 |
+
<td>6000</td>
|
| 44 |
+
</tr>
|
| 45 |
+
<tr>
|
| 46 |
+
<td>adam beta1</td>
|
| 47 |
+
<td>0.9</td>
|
| 48 |
+
<td>0.9</td>
|
| 49 |
+
</tr>
|
| 50 |
+
<tr>
|
| 51 |
+
<td>adam beta2</td>
|
| 52 |
+
<td>0.999</td>
|
| 53 |
+
<td>0.999</td>
|
| 54 |
+
</tr>
|
| 55 |
+
<tr>
|
| 56 |
+
<td>LR scheduler name</td>
|
| 57 |
+
<td>constant</td>
|
| 58 |
+
<td>constant</td>
|
| 59 |
+
</tr>
|
| 60 |
+
<tr>
|
| 61 |
+
<td>L1 coefficient</td>
|
| 62 |
+
<td>5</td>
|
| 63 |
+
<td>5</td>
|
| 64 |
+
</tr>
|
| 65 |
+
<tr>
|
| 66 |
+
<td>seed</td>
|
| 67 |
+
<td>42</td>
|
| 68 |
+
<td>42</td>
|
| 69 |
+
</tr>
|
| 70 |
+
<tr>
|
| 71 |
+
<td>dtype</td>
|
| 72 |
+
<td>float32</td>
|
| 73 |
+
<td>float32</td>
|
| 74 |
+
</tr>
|
| 75 |
+
<tr>
|
| 76 |
+
<td>buffer batches num</td>
|
| 77 |
+
<td>32</td>
|
| 78 |
+
<td>64</td>
|
| 79 |
+
</tr>
|
| 80 |
+
<tr>
|
| 81 |
+
<td>store batch size prompts</td>
|
| 82 |
+
<td>4</td>
|
| 83 |
+
<td>16</td>
|
| 84 |
+
</tr>
|
| 85 |
+
<tr>
|
| 86 |
+
<td>feature sampling window</td>
|
| 87 |
+
<td>1000</td>
|
| 88 |
+
<td>1000</td>
|
| 89 |
+
</tr>
|
| 90 |
+
<tr>
|
| 91 |
+
<td>dead feature window</td>
|
| 92 |
+
<td>1000</td>
|
| 93 |
+
<td>1000</td>
|
| 94 |
+
</tr>
|
| 95 |
+
<tr>
|
| 96 |
+
<td>dead feature threshold</td>
|
| 97 |
+
<td>1e-4</td>
|
| 98 |
+
<td>1e-4</td>
|
| 99 |
+
</tr>
|
| 100 |
+
<!-- "Model Parameters" row without vertical lines between columns -->
|
| 101 |
+
<tr>
|
| 102 |
+
<td colspan="3" style="text-align: center; border-left: none; border-right: none;"><strong>Model Parameters</strong></td>
|
| 103 |
+
</tr>
|
| 104 |
+
<tr>
|
| 105 |
+
<td>hook layer</td>
|
| 106 |
+
<td>16</td>
|
| 107 |
+
<td>8</td>
|
| 108 |
+
</tr>
|
| 109 |
+
<tr>
|
| 110 |
+
<td>input dimension</td>
|
| 111 |
+
<td>4096</td>
|
| 112 |
+
<td>4096</td>
|
| 113 |
+
</tr>
|
| 114 |
+
<tr>
|
| 115 |
+
<td>expansion factor</td>
|
| 116 |
+
<td>16</td>
|
| 117 |
+
<td>32</td>
|
| 118 |
+
</tr>
|
| 119 |
+
<tr>
|
| 120 |
+
<td>feature number</td>
|
| 121 |
+
<td>65536</td>
|
| 122 |
+
<td>131072</td>
|
| 123 |
+
</tr>
|
| 124 |
+
<tr>
|
| 125 |
+
<td>context size</td>
|
| 126 |
+
<td>4096</td>
|
| 127 |
+
<td>2048</td>
|
| 128 |
+
</tr>
|
| 129 |
+
</tbody>
|
| 130 |
+
</table>
|
| 131 |
+
|
| 132 |
+
The differences in training parameters arise because the LLaVA-NeXT-7B model requires more GPU memory to handle vision input, so fewer batches can be cached. For the SAE and SAE-V parameters, we set different hook layers and context sizes based on the distinct architectures of the two models. We also experimented with different feature numbers on both models, but found that only around 30,000 features are actually activated during training. All training runs were conducted until convergence. All SAE and SAE-V training was performed on 8xA800 GPUs. We ensured that the variations in the parameters did not affect the experiment results.
|
| 133 |
+
|
| 134 |
+
## 2. Quickstart
|
| 135 |
+
|
| 136 |
+
The SAE and SAE-V models are developed based on [SAELens-V](https://github.com/saev-2025/SAELens-V). A loading example is as follows:
|
| 137 |
+
|
| 138 |
+
```python
|
| 139 |
+
from saev_lens import SAE
|
| 140 |
+
sae = SAE.load_from_pretrained(
|
| 141 |
+
path = "./SAEV_LLaVA_NeXT-7b_OBELICS",
|
| 142 |
+
device ="cuda:0"
|
| 143 |
+
)
|
| 144 |
+
```
|
| 145 |
+
More usage tutorials are available in [SAELens-V](https://github.com/saev-2025/SAELens-V).
|
SAE-LlaVA-NeXT-7b-Pile/.gitattributes
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.lz4 filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
# Audio files - uncompressed
|
| 38 |
+
*.pcm filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
*.sam filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
*.raw filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
# Audio files - compressed
|
| 42 |
+
*.aac filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
*.flac filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
*.mp3 filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
*.ogg filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
*.wav filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
# Image files - uncompressed
|
| 48 |
+
*.bmp filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
*.gif filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
*.png filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
*.tiff filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
# Image files - compressed
|
| 53 |
+
*.jpg filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
*.jpeg filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
*.webp filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
# Video files - compressed
|
| 57 |
+
*.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
*.webm filter=lfs diff=lfs merge=lfs -text
|
SAE-LlaVA-NeXT-7b-Pile/README.md
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SAE-LlaVA-NeXT-7b-Pile
|
| 2 |
+
SAE-LlaVA-NeXT-7b-Pile is a sparse autoencoder (SAE) based on the [LLaVA-NeXT](https://huggingface.co/llava-hf/llava-v1.6-mistral-7b-hf) model and trained on [The Pile](https://huggingface.co/datasets/EleutherAI/pile) dataset.
|
SAE-LlaVA-NeXT-7b-Pile/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"model_name": "llava-hf/llava-v1.6-mistral-7b-hf", "local_model_path": "", "model_class_name": "HookedLlava", "hook_name": "blocks.16.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 16, "hook_head_index": null, "dataset_path": "", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 2048, "use_cached_activations": false, "cached_activations_path": null, "architecture": "standard", "d_in": 4096, "d_sae": 65536, "b_dec_init_method": "zeros", "expansion_factor": 16, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": false, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": true, "init_encoder_as_decoder_transpose": true, "n_batches_in_buffer": 64, "training_tokens": 163840000, "finetuning_tokens": 0, "store_batch_size_prompts": 4, "train_batch_size_tokens": 4096, "normalize_activations": "expected_average_only_in", "device": "cuda:3", "act_store_device": "cuda:3", "seed": 42, "dtype": "float32", "prepend_bos": true, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0.9, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 5, "lp_norm": 1.0, "scale_sparsity_penalty_by_decoder_norm": true, "l1_warm_up_steps": 2000, "lr": 5e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 5e-06, "lr_decay_steps": 8000, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 1000, "dead_feature_window": 1000, "dead_feature_threshold": 0.0001, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "interp", "wandb_id": null, "run_name": "65536-L1-5-LR-5e-05-Tokens-1.638e+08", "wandb_entity": null, 
"wandb_log_frequency": 30, "eval_every_n_wandb_logs": 20, "resume": false, "n_checkpoints": 20, "checkpoint_path": "checkpoints/xepk4xea", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {"n_devices": 3}, "sae_lens_version": "3.20.0", "sae_lens_training_version": "3.20.0", "vision": false, "tokens_per_buffer": 536870912}
|
SAE-LlaVA-NeXT-7b-Pile/sae_weights.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5d8b1d505384f6bf74113dde3e5506468d366320ca90932c08efa0ac68dde4a1
|
| 3 |
+
size 2147762504
|
SAE-LlaVA-NeXT-7b-Pile/sparsity.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9c55970c8e5a624d294e7cbe7a777e9a1c3ee18b25ae5ac2ba4b7b7aa40573ee
|
| 3 |
+
size 262224
|
SAE-Mistral-7b-v0.2-Pile/.gitattributes
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.lz4 filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
# Audio files - uncompressed
|
| 38 |
+
*.pcm filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
*.sam filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
*.raw filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
# Audio files - compressed
|
| 42 |
+
*.aac filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
*.flac filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
*.mp3 filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
*.ogg filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
*.wav filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
# Image files - uncompressed
|
| 48 |
+
*.bmp filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
*.gif filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
*.png filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
*.tiff filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
# Image files - compressed
|
| 53 |
+
*.jpg filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
*.jpeg filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
*.webp filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
# Video files - compressed
|
| 57 |
+
*.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
*.webm filter=lfs diff=lfs merge=lfs -text
|
SAE-Mistral-7b-v0.2-Pile/README.md
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SAE-Mistral-7b-v0.2-Pile
|
| 2 |
+
SAE-Mistral-7b-v0.2-Pile is a sparse autoencoder (SAE) based on the [Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) model and trained on [The Pile](https://huggingface.co/datasets/EleutherAI/pile) dataset.
|
SAE-Mistral-7b-v0.2-Pile/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"model_name": "mistralai/Mistral-7B-Instruct-v0.2", "local_model_path": "", "model_class_name": "HookedLlava", "hook_name": "blocks.16.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 16, "hook_head_index": null, "dataset_path": "", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 4096, "use_cached_activations": false, "cached_activations_path": null, "architecture": "standard", "d_in": 4096, "d_sae": 65536, "b_dec_init_method": "zeros", "expansion_factor": 16, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": false, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": true, "init_encoder_as_decoder_transpose": true, "n_batches_in_buffer": 32, "training_tokens": 122880000, "finetuning_tokens": 0, "store_batch_size_prompts": 4, "train_batch_size_tokens": 4096, "normalize_activations": "expected_average_only_in", "device": "cuda:2", "act_store_device": "cuda:2", "seed": 42, "dtype": "float32", "prepend_bos": true, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0.9, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 5, "lp_norm": 1.0, "scale_sparsity_penalty_by_decoder_norm": true, "l1_warm_up_steps": 1500, "lr": 5e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 5e-06, "lr_decay_steps": 6000, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 1000, "dead_feature_window": 1000, "dead_feature_threshold": 0.0001, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "interp-M", "wandb_id": null, "run_name": "65536-L1-5-LR-5e-05-Tokens-1.229e+08", "wandb_entity": null, 
"wandb_log_frequency": 30, "eval_every_n_wandb_logs": 20, "resume": false, "n_checkpoints": 20, "checkpoint_path": "checkpoints-M/am4f73zh", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {"n_devices": 2}, "sae_lens_version": "3.20.0", "sae_lens_training_version": "3.20.0", "vision": false, "tokens_per_buffer": 536870912}
|
SAE-Mistral-7b-v0.2-Pile/sae_weights.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5bde7e1c003a106cf21333583f3c031bf19c0f371c3a8dfeecc596e0030bfd1
|
| 3 |
+
size 2147762504
|
SAE-Mistral-7b-v0.2-Pile/sparsity.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c641290c877f4770106cfae1f18778d8a4a500835306f79ec577b25ab349bcd7
|
| 3 |
+
size 262224
|
SAEV_Anole-7b_OBELICS/.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
SAEV_Anole-7b_OBELICS/README.md
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SAEV_Anole-7b_OBELICS
|
| 2 |
+
SAEV_Anole-7b_OBELICS is a sparse autoencoder with Vision (SAE-V) based on the [Anole-7b-v0.1](https://huggingface.co/leloy/Anole-7b-v0.1-hf) model and trained on the [OBELICS](https://github.com/huggingface/OBELICS) dataset.
|
SAEV_Anole-7b_OBELICS/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"model_name": "", "local_model_path": "", "model_class_name": "HookedChameleon", "hook_name": "blocks.16.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 16, "hook_head_index": null, "dataset_path": "", "dataset_trust_remote_code": true, "streaming": false, "is_dataset_tokenized": true, "context_size": 2048, "use_cached_activations": false, "cached_activations_path": null, "architecture": "standard", "d_in": 4096, "d_sae": 131072, "b_dec_init_method": "zeros", "expansion_factor": 32, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": false, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": true, "init_encoder_as_decoder_transpose": true, "n_batches_in_buffer": 64, "training_tokens": 122880000, "finetuning_tokens": 0, "store_batch_size_prompts": 4, "train_batch_size_tokens": 4096, "normalize_activations": "expected_average_only_in", "device": "cuda:3", "act_store_device": "cpu", "seed": 42, "dtype": "float32", "prepend_bos": true, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0.9, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 5, "lp_norm": 1.0, "scale_sparsity_penalty_by_decoder_norm": true, "l1_warm_up_steps": 1500, "lr": 5e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 5e-06, "lr_decay_steps": 6000, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 1000, "dead_feature_window": 1000, "dead_feature_threshold": 0.0001, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "interp", "wandb_id": null, "run_name": "131072-L1-5-LR-5e-05-Tokens-1.229e+08", "wandb_entity": null, "wandb_log_frequency": 30, 
"eval_every_n_wandb_logs": 20, "resume": false, "n_checkpoints": 8, "checkpoint_path": "checkpoints/9ujb3fly", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {"n_devices": 3}, "sae_lens_version": "3.20.0", "sae_lens_training_version": "3.20.0", "tokens_per_buffer": 536870912}
|
SAEV_Anole-7b_OBELICS/sae_weights.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e929ac1c198534535751aea816d57ca96e3f51c81c61753da018d64e803c690f
|
| 3 |
+
size 4295508296
|
SAEV_Anole-7b_OBELICS/sparsity.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ce7b78c9e222c461a1e349eca69880e8b8df9c2b21011d8a6ef54b8ba8e6afe4
|
| 3 |
+
size 524368
|
SAEV_Chameleon-7b_OBELICS/.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
SAEV_Chameleon-7b_OBELICS/README.md
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SAEV_Chameleon-7b_OBELICS
|
| 2 |
+
SAEV_Chameleon-7b_OBELICS is a sparse autoencoder with Vision (SAE-V) based on the [Chameleon](https://huggingface.co/facebook/chameleon-7b) model and trained on the [OBELICS](https://github.com/huggingface/OBELICS) dataset.
|
SAEV_Chameleon-7b_OBELICS/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"model_name": "", "local_model_path": "", "model_class_name": "HookedChameleon", "hook_name": "blocks.16.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 16, "hook_head_index": null, "dataset_path": "", "dataset_trust_remote_code": true, "streaming": false, "is_dataset_tokenized": true, "context_size": 2048, "use_cached_activations": false, "cached_activations_path": null, "architecture": "standard", "d_in": 4096, "d_sae": 131072, "b_dec_init_method": "zeros", "expansion_factor": 32, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": false, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": true, "init_encoder_as_decoder_transpose": true, "n_batches_in_buffer": 64, "training_tokens": 122880000, "finetuning_tokens": 0, "store_batch_size_prompts": 4, "train_batch_size_tokens": 4096, "normalize_activations": "expected_average_only_in", "device": "cuda:1", "act_store_device": "cpu", "seed": 42, "dtype": "float32", "prepend_bos": true, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0.9, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 5, "lp_norm": 1.0, "scale_sparsity_penalty_by_decoder_norm": true, "l1_warm_up_steps": 1500, "lr": 5e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 5e-06, "lr_decay_steps": 6000, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 1000, "dead_feature_window": 1000, "dead_feature_threshold": 0.0001, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "interp", "wandb_id": null, "run_name": "131072-L1-5-LR-5e-05-Tokens-1.229e+08", "wandb_entity": null, "wandb_log_frequency": 30, 
"eval_every_n_wandb_logs": 20, "resume": false, "n_checkpoints": 8, "checkpoint_path": "checkpoints/d099y7cn", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {"n_devices": 1}, "sae_lens_version": "3.20.0", "sae_lens_training_version": "3.20.0", "tokens_per_buffer": 536870912}
|
SAEV_Chameleon-7b_OBELICS/sae_weights.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dabe5ffd54c5c9f62da0ac003f3fc13ec72dc1e144d9d62de3f03c2203cd5e9a
|
| 3 |
+
size 4295508296
|
SAEV_Chameleon-7b_OBELICS/sparsity.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d7093a37ff0a0ad7c0e15f87b8a0510455ceb7ff0dfccb3c50a60587150f06a
|
| 3 |
+
size 524368
|
SAEV_LLaVA_NeXT-7b_OBELICS/.gitattributes
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.lz4 filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
# Audio files - uncompressed
|
| 38 |
+
*.pcm filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
*.sam filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
*.raw filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
# Audio files - compressed
|
| 42 |
+
*.aac filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
*.flac filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
*.mp3 filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
*.ogg filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
*.wav filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
# Image files - uncompressed
|
| 48 |
+
*.bmp filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
*.gif filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
*.png filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
*.tiff filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
# Image files - compressed
|
| 53 |
+
*.jpg filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
*.jpeg filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
*.webp filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
# Video files - compressed
|
| 57 |
+
*.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
*.webm filter=lfs diff=lfs merge=lfs -text
|
SAEV_LLaVA_NeXT-7b_OBELICS/README.md
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SAEV_LLaVA_NeXT-7b_OBELICS
|
| 2 |
+
SAEV_LLaVA_NeXT-7b_OBELICS is a sparse autoencoder with Vision (SAE-V) based on the [LLaVA-NeXT](https://huggingface.co/llava-hf/llava-v1.6-mistral-7b-hf) model and trained on the [OBELICS](https://github.com/huggingface/OBELICS) dataset.
|
SAEV_LLaVA_NeXT-7b_OBELICS/cfg.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"model_name": "llava-hf/llava-v1.6-mistral-7b-hf", "local_model_path": "", "model_class_name": "HookedLlava", "hook_name": "blocks.16.hook_resid_post", "hook_eval": "NOT_IN_USE", "hook_layer": 16, "hook_head_index": null, "dataset_path": "", "dataset_trust_remote_code": true, "streaming": true, "is_dataset_tokenized": true, "context_size": 4096, "use_cached_activations": false, "cached_activations_path": null, "architecture": "standard", "d_in": 4096, "d_sae": 65536, "b_dec_init_method": "zeros", "expansion_factor": 16, "activation_fn": "relu", "activation_fn_kwargs": {}, "normalize_sae_decoder": false, "noise_scale": 0.0, "from_pretrained_path": null, "apply_b_dec_to_input": false, "decoder_orthogonal_init": false, "decoder_heuristic_init": true, "init_encoder_as_decoder_transpose": true, "n_batches_in_buffer": 32, "training_tokens": 122880000, "finetuning_tokens": 0, "store_batch_size_prompts": 1, "train_batch_size_tokens": 4096, "normalize_activations": "expected_average_only_in", "device": "cuda:3", "act_store_device": "cuda:3", "seed": 42, "dtype": "float32", "prepend_bos": true, "autocast": false, "autocast_lm": false, "compile_llm": false, "llm_compilation_mode": null, "compile_sae": false, "sae_compilation_mode": null, "adam_beta1": 0.9, "adam_beta2": 0.999, "mse_loss_normalization": null, "l1_coefficient": 5, "lp_norm": 1.0, "scale_sparsity_penalty_by_decoder_norm": true, "l1_warm_up_steps": 1500, "lr": 5e-05, "lr_scheduler_name": "constant", "lr_warm_up_steps": 0, "lr_end": 5e-06, "lr_decay_steps": 6000, "n_restart_cycles": 1, "finetuning_method": null, "use_ghost_grads": false, "feature_sampling_window": 1000, "dead_feature_window": 1000, "dead_feature_threshold": 0.0001, "n_eval_batches": 10, "eval_batch_size_prompts": null, "log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "wandb_project": "interp-V", "wandb_id": null, "run_name": "65536-L1-5-LR-5e-05-Tokens-1.229e+08", "wandb_entity": null, 
"wandb_log_frequency": 30, "eval_every_n_wandb_logs": 20, "resume": false, "n_checkpoints": 20, "checkpoint_path": "checkpoints-V/kxpk98cr", "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {"n_devices": 3}, "sae_lens_version": "3.20.0", "sae_lens_training_version": "3.20.0", "vision": false, "tokens_per_buffer": 536870912}
|
SAEV_LLaVA_NeXT-7b_OBELICS/sae_weights.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e4f97c02ca09cb9a680ab6306ca65a4d9b65a3eee77fe2fcd151e4f1aa4a850
|
| 3 |
+
size 2147762504
|
SAEV_LLaVA_NeXT-7b_OBELICS/sparsity.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ccf1916cb6dd3e88553f51f2a5c27b0f9fae9e2d4bee34cd9c1b2eb724b48919
|
| 3 |
+
size 262224
|