Add files using upload-large-folder tool
Browse files- cfg.json +14 -0
- runner_cfg.json +53 -0
- sae_weights.safetensors +3 -0
cfg.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"d_in": 4096,
|
| 3 |
+
"d_sae": 65536,
|
| 4 |
+
"dtype": "float32",
|
| 5 |
+
"device": "cuda",
|
| 6 |
+
"jumprelu_init_threshold": 1.0,
|
| 7 |
+
"jumprelu_bandwidth": 1.0,
|
| 8 |
+
"jumprelu_sparsity_loss_mode": "step",
|
| 9 |
+
"l0_coefficient": 1.0,
|
| 10 |
+
"l0_warm_up_steps": 0,
|
| 11 |
+
"pre_act_loss_coefficient": null,
|
| 12 |
+
"jumprelu_tanh_scale": 4.0,
|
| 13 |
+
"decoder_init_norm": 0.1
|
| 14 |
+
}
|
runner_cfg.json
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"sae": {
|
| 3 |
+
"d_in": 4096,
|
| 4 |
+
"d_sae": 65536,
|
| 5 |
+
"dtype": "float32",
|
| 6 |
+
"device": "cuda",
|
| 7 |
+
"jumprelu_init_threshold": 1.0,
|
| 8 |
+
"jumprelu_bandwidth": 1.0,
|
| 9 |
+
"jumprelu_sparsity_loss_mode": "step",
|
| 10 |
+
"l0_coefficient": 1.0,
|
| 11 |
+
"l0_warm_up_steps": 0,
|
| 12 |
+
"pre_act_loss_coefficient": null,
|
| 13 |
+
"jumprelu_tanh_scale": 4.0,
|
| 14 |
+
"decoder_init_norm": 0.1
|
| 15 |
+
},
|
| 16 |
+
"trainer": {
|
| 17 |
+
"total_training_tokens": 122880000,
|
| 18 |
+
"batch_size": 4096,
|
| 19 |
+
"lr": 5e-05,
|
| 20 |
+
"lr_end": 5e-05,
|
| 21 |
+
"lr_scheduler_name": "constant",
|
| 22 |
+
"lr_warm_up_steps": 1500,
|
| 23 |
+
"lr_decay_steps": 6000,
|
| 24 |
+
"n_restart_cycles": 1,
|
| 25 |
+
"adam_beta1": 0.9,
|
| 26 |
+
"adam_beta2": 0.999,
|
| 27 |
+
"dead_feature_window": 1000,
|
| 28 |
+
"feature_sampling_window": 2000,
|
| 29 |
+
"device": "cuda",
|
| 30 |
+
"autocast": false,
|
| 31 |
+
"n_checkpoints": 0,
|
| 32 |
+
"checkpoint_path": "checkpoints",
|
| 33 |
+
"log_to_wandb": true,
|
| 34 |
+
"wandb_project": "multimodal_sae",
|
| 35 |
+
"wandb_log_frequency": 30
|
| 36 |
+
},
|
| 37 |
+
"video_activation": {
|
| 38 |
+
"model_name": "llava-hf/LLaVA-NeXT-Video-7B-hf",
|
| 39 |
+
"hook_module": "model.language_model.layers.16",
|
| 40 |
+
"device": "cuda",
|
| 41 |
+
"dtype": "float16",
|
| 42 |
+
"load_in_4bit": false,
|
| 43 |
+
"video_dir": "./videos",
|
| 44 |
+
"num_frames": 8,
|
| 45 |
+
"prompt": "USER: <video>\nDescribe this video.\nASSISTANT:",
|
| 46 |
+
"batch_size": 4096,
|
| 47 |
+
"n_batches_in_buffer": 16,
|
| 48 |
+
"buffer_mix_fraction": 0.5,
|
| 49 |
+
"video_batch_size": 8,
|
| 50 |
+
"prefetch_workers": 4,
|
| 51 |
+
"debug": false
|
| 52 |
+
}
|
| 53 |
+
}
|
sae_weights.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0097155cb3c1b814ce5f76dcaf4873ea14fde1e8fc09eb228933f0ae4e611b91
|
| 3 |
+
size 2148024728
|