kh4dien commited on
Commit
01c223e
·
verified ·
1 Parent(s): f1a1088

Add files using upload-large-folder tool

Browse files
Files changed (30) hide show
  1. gemma-3-4b-step-1499/config.json +1 -0
  2. gemma-3-4b-step-1499/language_model.model.layers.16/cfg.json +1 -0
  3. gemma-3-4b-step-1499/language_model.model.layers.16/sae.safetensors +3 -0
  4. gemma-3-4b-step-1499/language_model.model.layers.27/cfg.json +1 -0
  5. gemma-3-4b-step-1499/language_model.model.layers.27/sae.safetensors +3 -0
  6. gemma-3-4b-step-2999/config.json +1 -0
  7. gemma-3-4b-step-2999/language_model.model.layers.16/cfg.json +1 -0
  8. gemma-3-4b-step-2999/language_model.model.layers.16/sae.safetensors +3 -0
  9. gemma-3-4b-step-2999/language_model.model.layers.27/cfg.json +1 -0
  10. gemma-3-4b-step-2999/language_model.model.layers.27/sae.safetensors +3 -0
  11. gemma-3-4b-step-4499/config.json +1 -0
  12. gemma-3-4b-step-4499/language_model.model.layers.16/cfg.json +1 -0
  13. gemma-3-4b-step-4499/language_model.model.layers.16/sae.safetensors +3 -0
  14. gemma-3-4b-step-4499/language_model.model.layers.27/cfg.json +1 -0
  15. gemma-3-4b-step-4499/language_model.model.layers.27/sae.safetensors +3 -0
  16. gemma-3-4b-step-5999/config.json +1 -0
  17. gemma-3-4b-step-5999/language_model.model.layers.16/cfg.json +1 -0
  18. gemma-3-4b-step-5999/language_model.model.layers.16/sae.safetensors +3 -0
  19. gemma-3-4b-step-5999/language_model.model.layers.27/cfg.json +1 -0
  20. gemma-3-4b-step-5999/language_model.model.layers.27/sae.safetensors +3 -0
  21. gemma-3-4b-step-7499/config.json +1 -0
  22. gemma-3-4b-step-7499/language_model.model.layers.16/cfg.json +1 -0
  23. gemma-3-4b-step-7499/language_model.model.layers.16/sae.safetensors +3 -0
  24. gemma-3-4b-step-7499/language_model.model.layers.27/cfg.json +1 -0
  25. gemma-3-4b-step-7499/language_model.model.layers.27/sae.safetensors +3 -0
  26. gemma-3-4b-step-final/config.json +1 -0
  27. gemma-3-4b-step-final/language_model.model.layers.16/cfg.json +1 -0
  28. gemma-3-4b-step-final/language_model.model.layers.16/sae.safetensors +3 -0
  29. gemma-3-4b-step-final/language_model.model.layers.27/cfg.json +1 -0
  30. gemma-3-4b-step-final/language_model.model.layers.27/sae.safetensors +3 -0
gemma-3-4b-step-1499/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sae": {"activation": "topk", "expansion_factor": 8, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "transcode": false}, "batch_size": 8, "grad_acc_steps": 8, "micro_acc_steps": 1, "optimizer": "adam", "lr": null, "lr_warmup_steps": 1000, "k_decay_steps": 0, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["language_model.model.layers.16", "language_model.model.layers.27"], "init_seeds": [0], "layers": [16, 27], "layer_stride": 1, "distribute_modules": false, "save_every": 1500, "save_dir": "/root/saes", "val_every": 75, "log_to_wandb": true, "run_name": "gemma-3-4b", "wandb_log_frequency": 1, "model": "google/gemma-3-4b-pt", "dataset": "togethercomputer/RedPajama-Data-1T-Sample", "val_dataset": "kh4dien/fineweb-sample", "split": "train", "ctx_len": 2048, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 490000, "val_max_examples": 100, "resume": false, "text_column": "text", "finetune": null, "shuffle_seed": 42, "data_preprocessing_num_proc": 13}
gemma-3-4b-step-1499/language_model.model.layers.16/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"activation": "topk", "expansion_factor": 8, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "transcode": false, "d_in": 2560}
gemma-3-4b-step-1499/language_model.model.layers.16/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3429426008da613cb1ba0c8b2d3b37d2f729785bd749d6ebb3d3f0e76d376e32
3
+ size 419522896
gemma-3-4b-step-1499/language_model.model.layers.27/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"activation": "topk", "expansion_factor": 8, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "transcode": false, "d_in": 2560}
gemma-3-4b-step-1499/language_model.model.layers.27/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1c72a86c835ff6f71f04998e04f4218e3bf51495069ed5fd8af84b4eb0d48db
3
+ size 419522896
gemma-3-4b-step-2999/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sae": {"activation": "topk", "expansion_factor": 8, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "transcode": false}, "batch_size": 8, "grad_acc_steps": 8, "micro_acc_steps": 1, "optimizer": "adam", "lr": null, "lr_warmup_steps": 1000, "k_decay_steps": 0, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["language_model.model.layers.16", "language_model.model.layers.27"], "init_seeds": [0], "layers": [16, 27], "layer_stride": 1, "distribute_modules": false, "save_every": 1500, "save_dir": "/root/saes", "val_every": 75, "log_to_wandb": true, "run_name": "gemma-3-4b", "wandb_log_frequency": 1, "model": "google/gemma-3-4b-pt", "dataset": "togethercomputer/RedPajama-Data-1T-Sample", "val_dataset": "kh4dien/fineweb-sample", "split": "train", "ctx_len": 2048, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 490000, "val_max_examples": 100, "resume": false, "text_column": "text", "finetune": null, "shuffle_seed": 42, "data_preprocessing_num_proc": 13}
gemma-3-4b-step-2999/language_model.model.layers.16/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"activation": "topk", "expansion_factor": 8, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "transcode": false, "d_in": 2560}
gemma-3-4b-step-2999/language_model.model.layers.16/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2233aa61ce67acae8f1e6dfea0869a770636e3b5f70e4657b3715a4c4d1e0aa5
3
+ size 419522896
gemma-3-4b-step-2999/language_model.model.layers.27/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"activation": "topk", "expansion_factor": 8, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "transcode": false, "d_in": 2560}
gemma-3-4b-step-2999/language_model.model.layers.27/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ebe8bfee397ce0f104e372902f60d98657a6f567f6180dcca0b4c16983705fa
3
+ size 419522896
gemma-3-4b-step-4499/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sae": {"activation": "topk", "expansion_factor": 8, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "transcode": false}, "batch_size": 8, "grad_acc_steps": 8, "micro_acc_steps": 1, "optimizer": "adam", "lr": null, "lr_warmup_steps": 1000, "k_decay_steps": 0, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["language_model.model.layers.16", "language_model.model.layers.27"], "init_seeds": [0], "layers": [16, 27], "layer_stride": 1, "distribute_modules": false, "save_every": 1500, "save_dir": "/root/saes", "val_every": 75, "log_to_wandb": true, "run_name": "gemma-3-4b", "wandb_log_frequency": 1, "model": "google/gemma-3-4b-pt", "dataset": "togethercomputer/RedPajama-Data-1T-Sample", "val_dataset": "kh4dien/fineweb-sample", "split": "train", "ctx_len": 2048, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 490000, "val_max_examples": 100, "resume": false, "text_column": "text", "finetune": null, "shuffle_seed": 42, "data_preprocessing_num_proc": 13}
gemma-3-4b-step-4499/language_model.model.layers.16/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"activation": "topk", "expansion_factor": 8, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "transcode": false, "d_in": 2560}
gemma-3-4b-step-4499/language_model.model.layers.16/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12a097c350a40fab81ba861652cfcc2b9b08e7b364052492bfd4905a84224185
3
+ size 419522896
gemma-3-4b-step-4499/language_model.model.layers.27/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"activation": "topk", "expansion_factor": 8, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "transcode": false, "d_in": 2560}
gemma-3-4b-step-4499/language_model.model.layers.27/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c6eacdd4271bdb9f56b2c75da5616d309f45f61a5f460e21234e741f9918d3e
3
+ size 419522896
gemma-3-4b-step-5999/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sae": {"activation": "topk", "expansion_factor": 8, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "transcode": false}, "batch_size": 8, "grad_acc_steps": 8, "micro_acc_steps": 1, "optimizer": "adam", "lr": null, "lr_warmup_steps": 1000, "k_decay_steps": 0, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["language_model.model.layers.16", "language_model.model.layers.27"], "init_seeds": [0], "layers": [16, 27], "layer_stride": 1, "distribute_modules": false, "save_every": 1500, "save_dir": "/root/saes", "val_every": 75, "log_to_wandb": true, "run_name": "gemma-3-4b", "wandb_log_frequency": 1, "model": "google/gemma-3-4b-pt", "dataset": "togethercomputer/RedPajama-Data-1T-Sample", "val_dataset": "kh4dien/fineweb-sample", "split": "train", "ctx_len": 2048, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 490000, "val_max_examples": 100, "resume": false, "text_column": "text", "finetune": null, "shuffle_seed": 42, "data_preprocessing_num_proc": 13}
gemma-3-4b-step-5999/language_model.model.layers.16/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"activation": "topk", "expansion_factor": 8, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "transcode": false, "d_in": 2560}
gemma-3-4b-step-5999/language_model.model.layers.16/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:044e3ce25821ff6a8baef4b493e9c2b309a813fd44c9ed3fbdf50096a504cf98
3
+ size 419522896
gemma-3-4b-step-5999/language_model.model.layers.27/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"activation": "topk", "expansion_factor": 8, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "transcode": false, "d_in": 2560}
gemma-3-4b-step-5999/language_model.model.layers.27/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ff54b852d5e6d45e33747cd69054a6f5fb390b74efa023b937cc2f0135259d2
3
+ size 419522896
gemma-3-4b-step-7499/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sae": {"activation": "topk", "expansion_factor": 8, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "transcode": false}, "batch_size": 8, "grad_acc_steps": 8, "micro_acc_steps": 1, "optimizer": "adam", "lr": null, "lr_warmup_steps": 1000, "k_decay_steps": 0, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["language_model.model.layers.16", "language_model.model.layers.27"], "init_seeds": [0], "layers": [16, 27], "layer_stride": 1, "distribute_modules": false, "save_every": 1500, "save_dir": "/root/saes", "val_every": 75, "log_to_wandb": true, "run_name": "gemma-3-4b", "wandb_log_frequency": 1, "model": "google/gemma-3-4b-pt", "dataset": "togethercomputer/RedPajama-Data-1T-Sample", "val_dataset": "kh4dien/fineweb-sample", "split": "train", "ctx_len": 2048, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 490000, "val_max_examples": 100, "resume": false, "text_column": "text", "finetune": null, "shuffle_seed": 42, "data_preprocessing_num_proc": 13}
gemma-3-4b-step-7499/language_model.model.layers.16/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"activation": "topk", "expansion_factor": 8, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "transcode": false, "d_in": 2560}
gemma-3-4b-step-7499/language_model.model.layers.16/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54750d136cb80be71e79f805a903873ca86cb708d8cffdf35249323a58e199d1
3
+ size 419522896
gemma-3-4b-step-7499/language_model.model.layers.27/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"activation": "topk", "expansion_factor": 8, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "transcode": false, "d_in": 2560}
gemma-3-4b-step-7499/language_model.model.layers.27/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87365eb08faaa04c4be2ac404e06f4e095a941b2fb0a86fda5fc2ce90c85a447
3
+ size 419522896
gemma-3-4b-step-final/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sae": {"activation": "topk", "expansion_factor": 8, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "transcode": false}, "batch_size": 8, "grad_acc_steps": 8, "micro_acc_steps": 1, "optimizer": "adam", "lr": null, "lr_warmup_steps": 1000, "k_decay_steps": 0, "auxk_alpha": 0.0, "dead_feature_threshold": 10000000, "hookpoints": ["language_model.model.layers.16", "language_model.model.layers.27"], "init_seeds": [0], "layers": [16, 27], "layer_stride": 1, "distribute_modules": false, "save_every": 1500, "save_dir": "/root/saes", "val_every": 75, "log_to_wandb": true, "run_name": "gemma-3-4b", "wandb_log_frequency": 1, "model": "google/gemma-3-4b-pt", "dataset": "togethercomputer/RedPajama-Data-1T-Sample", "val_dataset": "kh4dien/fineweb-sample", "split": "train", "ctx_len": 2048, "hf_token": null, "revision": null, "load_in_8bit": false, "max_examples": 490000, "val_max_examples": 100, "resume": false, "text_column": "text", "finetune": null, "shuffle_seed": 42, "data_preprocessing_num_proc": 13}
gemma-3-4b-step-final/language_model.model.layers.16/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"activation": "topk", "expansion_factor": 8, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "transcode": false, "d_in": 2560}
gemma-3-4b-step-final/language_model.model.layers.16/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5781778708a31fd275a947765596c9de5353df4c524ff5730488929eda9c9ec3
3
+ size 419522896
gemma-3-4b-step-final/language_model.model.layers.27/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"activation": "topk", "expansion_factor": 8, "normalize_decoder": true, "num_latents": 0, "k": 128, "multi_topk": false, "skip_connection": false, "transcode": false, "d_in": 2560}
gemma-3-4b-step-final/language_model.model.layers.27/sae.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd670f0776e7c71e0a805c16e61cfa8a142303f174f020892008b0470ba2d60c
3
+ size 419522896