xXCoolinXx committed on
Commit
cdba4b9
·
verified ·
1 Parent(s): 5e7a006

Upload folder using huggingface_hub

Browse files
gemma_2_9b_l11/model/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metadata": {"sae_lens_version": "6.39.0", "sae_lens_training_version": "6.39.0", "dataset_path": "datasets/tokenized/pile-uncopyrighted-gemma2", "hook_name": "model.layers.11", "model_name": "google/gemma-2-9b", "model_class_name": "AutoModelForCausalLM", "hook_head_index": null, "context_size": 128, "seqpos_slice": [null], "model_from_pretrained_kwargs": {}, "prepend_bos": true, "exclude_special_tokens": false, "sequence_separator_token": "bos", "disable_concat_sequences": false}, "rescale_acts_by_decoder_norm": true, "d_in": 3584, "apply_b_dec_to_input": false, "d_bottleneck": 3, "dtype": "float32", "n_experts": 2048, "device": "cuda", "normalize_activations": "none", "d_expert": 16, "d_sae": 32768, "reshape_activations": "none", "architecture": "smixae"}
gemma_2_9b_l11/model/runner_cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sae": {"d_in": 3584, "d_sae": 32768, "dtype": "float32", "device": "cpu", "apply_b_dec_to_input": true, "normalize_activations": "expected_average_only_in", "reshape_activations": "none", "metadata": {"sae_lens_version": "6.39.0", "sae_lens_training_version": "6.39.0"}, "decoder_init_norm": 0.1, "n_experts": 2048, "d_expert": 16, "d_bottleneck": 3, "k_experts": 64, "aux_loss_coefficient": 9e-06, "rescale_acts_by_decoder_norm": true, "threshold_lr": 0.1, "dead_after_n_passes": 1000, "architecture": "smixae"}, "model_name": "google/gemma-2-9b", "model_class_name": "AutoModelForCausalLM", "hook_name": "model.layers.11", "hook_eval": "NOT_IN_USE", "hook_head_index": null, "dataset_path": "datasets/tokenized/pile-uncopyrighted-gemma2", "dataset_trust_remote_code": true, "streaming": false, "is_dataset_tokenized": true, "use_chat_formatting": false, "context_size": 128, "use_cached_activations": false, "cached_activations_path": null, "from_pretrained_path": null, "n_batches_in_buffer": 1024, "training_tokens": 500000000, "store_batch_size_prompts": 128, "seqpos_slice": [null], "disable_concat_sequences": false, "sequence_separator_token": "bos", "activations_mixing_fraction": 0.5, "device": "cuda", "act_store_device": "cpu", "seed": 42, "dtype": "bfloat16", "prepend_bos": true, "autocast": true, "autocast_lm": true, "compile_llm": true, "llm_compilation_mode": null, "compile_sae": true, "sae_compilation_mode": null, "train_batch_size_tokens": 8192, "adam_beta1": 0.9, "adam_beta2": 0.999, "lr": 0.0005, "lr_scheduler_name": "constant", "lr_warm_up_steps": 500, "lr_end": 5e-05, "lr_decay_steps": 12207, "n_restart_cycles": 1, "dead_feature_window": 1000, "feature_sampling_window": 2000, "dead_feature_threshold": 1e-08, "n_eval_batches": 10, "eval_batch_size_prompts": null, "logger": {"log_to_wandb": true, "log_activations_store_to_wandb": false, "log_optimizer_state_to_wandb": false, "log_weights_to_wandb": true, "wandb_project": "SMIXAE on Gemma 2-9B, Batch Top K", 
"wandb_id": null, "run_name": "smixae-32768-LR-0.0005-Tokens-5.000e+08", "wandb_entity": null, "wandb_log_frequency": 30, "eval_every_n_wandb_logs": 5000000}, "n_checkpoints": 3, "checkpoint_path": "results/gemma_2_9b_l11/checkpoints/3c13q1z6", "save_final_checkpoint": true, "output_path": "results/gemma_2_9b_l11/model", "resume_from_checkpoint": null, "verbose": true, "model_kwargs": {}, "model_from_pretrained_kwargs": {}, "sae_lens_version": "6.39.0", "sae_lens_training_version": "6.39.0", "exclude_special_tokens": false, "n_batches_for_norm_estimate": 100}
gemma_2_9b_l11/model/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc827ff5aa5a9726cbb82eeecd8fbbb85964a9cca8493d0bdaa93e701547799a
3
+ size 940472968
gemma_2_9b_l11/model/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f2d5252cf1f9f91ae0026fec84a1b92affdf12281cbf855e77461a4297ecdf1
3
+ size 131152