{
  "seed": 49,
  "device": "cuda:0",
  "wandb_project": "gemma-crosscoder",
  "wandb_entity": "kasmik-backups-kathmandu-university",
  "wandb_run_name": null,
  "checkpoint_path": "./checkpoints/",
  "data_cache_dir": "./cache/",
  "data_dir": "./data/",
  "model_name_A": "google/gemma-2-2b",
  "model_name_B": "google/gemma-2-2b-it",
  "hook_point": "blocks.14.hook_resid_pre",
  "dict_size": 16384,
  "enc_dtype": "fp32",
  "dec_init_norm": 0.08,
  "lr": 5e-05,
  "beta1": 0.9,
  "beta2": 0.999,
  "l1_coeff": 2.0,
  "lr_warmup_steps": 0,
  "lr_decay_start_fraction": 0.8,
  "l1_warmup_fraction": 0.05,
  "num_tokens": 10000000,
  "batch_size": 4096,
  "seq_len": 1024,
  "model_batch_size": 4,
  "buffer_mult": 128,
  "norm_batch_size": 16,
  "log_every": 100,
  "save_every": 20000,
  "d_in": 2304
}