{
  "seed": 49,
  "device": "cuda:0",
  "wandb_project": "CrossCoder_Model_Diffing",
  "wandb_entity": "jithesh",
  "wandb_run_name": null,
  "checkpoint_path": "./checkpoints/",
  "data_cache_dir": "./cache/",
  "data_dir": "./data/",
  "hook_point": "blocks.14.hook_resid_pre",
  "dict_size": 16384,
  "enc_dtype": "fp32",
  "dec_init_norm": 0.08,
  "sparsity_type": "batch_top_k",
  "k_sparsity": 100,
  "l1_coeff": 0.0,
  "l1_warmup_fraction": 0.0,
  "lr": 5e-05,
  "beta1": 0.9,
  "beta2": 0.999,
  "lr_warmup_steps": 0,
  "lr_decay_start_fraction": 0.8,
  "num_tokens": 10000000,
  "batch_size": 2048,
  "seq_len": 1024,
  "model_batch_size": 1,
  "buffer_mult": 64,
  "norm_batch_size": 16,
  "log_every": 100,
  "save_every": 5000,
  "d_in": 1024
}