File size: 805 Bytes
e191453
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
{
  "seed": 49,
  "device": "cuda:0",
  "wandb_project": "gemma-crosscoder",
  "wandb_entity": "kasmik-backups-kathmandu-university",
  "wandb_run_name": null,
  "checkpoint_path": "./checkpoints/",
  "data_cache_dir": "./cache/",
  "data_dir": "./data/",
  "model_name_A": "google/gemma-2-2b",
  "model_name_B": "google/gemma-2-2b-it",
  "hook_point": "blocks.14.hook_resid_pre",
  "dict_size": 16384,
  "enc_dtype": "fp32",
  "dec_init_norm": 0.08,
  "lr": 5e-05,
  "beta1": 0.9,
  "beta2": 0.999,
  "l1_coeff": 2.0,
  "lr_warmup_steps": 0,
  "lr_decay_start_fraction": 0.8,
  "l1_warmup_fraction": 0.05,
  "num_tokens": 10000000,
  "batch_size": 4096,
  "seq_len": 1024,
  "model_batch_size": 4,
  "buffer_mult": 128,
  "norm_batch_size": 16,
  "log_every": 100,
  "save_every": 20000,
  "d_in": 2304
}