{
  "model_name": "google/gemma-2-2b",
  "layer": 12,
  "hook_point": "resid_post",
  "act_size": 2304,
  "sae_type": "topk",
  "dict_size": 65536,
  "aux_penalty": 0.03125,
  "input_unit_norm": true,
  "batch_norm_on_queries": false,
  "affine_batch_norm": false,
  "linear_heads": 0,
  "topk2": 32,
  "topk1": 50,
  "topk2_warmup_steps_fraction": 0.0,
  "start_topk2": 50,
  "topk1_warmup_steps_fraction": 0.0,
  "start_topk1": 50,
  "topk2_aux": 512,
  "cartesian_op": "mul",
  "router_depth": 2,
  "router_tree_width": null,
  "num_mkeys": null,
  "num_nkeys": null,
  "num_heads": -1,
  "n_batches_to_dead": 10,
  "lr": 0.0008,
  "bandwidth": 0.001,
  "l1_coeff": 0.0018,
  "num_tokens": 500000000,
  "seq_len": 1024,
  "model_batch_size": 64,
  "num_batches_in_buffer": 5,
  "max_grad_norm": 1.0,
  "batch_size": 8192,
  "weight_decay": 0.0,
  "warmup_fraction": 0.1,
  "scheduler_type": "cosine_with_min_lr",
  "device": "cuda",
  "dtype": "torch.float32",
  "sae_dtype": "torch.float32",
  "dataset_path": "HuggingFaceFW/fineweb-edu",
  "wandb_project": "turbo-llama-lens",
  "enable_wandb": true,
  "sae_name": "sae",
  "seed": 42,
  "performance_log_steps": 100,
  "save_checkpoint_steps": 15000000,
  "wandb_run_suffix": "ex72_for_sae_bench_gemma",
  "sweep_pair": "{'dict_size': 65536, 'num_tokens': 500000000, 'sae_type': 'topk', 'start_topk1': 50, 'start_topk2': 50, 'topk1': 50}"
}