| activation_function: relu |
| adam_beta1: 0.9 |
| adam_beta2: 0.999 |
| b_dec_init_method: zeros |
| cached_activations_path: null |
| checkpoint_path: ../outputs/checkpoints |
| clip_grad_norm: false |
| context_size: 1024 |
| custom_loss: null |
| d_in: 2048 |
| d_out: null |
| dataset: Skylion007/openwebtext |
| dense_loss_coefficient: 0 |
| device: cuda |
| different_output: false |
| dtype: float32 |
| epsilon_l0_approx: 0.2 |
| eval_frequency: 500 |
| expansion_factor: 8 |
| feature_reinit_scale: 0.2 |
| feature_resampling_method: null |
| fine_tune_dataset: false |
| finetuning_steps: !!python/tuple |
| - 1000 |
| flatten_activations_over_layer: false |
| flatten_activations_over_layer_output: false |
| from_pretrained_path: null |
| hook_point: blocks.9.hook_resid_pre |
| hook_point_head_index: null |
| hook_point_head_index_output: null |
| hook_point_layer: 9 |
| hook_point_layer_output: null |
| hook_point_output: null |
| initial_decoder_norm: 0.1 |
| initialise_encoder_to_decoder_transpose: false |
| is_dataset_tokenized: false |
| l0_coefficient: 2.0e-05 |
| l0_warmup: false |
| l0_warmup_steps: 1000 |
| l1_coefficient: 0 |
| l1_warmup: false |
| l1_warmup_steps: 1000 |
| log_to_wandb: true |
| loop_dataset: false |
| lr: 0.0004 |
| lr_scheduler_name: constant |
| lr_warm_up_steps: 500 |
| max_resample_step: 100000 |
| max_sparsity_target: 1 |
| min_sparsity_for_resample: 0 |
| min_sparsity_target: 1.0e-05 |
| model_name: gemma-2b-it |
| mse_loss_coefficient: 1 |
| mse_loss_type: centered |
| multiple_runs: false |
| n_batches_in_store_buffer: 128 |
| n_checkpoints: 160 |
| n_running_sparsity: 300 |
| n_starting_steps: null |
| normalise_initial_decoder_weights: false |
| normalise_w_dec: true |
| remove_bos_tokens: false |
| resample_batches: 128 |
| resample_frequency: 25000 |
| scale_input_norm: false |
| seed: 42 |
| sparse_loss_coefficient: 1.0e-06 |
| sparsity_log_frequency: 5000 |
| store_batch_size: 2 |
| subtract_b_dec_from_inputs: false |
| topk_amount: 10 |
| total_training_steps: 200000 |
| train_batch_size: 4096 |
| use_cached_activations: false |
| use_gated_sparse_autoencoder: false |
| wandb_log_frequency: 10 |
| wandb_project: test_gemma_2b |
| weight_l1_by_decoder_norms: false |