obalcells
/

hallucination-probes

Model card · Files and versions

hallucination-probes / gemma2_9b_lora_lambda_kl_0_05 /training_config.json

obalcells's picture

Upload LoRA model and probe head

2dc9d7f verified 6 months ago

history blame contribute delete

2.05 kB

	{
	"wandb_project": "probe-training",
	"model_name": "google/gemma-2-9b-it",
	"upload_to_hf": true,
	"hf_repo_id": "andyrdt/hallucination-probes",
	"save_evaluation_metrics": true,
	"evaluation_output_dir": null,
	"probe_id": "gemma2_9b_lora_lambda_kl=0.5",
	"probe_dir": "/root/git/hallucination_detection/value_head_probes/gemma2_9b_lora_lambda_kl=0.5",
	"layer": 40,
	"lora_r": 16,
	"lora_alpha": 32,
	"lora_dropout": 0.05,
	"lora_layers": [
	0,
	1,
	2,
	3,
	4,
	5,
	6,
	7,
	8,
	9,
	10,
	11,
	12,
	13,
	14,
	15,
	16,
	17,
	18,
	19,
	20,
	21,
	22,
	23,
	24,
	25,
	26,
	27,
	28,
	29,
	30,
	31,
	32,
	33,
	34,
	35,
	36,
	37,
	38,
	39,
	40
	],
	"probe_threshold": 0.5,
	"load_from_hf": false,
	"load_from_disk": false,
	"train_data_config_path": "experiments/training/gemma-2-9b/train_data.yaml",
	"eval_data_config_path": "experiments/training/gemma-2-9b/eval_data.yaml",
	"cache_data_config_path": null,
	"high_loss_threshold": null,
	"lambda_lm": 0.0,
	"lambda_kl": 0.5,
	"anneal_max_aggr": true,
	"anneal_warmup": 1.0,
	"learning_rate": 5e-05,
	"probe_head_lr": 0.0005,
	"lora_lr": 5e-05,
	"sparsity_penalty_weight": null,
	"use_focal_loss": false,
	"focal_gamma": 2.0,
	"evaluate_only": false,
	"max_steps": -1,
	"num_train_epochs": 1,
	"per_device_train_batch_size": 4,
	"per_device_eval_batch_size": 4,
	"logging_steps": 10,
	"eval_steps": null,
	"cache_steps": null,
	"evaluation_strategy": "no",
	"seed": 42,
	"save_roc_curves": true,
	"dump_raw_eval_results": false,
	"load_in_8bit": false,
	"load_in_4bit": false,
	"enable_gradient_checkpointing": true,
	"gradient_accumulation_steps": 2,
	"max_grad_norm": 1.0
	}