{
"wandb_project": "probe-training",
"model_name": "google/gemma-2-9b-it",
"upload_to_hf": true,
"hf_repo_id": "andyrdt/hallucination-probes",
"save_evaluation_metrics": true,
"evaluation_output_dir": null,
"probe_id": "gemma2_9b_lora_lambda_kl=0.5",
"probe_dir": "/root/git/hallucination_detection/value_head_probes/gemma2_9b_lora_lambda_kl=0.5",
"layer": 40,
"lora_r": 16,
"lora_alpha": 32,
"lora_dropout": 0.05,
"lora_layers": [
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40
],
"probe_threshold": 0.5,
"load_from_hf": false,
"load_from_disk": false,
"train_data_config_path": "experiments/training/gemma-2-9b/train_data.yaml",
"eval_data_config_path": "experiments/training/gemma-2-9b/eval_data.yaml",
"cache_data_config_path": null,
"high_loss_threshold": null,
"lambda_lm": 0.0,
"lambda_kl": 0.5,
"anneal_max_aggr": true,
"anneal_warmup": 1.0,
"learning_rate": 5e-05,
"probe_head_lr": 0.0005,
"lora_lr": 5e-05,
"sparsity_penalty_weight": null,
"use_focal_loss": false,
"focal_gamma": 2.0,
"evaluate_only": false,
"max_steps": -1,
"num_train_epochs": 1,
"per_device_train_batch_size": 4,
"per_device_eval_batch_size": 4,
"logging_steps": 10,
"eval_steps": null,
"cache_steps": null,
"evaluation_strategy": "no",
"seed": 42,
"save_roc_curves": true,
"dump_raw_eval_results": false,
"load_in_8bit": false,
"load_in_4bit": false,
"enable_gradient_checkpointing": true,
"gradient_accumulation_steps": 2,
"max_grad_norm": 1.0
}