{
  "wandb_project": "probe-training",
  "model_name": "google/gemma-2-9b-it",
  "upload_to_hf": true,
  "hf_repo_id": "andyrdt/hallucination-probes",
  "save_evaluation_metrics": true,
  "evaluation_output_dir": null,
  "probe_id": "gemma2_9b_lora_lambda_kl=0.5",
  "probe_dir": "/root/git/hallucination_detection/value_head_probes/gemma2_9b_lora_lambda_kl=0.5",
  "layer": 40,
  "lora_r": 16,
  "lora_alpha": 32,
  "lora_dropout": 0.05,
  "lora_layers": [
    0,
    1,
    2,
    3,
    4,
    5,
    6,
    7,
    8,
    9,
    10,
    11,
    12,
    13,
    14,
    15,
    16,
    17,
    18,
    19,
    20,
    21,
    22,
    23,
    24,
    25,
    26,
    27,
    28,
    29,
    30,
    31,
    32,
    33,
    34,
    35,
    36,
    37,
    38,
    39,
    40
  ],
  "probe_threshold": 0.5,
  "load_from_hf": false,
  "load_from_disk": false,
  "train_data_config_path": "experiments/training/gemma-2-9b/train_data.yaml",
  "eval_data_config_path": "experiments/training/gemma-2-9b/eval_data.yaml",
  "cache_data_config_path": null,
  "high_loss_threshold": null,
  "lambda_lm": 0.0,
  "lambda_kl": 0.5,
  "anneal_max_aggr": true,
  "anneal_warmup": 1.0,
  "learning_rate": 5e-05,
  "probe_head_lr": 0.0005,
  "lora_lr": 5e-05,
  "sparsity_penalty_weight": null,
  "use_focal_loss": false,
  "focal_gamma": 2.0,
  "evaluate_only": false,
  "max_steps": -1,
  "num_train_epochs": 1,
  "per_device_train_batch_size": 4,
  "per_device_eval_batch_size": 4,
  "logging_steps": 10,
  "eval_steps": null,
  "cache_steps": null,
  "evaluation_strategy": "no",
  "seed": 42,
  "save_roc_curves": true,
  "dump_raw_eval_results": false,
  "load_in_8bit": false,
  "load_in_4bit": false,
  "enable_gradient_checkpointing": true,
  "gradient_accumulation_steps": 2,
  "max_grad_norm": 1.0
}