{
  "layer_combinations": [
    [
      25,
      50,
      75
    ]
  ],
  "act_layer_combinations": [
    [
      9,
      18,
      27
    ]
  ],
  "schema_version": 1,
  "special_token": " ?",
  "prefix_template": "Layer: {layer}\\n{special_token} * {num_positions} \\n",
  "model_name": "Qwen/Qwen3-8B",
  "hook_onto_layer": 1,
  "use_decoder_vectors": true,
  "generation_kwargs": {
    "do_sample": false,
    "max_new_tokens": 20
  },
  "steering_coefficient": 1.0,
  "use_lora": true,
  "lora_r": 64,
  "lora_alpha": 128,
  "lora_dropout": 0.05,
  "lora_target_modules": "all-linear",
  "dataset_configs": [],
  "dataset_loader_names": [],
  "dataset_folder": "",
  "train_batch_size": 16,
  "eval_batch_size": 128,
  "train_batches_per_materialization_block": 16,
  "num_epochs": 1,
  "lr": 1e-05,
  "gradient_accumulation_steps": 1,
  "max_grad_norm": 1.0,
  "eval_steps": 999999,
  "eval_on_start": false,
  "gradient_checkpointing": true,
  "window_mult": 20,
  "save_steps": 5000,
  "save_dir": "checkpoints",
  "max_train_examples": null,
  "seed": 42,
  "eval_logs_path": "",
  "load_lora_path": null,
  "created_at_utc": "",
  "git_commit": "",
  "wandb_project": "cot-oracle",
  "wandb_run_name": "",
  "wandb_suffix": "",
  "hf_push_to_hub": false,
  "hf_private_repo": false,
  "hf_repo_name": "",
  "hf_repo_id": "",
  "load_in_8bit": false,
  "open_ended_eval_include": null,
  "positive_negative_examples": false
}