{
  "layer_combinations": [
    [
      25,
      50,
      75
    ]
  ],
  "act_layer_combinations": [
    [
      9,
      18,
      27
    ]
  ],
  "schema_version": 1,
  "special_token": " ?",
  "prefix_template": "Layer: {layer}\\n{special_token} * {num_positions} \\n",
  "model_name": "Qwen/Qwen3-8B",
  "hook_onto_layer": 1,
  "use_decoder_vectors": true,
  "generation_kwargs": {
    "do_sample": false,
    "max_new_tokens": 20
  },
  "steering_coefficient": 1.0,
  "use_lora": true,
  "lora_r": 64,
  "lora_alpha": 128,
  "lora_dropout": 0.05,
  "lora_target_modules": "all-linear",
  "dataset_configs": [],
  "dataset_loader_names": [],
  "dataset_folder": "",
  "train_batch_size": 16,
  "eval_batch_size": 128,
  "train_batches_per_materialization_block": 16,
  "num_epochs": 1,
  "lr": 1e-05,
  "gradient_accumulation_steps": 1,
  "max_grad_norm": 1.0,
  "eval_steps": 999999,
  "eval_on_start": false,
  "gradient_checkpointing": true,
  "window_mult": 20,
  "save_steps": 5000,
  "save_dir": "checkpoints",
  "max_train_examples": null,
  "seed": 42,
  "eval_logs_path": "",
  "load_lora_path": null,
  "created_at_utc": "",
  "git_commit": "",
  "wandb_project": "cot-oracle",
  "wandb_run_name": "",
  "wandb_suffix": "",
  "hf_push_to_hub": false,
  "hf_private_repo": false,
  "hf_repo_name": "",
  "hf_repo_id": "",
  "load_in_8bit": false,
  "open_ended_eval_include": null,
  "positive_negative_examples": false
}