{ "target_model_name": "meta-llama/Llama-3.1-8B-Instruct", "load_model_checkpoint": "", "train_system": "", "train_stimulus_completion": "", "train_stimulus": "data/martian_data/pipeline_output/training_files/train_stimulus.json", "train_control": "", "train_qa": "data/martian_data/pipeline_output/training_files/train_qa.json", "train_with_verb_mask": "user", "add_thought_tokens": false, "nudge_persona": false, "modify_chat_template": true, "filter": "", "train_percent": 1.0, "eval_ppl": true, "eval_system": "", "eval_stimulus_completion": "", "eval_stimulus": "data/martian_data/pipeline_output/training_files/eval_stimulus.json", "eval_control": "", "eval_qa": "data/martian_data/pipeline_output/training_files/eval_qa.json", "eval_every_n_steps": 200, "output_dir": "out/codereview", "save_model": true, "save_every_n_steps": 200, "use_wandb": true, "run_name": "", "shift_position_ids": true, "min_layer_to_read": 15, "max_layer_to_read": 16, "layer_to_write": 0, "module_setup": "read-vary_write-fixed_n-fixed", "num_layers_to_read": 1, "num_layers_to_sample": 1, "batch_size_training": 1, "gradient_accumulation_steps": 8, "gradient_clipping": false, "gradient_clipping_threshold": 1.0, "num_epochs": 5, "num_workers_dataloader": 1, "lr": 3e-05, "ema_decay": 1, "warmup_steps": 50, "weight_decay": 0.01, "gamma": 0.85, "seed": 42, "peft_method": "lora", "use_peft": true, "use_fsdp": false, "checkpoint_dir": "out/codereview/006/checkpoints" }