---
# Fine-tuning run configuration for the mo10 code-monitor experiment.
# Consumed by the training harness; logged to Weights & Biases.
run_id: mo10_code_monitor

# Training dataset (JSONL, one example per line).
data:
  path: experiments/260419_mo10/data/mo10_train.jsonl

# Base model to fine-tune.
model:
  name: unsloth/Llama-3.3-70B-Instruct

# Core training hyperparameters.
training:
  epochs: 1
  batch_size: 8
  gradient_accumulation_steps: 1
  learning_rate: 2.0e-05
  # Resume/continue from a previously trained adapter checkpoint.
  adapter_path: experiments/260409_b200_unsloth/output/mo9c
  shuffle_seed: 42
  max_seq_length: 4096
  # Keep only the most recent checkpoint on disk.
  save_total_limit: 1

# LoRA adapter settings.
# NOTE(review): top-level placement inferred from key names in the mangled
# source — confirm the consumer does not expect this nested under `training`.
lora:
  rank: 64
  alpha: 64
  dropout: 0.0
  target_modules: all-linear

# Experiment tracking and checkpoint cadence.
logging:
  wandb_project: collusion-mo-finetune
  wandb_run_name: mo10_code_monitor
  # Fail fast if W&B is unavailable rather than training unlogged.
  require_wandb: true
  log_every_n_steps: 1
  save_every_n_steps: 500