---
# Fine-tuning run configuration for the mo10 code-monitor experiment.
# Consumed by the training harness; logged to Weights & Biases.
run_id: mo10_code_monitor

# Training dataset (JSONL, one example per line).
data:
  path: experiments/260419_mo10/data/mo10_train.jsonl

# Base model to fine-tune.
model:
  name: unsloth/Llama-3.3-70B-Instruct

# Core training hyperparameters.
training:
  epochs: 1
  batch_size: 8
  gradient_accumulation_steps: 1
  learning_rate: 2.0e-05
  # Resume/continue from a previously trained adapter checkpoint.
  adapter_path: experiments/260409_b200_unsloth/output/mo9c
  shuffle_seed: 42
  max_seq_length: 4096
  # Keep only the most recent checkpoint on disk.
  save_total_limit: 1

# LoRA adapter settings.
# NOTE(review): top-level placement inferred from key names in the mangled
# source — confirm the consumer does not expect this nested under `training`.
lora:
  rank: 64
  alpha: 64
  dropout: 0.0
  target_modules: all-linear

# Experiment tracking and checkpoint cadence.
logging:
  wandb_project: collusion-mo-finetune
  wandb_run_name: mo10_code_monitor
  # Fail fast if W&B is unavailable rather than training unlogged.
  require_wandb: true
  log_every_n_steps: 1
  save_every_n_steps: 500