---
# DPO + LoRA fine-tuning configuration (Qwen2.5-Coder-14B, CPT+SFT base).
# Reconstructed from a whitespace-collapsed file; section nesting for
# early_stopping / resume_from_checkpoint was inferred — NOTE(review): confirm
# against the consuming trainer's schema.

run:
  run_dir: ./runs/dpo_run_14b_v1
  seed: 42

wandb:
  enabled: true
  project: dpo-training
  entity: null
  name: null
  tags:
    - dpo-lora
    - preference-optimization
  notes: null

model:
  repo_id: ../../Models/Qwen2.5-Coder-14B-CPT-SFT
  revision: null
  base_local_dir: base_model
  trust_remote_code: true
  tokenizer_use_fast: true
  device_map: auto
  torch_dtype: bfloat16
  use_4bit: false
  bnb_4bit_quant_type: nf4
  bnb_4bit_use_double_quant: false
  bnb_4bit_compute_dtype: bfloat16
  attn_implementation: null

data:
  train_jsonl: dpo_pairs_generated.jsonl
  eval_jsonl: null  # when null, eval set is carved from train via eval_split_ratio
  eval_split_ratio: 0.1
  prompt_field: prompt
  chosen_field: chosen
  rejected_field: rejected
  score_field: f1_score
  format_type: chatml
  # Literal block scalar replaces the original escaped double-quoted string.
  # Fixed: the Rules list numbered two rules "3." — renumbered 3..6.
  system_prompt: |
    You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.

    ## Output Format

    ##OUTPUT
    Explain the data flow and why each component must change:
    - Flow: [Input → Processing → Output with arrows]
    - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"
    - Explain coupling between components

    ##SELECT
    modify::crates/path/to/file.rs::impl::ComponentName
    add::crates/another/file.rs::function::AnotherComponent


    ## Rules

    1. Use full paths: `remove::crates/folder/file.rs::Type::Name`
    2. Use `::` for nested items: `status::StructName::Type::Name`
    3. Always explain "must change because" and "without this"
    4. Types of components: function, struct, enum, impl, trait
    5. If there is extra information (e.g., enum variants), include that too.
    6. Start with ##OUTPUT, end with ##SELECT, terminate with \n
  max_length: 2048
  shuffle: true
  num_proc: 4

peft:
  enabled: true
  r: 16
  lora_alpha: 32
  lora_dropout: 0.05
  bias: "none"  # quoted: PEFT expects the literal string "none", not YAML null
  target_modules: auto

dpo:
  beta: 0.1
  label_smoothing: 0.0
  loss_type: sigmoid
  use_reference_model: true
  reference_free: false

train:
  num_train_epochs: 3
  per_device_train_batch_size: 1
  per_device_eval_batch_size: 1
  gradient_accumulation_steps: 8
  # 5.0e-5, not 5e-5: YAML 1.1 loaders (PyYAML) parse exponent-only notation
  # without a decimal point as a string, which breaks float-typed consumers.
  learning_rate: 5.0e-5
  weight_decay: 0.0
  warmup_ratio: 0.1
  lr_scheduler_type: cosine
  optim: adamw_torch
  max_grad_norm: 1.0
  gradient_checkpointing: true
  logging_steps: 2
  save_strategy: steps
  save_steps: 100
  save_total_limit: 10
  evaluation_strategy: steps
  eval_steps: 25
  load_best_model_at_end: true
  early_stopping:
    enabled: true
    patience: 5
    min_delta: 0.001
    metric: eval_loss
    mode: min
  resume_from_checkpoint: auto

merge:
  enabled: true
  merged_dtype: float16
  max_shard_size: 2GB
  output_dir: ./merged_14b_dpo_lora