---
# DPO + LoRA fine-tuning configuration (Qwen2.5-Coder-14B, CPT+SFT base).
# Reconstructed from a whitespace-collapsed file; section nesting for
# early_stopping / resume_from_checkpoint was inferred — NOTE(review): confirm
# against the consuming trainer's schema.

run:
  run_dir: ./runs/dpo_run_14b_v1
  seed: 42

wandb:
  enabled: true
  project: dpo-training
  entity: null
  name: null
  tags:
    - dpo-lora
    - preference-optimization
  notes: null

model:
  repo_id: ../../Models/Qwen2.5-Coder-14B-CPT-SFT
  revision: null
  base_local_dir: base_model
  trust_remote_code: true
  tokenizer_use_fast: true
  device_map: auto
  torch_dtype: bfloat16
  use_4bit: false
  bnb_4bit_quant_type: nf4
  bnb_4bit_use_double_quant: false
  bnb_4bit_compute_dtype: bfloat16
  attn_implementation: null

data:
  train_jsonl: dpo_pairs_generated.jsonl
  eval_jsonl: null  # when null, eval set is carved from train via eval_split_ratio
  eval_split_ratio: 0.1
  prompt_field: prompt
  chosen_field: chosen
  rejected_field: rejected
  score_field: f1_score
  format_type: chatml
  # Literal block scalar replaces the original escaped double-quoted string.
  # Fixed: the Rules list numbered two rules "3." — renumbered 3..6.
  system_prompt: |
    You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.

    ## Output Format

    ##OUTPUT
    Explain the data flow and why each component must change:
    - Flow: [Input → Processing → Output with arrows]
    - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"
    - Explain coupling between components

    ##SELECT
    modify::crates/path/to/file.rs::impl::ComponentName
    add::crates/another/file.rs::function::AnotherComponent


    ## Rules

    1. Use full paths: `remove::crates/folder/file.rs::Type::Name`
    2. Use `::` for nested items: `status::StructName::Type::Name`
    3. Always explain "must change because" and "without this"
    4. Types of components: function, struct, enum, impl, trait
    5. If there is extra information (e.g., enum variants), include that too.
    6. Start with ##OUTPUT, end with ##SELECT, terminate with \n
  max_length: 2048
  shuffle: true
  num_proc: 4

peft:
  enabled: true
  r: 16
  lora_alpha: 32
  lora_dropout: 0.05
  bias: "none"  # quoted: PEFT expects the literal string "none", not YAML null
  target_modules: auto

dpo:
  beta: 0.1
  label_smoothing: 0.0
  loss_type: sigmoid
  use_reference_model: true
  reference_free: false

train:
  num_train_epochs: 3
  per_device_train_batch_size: 1
  per_device_eval_batch_size: 1
  gradient_accumulation_steps: 8
  # 5.0e-5, not 5e-5: YAML 1.1 loaders (PyYAML) parse exponent-only notation
  # without a decimal point as a string, which breaks float-typed consumers.
  learning_rate: 5.0e-5
  weight_decay: 0.0
  warmup_ratio: 0.1
  lr_scheduler_type: cosine
  optim: adamw_torch
  max_grad_norm: 1.0
  gradient_checkpointing: true
  logging_steps: 2
  save_strategy: steps
  save_steps: 100
  save_total_limit: 10
  evaluation_strategy: steps
  eval_steps: 25
  load_best_model_at_end: true
  early_stopping:
    enabled: true
    patience: 5
    min_delta: 0.001
    metric: eval_loss
    mode: min
  resume_from_checkpoint: auto

merge:
  enabled: true
  merged_dtype: float16
  max_shard_size: 2GB
  output_dir: ./merged_14b_dpo_lora