---
# Run bookkeeping: output location for checkpoints/logs and the global RNG seed.
run:
  run_dir: "./runs/dpo_run_24b_v1"
  seed: 42
# Weights & Biases experiment tracking.
wandb:
  enabled: true
  project: "dpo-training"
  entity: null  # null → default entity of the logged-in user
  name: null    # null → auto-generated run name
  tags: ["dpo-lora", "preference-optimization"]
  notes: null
# Base model to fine-tune.
model:
  # NOTE(review): despite the name, this is a local relative path, not a Hub
  # repo id — confirm the loader accepts filesystem paths here.
  repo_id: "../../Models/Devstral-Small-2-24B-HS-CPT-SFT"
  revision: null

  # Local directory the base model is staged/cached into.
  base_local_dir: "base_model"

  # SECURITY: trust_remote_code executes Python shipped with the model on
  # load; keep true only for sources you control.
  trust_remote_code: true
  tokenizer_use_fast: true
  device_map: "auto"

  torch_dtype: "bfloat16"

  # 4-bit (bitsandbytes) quantization — currently disabled; the bnb_4bit_*
  # knobs below are only consulted when use_4bit is true.
  use_4bit: false
  bnb_4bit_quant_type: "nf4"
  bnb_4bit_use_double_quant: false
  bnb_4bit_compute_dtype: "bfloat16"

  # null → let transformers choose; set "flash_attention_2" if installed.
  attn_implementation: null
# DPO preference-pair dataset.
data:
  train_jsonl: "dpo_pairs_generated.jsonl"
  eval_jsonl: null        # null → carve an eval split out of train below
  eval_split_ratio: 0.1

  # JSONL field names holding the prompt and the preferred/rejected completions.
  prompt_field: "prompt"
  chosen_field: "chosen"
  rejected_field: "rejected"

  # Optional per-pair quality score carried through from pair generation.
  score_field: "f1_score"

  format_type: "chatml"

  # NOTE(review): the numbered rules below were 1,2,3,3,4,5 in the original
  # (duplicate "3"); renumbered to 1-6. The final rule ("Start with") appears
  # truncated — recover the missing text from the prompt's author.
  system_prompt: |
    You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.

    Explain the data flow and why each component must change:
    - Flow: [Input → Processing → Output with arrows]
    - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"
    - Explain coupling between components

    modify::crates/path/to/file.rs::impl::ComponentName
    add::crates/another/file.rs::function::AnotherComponent
    <EOS>

    1. Use full paths: `remove::crates/folder/file.rs::Type::Name`
    2. Use `::` for nested items: `status::StructName::Type::Name`
    3. Always explain "must change because" and "without this"
    4. Types of components: function, struct, enum, impl, trait
    5. If there is extra information (e.g., enum variants), include that too.
    6. Start with

  max_length: 2048  # max tokenized length (prompt + completion)
  shuffle: true
  num_proc: 4       # worker processes for dataset preprocessing
# LoRA adapter configuration (PEFT).
peft:
  enabled: true
  r: 16                  # adapter rank
  lora_alpha: 32         # scaling factor (alpha / r = 2.0)
  lora_dropout: 0.05
  bias: "none"
  target_modules: "auto" # auto-detect target projection layers
# Direct Preference Optimization loss settings.
dpo:
  beta: 0.1            # strength of the KL penalty against the reference policy
  label_smoothing: 0.0
  loss_type: "sigmoid" # standard DPO loss

  # Standard DPO keeps a frozen reference model; these two flags must stay
  # mutually consistent (reference_free true would make the first irrelevant).
  use_reference_model: true
  reference_free: false
# Trainer hyperparameters.
train:
  num_train_epochs: 3

  # Effective train batch size per device = 1 * 8 = 8.
  per_device_train_batch_size: 1
  per_device_eval_batch_size: 1
  gradient_accumulation_steps: 8

  # Written as 5.0e-5 (not 5e-5): YAML 1.1 loaders such as PyYAML resolve
  # "5e-5" as a *string* because the float regex requires a dot.
  learning_rate: 5.0e-5
  weight_decay: 0.0
  warmup_ratio: 0.1
  lr_scheduler_type: "cosine"

  optim: "adamw_torch"
  max_grad_norm: 1.0
  gradient_checkpointing: true

  logging_steps: 2
  save_strategy: "steps"
  save_steps: 100       # multiple of eval_steps — required when load_best_model_at_end is true
  save_total_limit: 10

  # NOTE(review): transformers >= 4.41 renames this key to "eval_strategy";
  # keep "evaluation_strategy" only if the pinned version still accepts it.
  evaluation_strategy: "steps"
  eval_steps: 25
  load_best_model_at_end: true

  # Stop when eval_loss fails to improve by min_delta for `patience` evals.
  early_stopping:
    enabled: true
    patience: 5
    min_delta: 0.001
    metric: "eval_loss"
    mode: "min"

  # "auto" → resume from the latest checkpoint in run_dir if one exists.
  resume_from_checkpoint: "auto"
# Post-training: merge the LoRA adapter back into the base weights.
merge:
  enabled: true
  merged_dtype: "float16"
  max_shard_size: "2GB"
  # NOTE(review): "14b" conflicts with the 24B model and run_dir elsewhere in
  # this config — looks like a copy-paste from a 14B setup; confirm the path.
  output_dir: "./merged_14b_dpo_lora"