# File size: 2,752 Bytes
# b9bc317 28847d8 b9bc317 28847d8 b9bc317 28847d8 b9bc317 28847d8 b9bc317 28847d8 b9bc317 28847d8 b66e341 28847d8 b9bc317 28847d8 b9bc317 b66e341 b9bc317 28847d8 b9bc317 28847d8 b9bc317 b66e341 28847d8 b9bc317 28847d8 9fcdb22 b9bc317 28847d8 b66e341 28847d8 b9bc317 28847d8 |
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
# Settings for this training run: its output directory and the random seed.
run:
  # Relative path; presumably resolved against the launch directory — confirm
  # in the consuming script.
  run_dir: ./runs/dpo_run_14b_v1
  seed: 42
# Weights & Biases settings.
# NOTE(review): indentation was lost in the copy this was restored from; this
# section is reconstructed as a top-level mapping. If the loader expects it
# under another section, indent the whole block one more level.
wandb:
  enabled: true
  project: dpo-training
  # entity, name and notes are explicit nulls; presumably the consumer (or
  # wandb itself) falls back to its own defaults — confirm.
  entity: null
  name: null
  tags:
    - dpo-lora
    - preference-optimization
  notes: null
# Which model to load and how to load it.
model:
  # A relative filesystem path here (starts with ../..), despite the key name.
  repo_id: ../../Models/Qwen2.5-Coder-14B-CPT-SFT
  revision: null
  base_local_dir: base_model
  # NOTE(review): if this is forwarded to a Hugging Face `from_pretrained`
  # call, `true` permits running Python code shipped with the checkpoint —
  # confirm it is actually required for this model.
  trust_remote_code: true
  tokenizer_use_fast: true
  device_map: auto
  torch_dtype: bfloat16
  # 4-bit loading is switched off; the bnb_4bit_* keys below are presumably
  # ignored while use_4bit is false — verify against the loader.
  use_4bit: false
  bnb_4bit_quant_type: nf4
  bnb_4bit_use_double_quant: false
  bnb_4bit_compute_dtype: bfloat16
  attn_implementation: null
# Preference-pair dataset settings: input files, record field names, prompt
# formatting and preprocessing options.
data:
  train_jsonl: dpo_pairs_generated.jsonl
  # No separate eval file is given; presumably eval_split_ratio (0.1) of the
  # training file is held out instead — confirm in the data loader.
  eval_jsonl: null
  eval_split_ratio: 0.1
  # Names of the fields read from each JSONL record.
  prompt_field: prompt
  chosen_field: chosen
  rejected_field: rejected
  score_field: f1_score
  format_type: chatml
  # Literal block scalar: every line below is part of the string, including
  # the lines that start with '#'. The value ends with exactly one newline.
  # NOTE(review): the rules list numbers two items "3." — kept verbatim on
  # purpose, because editing the prompt changes what the model sees. If an
  # earlier training stage used this same text, fix both together.
  system_prompt: |
    You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.

    ## Output Format

    ##OUTPUT
    Explain the data flow and why each component must change:
    - Flow: [Input → Processing → Output with arrows]
    - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"
    - Explain coupling between components

    ##SELECT
    modify::crates/path/to/file.rs::impl::ComponentName
    add::crates/another/file.rs::function::AnotherComponent
    <EOS>

    ## Rules

    1. Use full paths: `remove::crates/folder/file.rs::Type::Name`
    2. Use `::` for nested items: `status::StructName::Type::Name`
    3. Always explain "must change because" and "without this"
    3. Types of components: function, struct, enum, impl, trait
    4. If there is extra information (e.g., enum variants), include that too.
    5. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>
  max_length: 2048
  shuffle: true
  num_proc: 4
# LoRA adapter settings.
peft:
  enabled: true
  # lora_alpha is twice r (32 / 16).
  r: 16
  lora_alpha: 32
  lora_dropout: 0.05
  # `none` is a plain string, not YAML null (null would be spelled `null`).
  bias: none
  # The string `auto`; presumably the consumer picks the modules itself —
  # confirm how it resolves this value.
  target_modules: auto
# DPO loss settings.
dpo:
  beta: 0.1
  label_smoothing: 0.0
  loss_type: sigmoid
  # These two flags agree with each other: a reference model is used and
  # reference-free mode is off.
  use_reference_model: true
  reference_free: false
# Optimisation, logging, checkpointing and evaluation schedule.
train:
  num_train_epochs: 3
  # 1 sample per device x 8 accumulation steps = 8 samples per optimizer step
  # on each device.
  per_device_train_batch_size: 1
  per_device_eval_batch_size: 1
  gradient_accumulation_steps: 8
  # Written with an explicit mantissa dot: YAML 1.1 loaders (e.g. PyYAML) read
  # the dotless form `5e-5` as the *string* "5e-5", not a float. `5.0e-5` is a
  # float under both YAML 1.1 and 1.2.
  learning_rate: 5.0e-5
  weight_decay: 0.0
  warmup_ratio: 0.1
  lr_scheduler_type: cosine
  optim: adamw_torch
  max_grad_norm: 1.0
  gradient_checkpointing: true
  logging_steps: 2
  save_strategy: steps
  # save_steps (100) is a whole multiple of eval_steps (25).
  save_steps: 100
  save_total_limit: 10
  # NOTE(review): if these keys are forwarded verbatim to Hugging Face
  # TrainingArguments, newer transformers releases name this `eval_strategy`
  # — confirm against the pinned version.
  evaluation_strategy: steps
  eval_steps: 25
  load_best_model_at_end: true
  # NOTE(review): indentation was lost in the copy this was restored from;
  # early_stopping is reconstructed as a child of `train`, with
  # resume_from_checkpoint back at `train` level — confirm against the loader.
  early_stopping:
    enabled: true
    patience: 5
    min_delta: 0.001
    # Monitored metric and direction: lower eval_loss counts as better.
    metric: eval_loss
    mode: min
  resume_from_checkpoint: auto
# Settings for merging the trained adapter and writing the merged model out.
merge:
  enabled: true
  # NOTE(review): the model section of this file loads in bfloat16, while the
  # merged weights are written as float16 — confirm the dtype change is
  # intentional.
  merged_dtype: float16
  max_shard_size: 2GB
  output_dir: ./merged_14b_dpo_lora
# |