File size: 2,752 Bytes
b9bc317
28847d8
 
b9bc317
 
28847d8
b9bc317
 
 
28847d8
 
b9bc317
28847d8
 
 
 
 
 
 
 
 
 
 
 
 
b9bc317
28847d8
b9bc317
28847d8
b66e341
28847d8
 
 
 
b9bc317
 
 
 
 
 
 
 
 
 
 
28847d8
 
 
 
b9bc317
 
b66e341
 
b9bc317
 
28847d8
 
 
 
 
 
 
b9bc317
28847d8
b9bc317
b66e341
28847d8
 
 
 
b9bc317
28847d8
9fcdb22
b9bc317
28847d8
 
b66e341
28847d8
 
 
 
 
 
 
 
 
 
 
b9bc317
28847d8
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# Run-level settings: an output directory and a seed. How the consuming
# script uses them is not visible from this file.
run:
  # Quoted so the relative path is unambiguously a string.
  run_dir: './runs/dpo_run_14b_v1'
  seed: 42
# Experiment-tracking options (presumably Weights & Biases, going by the key
# name — confirm against the consuming script).
wandb:
  enabled: true
  project: 'dpo-training'
  # entity, name and notes are explicit YAML nulls, not empty strings.
  entity: null
  name: null
  tags:
    - 'dpo-lora'
    - 'preference-optimization'
  notes: null
# Base model location and loading options.
# NOTE(review): the meaning of each key is inferred from its name; confirm
# against the script that reads this file.
model:
  # Model location. The value is a relative filesystem-style path.
  repo_id: '../../Models/Qwen2.5-Coder-14B-CPT-SFT'
  revision: null
  base_local_dir: 'base_model'

  # Loader and tokenizer switches.
  # NOTE(review): if trust_remote_code maps to the Hugging Face option of the
  # same name, it lets code shipped with the model run — confirm the model
  # source is trusted.
  trust_remote_code: true
  tokenizer_use_fast: true
  device_map: 'auto'
  torch_dtype: 'bfloat16'

  # 4-bit quantization. use_4bit is false, so the bnb_4bit_* values are
  # presumably ignored — TODO confirm.
  use_4bit: false
  bnb_4bit_quant_type: 'nf4'
  bnb_4bit_use_double_quant: false
  bnb_4bit_compute_dtype: 'bfloat16'

  attn_implementation: null
# Dataset files, record field names and prompt formatting.
# NOTE(review): field semantics are inferred from the key names; confirm
# against the data loader.
data:
  train_jsonl: 'dpo_pairs_generated.jsonl'
  # eval_jsonl is an explicit null; eval_split_ratio is 0.1.
  eval_jsonl: null
  eval_split_ratio: 0.1
  prompt_field: 'prompt'
  chosen_field: 'chosen'
  rejected_field: 'rejected'
  score_field: 'f1_score'
  format_type: 'chatml'
  # Literal block scalar (`|`): line breaks are kept exactly as written and
  # the value ends with a single trailing newline. The file must be read as
  # UTF-8 because the arrows and the em dash are stored as literal characters.
  # NOTE(review): the "## Rules" list numbers two consecutive items "3.". The
  # text is runtime data and is kept as-is — confirm before renumbering.
  system_prompt: |
    You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.

    ## Output Format

    ##OUTPUT
    Explain the data flow and why each component must change:
    - Flow: [Input → Processing → Output with arrows]
    - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"
    - Explain coupling between components

    ##SELECT
    modify::crates/path/to/file.rs::impl::ComponentName
    add::crates/another/file.rs::function::AnotherComponent
    <EOS>

    ## Rules

    1. Use full paths: `remove::crates/folder/file.rs::Type::Name`
    2. Use `::` for nested items: `status::StructName::Type::Name`
    3. Always explain "must change because" and "without this"
    3. Types of components: function, struct, enum, impl, trait
    4. If there is extra information (e.g., enum variants), include that too.
    5. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>
  max_length: 2048
  shuffle: true
  num_proc: 4
# Adapter settings. The key names match common LoRA hyperparameters —
# confirm how the training script maps them.
peft:
  enabled: true
  r: 16
  lora_alpha: 32
  lora_dropout: 0.05
  # Quoted to make explicit that this is the string "none", not a YAML null.
  bias: 'none'
  target_modules: 'auto'
# Preference-optimization loss settings.
# NOTE(review): semantics are inferred from the key names; confirm against
# the trainer that consumes them.
dpo:
  beta: 0.1
  label_smoothing: 0.0
  loss_type: 'sigmoid'
  # Presumably complementary switches: as written, a reference model is
  # requested and reference-free mode is off — confirm how the consumer
  # handles contradictory combinations.
  use_reference_model: true
  reference_free: false
# Optimizer hyperparameters, checkpoint/evaluation cadence and early stopping.
# NOTE(review): several key names resemble Hugging Face TrainingArguments
# fields; confirm how the consuming script maps them.
train:
  num_train_epochs: 3
  per_device_train_batch_size: 1
  per_device_eval_batch_size: 1
  gradient_accumulation_steps: 8
  # Written with an explicit mantissa dot. YAML 1.1 loaders such as PyYAML
  # resolve a bare `5e-5` to the string "5e-5", whereas `5.0e-5` is a float
  # under both YAML 1.1 and YAML 1.2.
  learning_rate: 5.0e-5
  weight_decay: 0.0
  warmup_ratio: 0.1
  lr_scheduler_type: 'cosine'
  optim: 'adamw_torch'
  max_grad_norm: 1.0
  gradient_checkpointing: true
  logging_steps: 2

  # Checkpointing and evaluation cadence. save_steps (100) is a whole
  # multiple of eval_steps (25).
  # NOTE(review): Hugging Face's Trainer requires that relationship when
  # load_best_model_at_end is true — confirm it applies to this consumer.
  save_strategy: 'steps'
  save_steps: 100
  save_total_limit: 10
  evaluation_strategy: 'steps'
  eval_steps: 25
  load_best_model_at_end: true

  # Early-stopping options, presumably read by the training script.
  early_stopping:
    enabled: true
    patience: 5
    min_delta: 0.001
    metric: 'eval_loss'
    mode: 'min'

  # The string "auto", not a boolean; how it is resolved is up to the consumer.
  resume_from_checkpoint: 'auto'
# Post-training merge/export settings.
# NOTE(review): semantics are inferred from the key names; confirm against
# the merge step that reads them.
merge:
  enabled: true
  # NOTE(review): model.torch_dtype in this file is bfloat16 while the merged
  # output is float16 — confirm the narrower float16 range is intended.
  merged_dtype: 'float16'
  # Quoted: a size string, not a number.
  max_shard_size: '2GB'
  output_dir: './merged_14b_dpo_lora'