# task2file / config_resolved.yaml
# (upload metadata: SirajRLX, "Add files using upload-large-folder tool", rev 28847d8)
---
run:
  # Output directory for checkpoints, logs, and this resolved config.
  run_dir: ./runs/dpo_run_14b_v1
  seed: 42
  # Weights & Biases experiment tracking; entity/name fall back to wandb defaults.
  wandb:
    enabled: true
    project: dpo-training
    entity: null
    name: null
    tags:
      - dpo-lora
      - preference-optimization
    notes: null
model:
  # Despite the name, repo_id here is a relative local path to the
  # CPT+SFT'd Qwen2.5-Coder-14B checkpoint, not a Hub repository id.
  repo_id: ../../Models/Qwen2.5-Coder-14B-CPT-SFT
  revision: null
  base_local_dir: base_model
  trust_remote_code: true
  tokenizer_use_fast: true
  device_map: auto
  torch_dtype: bfloat16
  # 4-bit (bitsandbytes) loading is disabled; the bnb_4bit_* knobs below
  # only take effect when use_4bit is true.
  use_4bit: false
  bnb_4bit_quant_type: nf4
  bnb_4bit_use_double_quant: false
  bnb_4bit_compute_dtype: bfloat16
  # null lets the framework pick its default attention backend.
  attn_implementation: null
data:
  train_jsonl: dpo_pairs_generated.jsonl
  eval_jsonl: null
  # With eval_jsonl null, this fraction of the training data is held out for eval
  # — presumably; TODO confirm against the loader.
  eval_split_ratio: 0.1
  prompt_field: prompt
  chosen_field: chosen
  rejected_field: rejected
  score_field: f1_score
  format_type: chatml
  # Literal block scalar (|): newlines kept verbatim, exactly one trailing
  # newline — byte-identical to the original escaped double-quoted string.
  # NOTE(review): the rule list numbers two items "3."; preserved verbatim,
  # since renumbering would change the runtime prompt.
  system_prompt: |
    You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.

    ## Output Format

    ##OUTPUT
    Explain the data flow and why each component must change:
    - Flow: [Input → Processing → Output with arrows]
    - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"
    - Explain coupling between components

    ##SELECT
    modify::crates/path/to/file.rs::impl::ComponentName
    add::crates/another/file.rs::function::AnotherComponent
    <EOS>

    ## Rules

    1. Use full paths: `remove::crates/folder/file.rs::Type::Name`
    2. Use `::` for nested items: `status::StructName::Type::Name`
    3. Always explain "must change because" and "without this"
    3. Types of components: function, struct, enum, impl, trait
    4. If there is extra information (e.g., enum variants), include that too.
    5. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>
  max_length: 2048
  shuffle: true
  num_proc: 4
peft:
  enabled: true
  # LoRA hyperparameters: rank, scaling alpha, dropout.
  r: 16
  lora_alpha: 32
  lora_dropout: 0.05
  # "none" / "auto" are consumer-defined option strings (plain scalars stay strings).
  bias: none
  target_modules: auto
dpo:
  # DPO temperature on the implicit reward.
  beta: 0.1
  label_smoothing: 0.0
  loss_type: sigmoid
  # A frozen reference model is kept (reference_free stays false accordingly).
  use_reference_model: true
  reference_free: false
train:
  num_train_epochs: 3
  per_device_train_batch_size: 1
  per_device_eval_batch_size: 1
  # Effective batch size = 1 x 8 per device.
  gradient_accumulation_steps: 8
  # Explicit mantissa dot: YAML 1.1 loaders (e.g. PyYAML) parse a bare
  # `5e-5` as the *string* "5e-5", not a float — `5.0e-5` loads as 0.00005.
  learning_rate: 5.0e-5
  weight_decay: 0.0
  warmup_ratio: 0.1
  lr_scheduler_type: cosine
  optim: adamw_torch
  max_grad_norm: 1.0
  gradient_checkpointing: true
  logging_steps: 2
  save_strategy: steps
  save_steps: 100
  save_total_limit: 10
  evaluation_strategy: steps
  eval_steps: 25
  load_best_model_at_end: true
  # Stop if eval_loss fails to improve by min_delta for `patience` evaluations.
  early_stopping:
    enabled: true
    patience: 5
    min_delta: 0.001
    metric: eval_loss
    mode: min
  # "auto": resume from the latest checkpoint in run_dir if one exists
  # — presumably; verify against the trainer's resume logic.
  resume_from_checkpoint: auto
merge:
  # Merge the LoRA adapter back into the base weights after training.
  enabled: true
  merged_dtype: float16
  # Quoted so the digit-leading value is unambiguously the string "2GB".
  max_shard_size: "2GB"
  output_dir: ./merged_14b_dpo_lora