# task2file-llm / trainer-kit / DPO / config_dpo.yaml
# Uploaded by SirajRLX via huggingface_hub (commit 4eae728, verified)
---
# Run bookkeeping: output directory for checkpoints/logs and the global RNG seed.
run:
  run_dir: "./runs/dpo_run_24b_v1"
  seed: 42
# WandB integration for experiment tracking
wandb:
  enabled: true
  project: "dpo-training"
  entity: null  # null -> default entity of the logged-in account
  name: null    # null -> auto-generated run name
  tags: ["dpo-lora", "preference-optimization"]
  notes: null
# Base model, tokenizer, and quantization settings.
model:
  # Use the SFT model as base
  repo_id: "../../Models/Devstral-Small-2-24B-HS-CPT-SFT"
  revision: null
  # Used only when repo_id is a HF repo (not a local path)
  base_local_dir: "base_model"
  trust_remote_code: true
  tokenizer_use_fast: true
  device_map: "auto"
  torch_dtype: "bfloat16"  # "float16" | "bfloat16" | "float32"
  # QLoRA (4-bit quantization); the bnb_* keys below only apply when use_4bit is true
  use_4bit: false
  bnb_4bit_quant_type: "nf4"
  bnb_4bit_use_double_quant: false
  bnb_4bit_compute_dtype: "bfloat16"
  # optional: "flash_attention_2" | "sdpa" | null
  attn_implementation: null
# Dataset location, JSONL field mapping, and prompt formatting.
data:
  train_jsonl: "dpo_pairs_generated.jsonl"
  eval_jsonl: null  # null -> hold out eval_split_ratio of the train set
  eval_split_ratio: 0.1
  # Field names in your JSONL data for DPO
  # DPO requires: prompt, chosen, rejected
  prompt_field: "prompt"
  chosen_field: "chosen"
  rejected_field: "rejected"
  # If you have a file-level F1 score field for ranking
  score_field: "f1_score"  # Optional: used for ranking if available
  # Formatting options
  format_type: "chatml"  # "chatml" | "alpaca" | "custom"
  # System prompt to prepend to all prompts
  # NOTE(review): rules were numbered 1,2,3,3,4,5 — renumbered to 1-6.
  system_prompt: |
    You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.
    ## Output Format
    ##OUTPUT
    Explain the data flow and why each component must change:
    - Flow: [Input Processing Output with arrows]
    - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"
    - Explain coupling between components
    ##SELECT
    modify::crates/path/to/file.rs::impl::ComponentName
    add::crates/another/file.rs::function::AnotherComponent
    <EOS>
    ## Rules
    1. Use full paths: `remove::crates/folder/file.rs::Type::Name`
    2. Use `::` for nested items: `status::StructName::Type::Name`
    3. Always explain "must change because" and "without this"
    4. Types of components: function, struct, enum, impl, trait
    5. If there is extra information (e.g., enum variants), include that too.
    6. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>
  max_length: 2048
  shuffle: true
  num_proc: 4
# LoRA adapter configuration (applied on top of the base model).
peft:
  enabled: true
  r: 16             # LoRA rank
  lora_alpha: 32    # scaling factor (alpha / r = effective scale of 2)
  lora_dropout: 0.05
  bias: "none"
  target_modules: "auto"  # "auto" -> let the trainer pick target projections
# DPO specific parameters
dpo:
  beta: 0.1            # Temperature parameter for DPO loss (higher = less aggressive)
  label_smoothing: 0.0 # Label smoothing for DPO
  loss_type: "sigmoid" # "sigmoid" | "hinge" | "ipo" | "kto"
  # Reference model settings
  use_reference_model: true  # If false, uses frozen copy of initial model
  reference_free: false      # If true, doesn't use reference model at all
# Trainer hyperparameters.
train:
  num_train_epochs: 3
  per_device_train_batch_size: 1
  per_device_eval_batch_size: 1
  gradient_accumulation_steps: 8  # effective batch = 1 x 8 per device
  # Fixed: "5e-5" is parsed as a STRING by YAML 1.1 loaders (PyYAML) because
  # the 1.1 float pattern requires a dot; "5.0e-5" loads as a float everywhere.
  learning_rate: 5.0e-5  # Lower than SFT for stability
  weight_decay: 0.0
  warmup_ratio: 0.1
  lr_scheduler_type: "cosine"
  optim: "adamw_torch"
  max_grad_norm: 1.0
  gradient_checkpointing: true
  logging_steps: 2
  save_strategy: "steps"
  save_steps: 100
  save_total_limit: 10
  evaluation_strategy: "steps"
  eval_steps: 25
  load_best_model_at_end: true
  # Early stopping
  early_stopping:
    enabled: true
    patience: 5       # evaluations without improvement before stopping
    min_delta: 0.001  # minimum metric change that counts as improvement
    metric: "eval_loss"
    mode: "min"
  resume_from_checkpoint: "auto"  # presumably "auto" -> latest checkpoint in run_dir; confirm in trainer code
# Post-training merge of the LoRA adapter into a standalone model.
merge:
  enabled: true
  merged_dtype: "float16"
  max_shard_size: "2GB"
  # Fixed: directory said "14b" but this run targets a 24B base (see model.repo_id)
  output_dir: "./merged_24b_dpo_lora"