| run: | |
| run_dir: ./runs/dpo_run_14b_v1 | |
| seed: 42 | |
| wandb: | |
| enabled: true | |
| project: dpo-training | |
| entity: null | |
| name: null | |
| tags: | |
| - dpo-lora | |
| - preference-optimization | |
| notes: null | |
| model: | |
| repo_id: ../../Models/Qwen2.5-Coder-14B-CPT-SFT | |
| revision: null | |
| base_local_dir: base_model | |
| trust_remote_code: true | |
| tokenizer_use_fast: true | |
| device_map: auto | |
| torch_dtype: bfloat16 | |
| use_4bit: false | |
| bnb_4bit_quant_type: nf4 | |
| bnb_4bit_use_double_quant: false | |
| bnb_4bit_compute_dtype: bfloat16 | |
| attn_implementation: null | |
| data: | |
| train_jsonl: dpo_pairs_generated.jsonl | |
| eval_jsonl: null | |
| eval_split_ratio: 0.1 | |
| prompt_field: prompt | |
| chosen_field: chosen | |
| rejected_field: rejected | |
| score_field: f1_score | |
| format_type: chatml | |
| system_prompt: "You are a Hyperswitch Rust code analyzer. Identify functions/structs\ | |
| \ that need modification for a given task.\n\n## Output Format\n\n##OUTPUT\nExplain\ | |
| \ the data flow and why each component must change:\n- Flow: [Input \u2192 Processing\ | |
| \ \u2192 Output with arrows]\n- For each component: \"The [ComponentName] ([path])\ | |
| \ must [action] because [reason]\u2014without this, [consequence]\"\n- Explain\ | |
| \ coupling between components\n\n##SELECT\nmodify::crates/path/to/file.rs::impl::ComponentName\n\ | |
| add::crates/another/file.rs::function::AnotherComponent\n<EOS>\n\n## Rules\n\n\ | |
| 1. Use full paths: `remove::crates/folder/file.rs::Type::Name`\n2. Use `::` for\ | |
| \ nested items: `status::StructName::Type::Name`\n3. Always explain \"must change\ | |
| \ because\" and \"without this\"\n4. Types of components: function, struct, enum,\ | |
| \ impl, trait\n5. If there is extra information (e.g., enum variants), include\ | |
| \ that too.\n6. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>\n" | |
| max_length: 2048 | |
| shuffle: true | |
| num_proc: 4 | |
| peft: | |
| enabled: true | |
| r: 16 | |
| lora_alpha: 32 | |
| lora_dropout: 0.05 | |
| bias: none | |
| target_modules: auto | |
| dpo: | |
| beta: 0.1 | |
| label_smoothing: 0.0 | |
| loss_type: sigmoid | |
| use_reference_model: true | |
| reference_free: false | |
| train: | |
| num_train_epochs: 3 | |
| per_device_train_batch_size: 1 | |
| per_device_eval_batch_size: 1 | |
| gradient_accumulation_steps: 8 | |
| learning_rate: 5e-5 | |
| weight_decay: 0.0 | |
| warmup_ratio: 0.1 | |
| lr_scheduler_type: cosine | |
| optim: adamw_torch | |
| max_grad_norm: 1.0 | |
| gradient_checkpointing: true | |
| logging_steps: 2 | |
| save_strategy: steps | |
| save_steps: 100 | |
| save_total_limit: 10 | |
| evaluation_strategy: steps | |
| eval_steps: 25 | |
| load_best_model_at_end: true | |
| early_stopping: | |
| enabled: true | |
| patience: 5 | |
| min_delta: 0.001 | |
| metric: eval_loss | |
| mode: min | |
| resume_from_checkpoint: auto | |
| merge: | |
| enabled: true | |
| merged_dtype: float16 | |
| max_shard_size: 2GB | |
| output_dir: ./merged_14b_dpo_lora | |