# task2file-llm/trainer-kit/SFT/config_instruct.yaml
run:
  run_dir: "./runs/instruct_run_24b"
  seed: 42

# WandB integration for experiment tracking
wandb:
  enabled: true  # Set to true to enable wandb logging
  project: "sft-training"  # WandB project name
  entity: null  # WandB entity/team (optional)
  name: null  # Run name (optional, will auto-generate if null)
  tags: ["sft-lora", "24b-Devstral"]  # List of tags for the run (e.g., ["lora", "qlora", "experiment-1"])
  notes: null  # Run description/notes (optional)
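As a rough sketch, these keys map one-to-one onto `wandb.init(...)` arguments. The helper name below is hypothetical (not part of the trainer-kit); it assumes the `wandb` block has already been loaded into a dict, e.g. via `yaml.safe_load`.

```python
# Hypothetical helper: forward the `wandb` config block to wandb.init.
def init_wandb(cfg: dict):
    if not cfg.get("enabled", False):
        return None
    import wandb  # imported lazily so the package is only needed when enabled
    return wandb.init(
        project=cfg.get("project"),
        entity=cfg.get("entity"),  # None lets wandb pick the default entity
        name=cfg.get("name"),      # None auto-generates a run name
        tags=cfg.get("tags") or [],
        notes=cfg.get("notes"),
    )
```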
model:
  # Local 24B model produced by merging the CPT LoRA run
  repo_id: "./CPT/runs/cpt_run_v1/merged_24b_cpt_lora"
  revision: null
  # Used only when repo_id is a HF repo (not a local path)
  base_local_dir: "base_model"
  trust_remote_code: true
  tokenizer_use_fast: true
  device_map: "auto"
  torch_dtype: "bfloat16"  # "float16" | "bfloat16" | "float32"
  # QLoRA
  use_4bit: false
  bnb_4bit_quant_type: "nf4"
  bnb_4bit_use_double_quant: false
  bnb_4bit_compute_dtype: "bfloat16"
  # optional: "flash_attention_2" | "sdpa" | null
  attn_implementation: null
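These keys correspond to `AutoModelForCausalLM.from_pretrained` arguments, with the `bnb_4bit_*` group feeding a `BitsAndBytesConfig` only when `use_4bit` is true. A minimal, dependency-free sketch of that mapping (the function name and dict shape are illustrative, not the trainer-kit's actual loader):

```python
# Illustrative sketch: turn the `model` block into from_pretrained-style kwargs.
# A quantization entry is built only when use_4bit is true; with the config
# above (use_4bit: false) it is omitted entirely.
def build_model_kwargs(cfg: dict) -> dict:
    kwargs = {
        "revision": cfg.get("revision"),
        "trust_remote_code": cfg.get("trust_remote_code", False),
        "device_map": cfg.get("device_map", "auto"),
        "torch_dtype": cfg.get("torch_dtype", "bfloat16"),
        "attn_implementation": cfg.get("attn_implementation"),
    }
    if cfg.get("use_4bit"):
        kwargs["quantization_config"] = {
            "load_in_4bit": True,
            "bnb_4bit_quant_type": cfg.get("bnb_4bit_quant_type", "nf4"),
            "bnb_4bit_use_double_quant": cfg.get("bnb_4bit_use_double_quant", False),
            "bnb_4bit_compute_dtype": cfg.get("bnb_4bit_compute_dtype", "bfloat16"),
        }
    return kwargs
```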
data:
  train_jsonl: "../sft_dataset.jsonl"
  eval_jsonl: null
  eval_split_ratio: 0.1
  # Field names in your JSONL data
  instruction_field: "instruction"  # This will be the system prompt
  input_field: "input"  # This is the task description
  output_field: "output"  # This is the analysis + selection
  # Formatting options
  format_type: "custom"  # "chatml" | "alpaca" | "custom"
  # For chatml format
  system_prompt: |
    You are a Hyperswitch Rust code analyzer. Identify functions/structs that need modification for a given task.

    ## Output Format
    ##OUTPUT
    Explain the data flow and why each component must change:
    - Flow: [Input → Processing → Output, with arrows]
    - For each component: "The [ComponentName] ([path]) must [action] because [reason]—without this, [consequence]"
    - Explain coupling between components
    ##SELECT
    modify::crates/path/to/file.rs::impl::ComponentName
    add::crates/another/file.rs::function::AnotherComponent
    <EOS>

    ## Rules
    1. Use full paths: `remove::crates/folder/file.rs::Type::Name`
    2. Use `::` for nested items: `status::StructName::Type::Name`
    3. Always explain "must change because" and "without this"
    4. Types of components: function, struct, enum, impl, trait
    5. If there is extra information (e.g., enum variants), include that too.
    6. Start with ##OUTPUT, end with ##SELECT, terminate with <EOS>

    ## Example
    ##TASK
    Add webhook subscription support
    ##OUTPUT
    The webhook system routes events via the EventClass enum. Flow: webhook → EventClass → handler → processing. The EventClass enum (crates/common_enums/src/enums.rs::EventClass) must add a Subscriptions variant because it defines event routing—without this, subscription events cannot be processed. The SubscriptionStatus impl (crates/common_enums/src/transformers.rs::SubscriptionStatus) must map to EventType because it converts status to events—without this, status changes don't trigger webhooks. These are coupled: EventClass routes to handlers that use SubscriptionStatus mappings.
    ##SELECT
    modify::crates/common_enums/src/enums.rs::enum::EventClass
    modify::crates/common_enums/src/transformers.rs::impl::SubscriptionStatus
    <EOS>
  # For custom format (only used when format_type="custom")
  custom_template: "##INSTRUCTION\n{instruction}<|im_end|>\n##TASK\n{input}<|im_end|>\n##OUTPUT\n{output}<|im_end|>"
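The template is a plain `str.format` pattern over the three configured field names. A sketch of how one JSONL record would be rendered (the record contents below are invented for illustration):

```python
# custom_template from the config, written as a Python string literal.
template = ("##INSTRUCTION\n{instruction}<|im_end|>\n"
            "##TASK\n{input}<|im_end|>\n"
            "##OUTPUT\n{output}<|im_end|>")

# One hypothetical JSONL record using the configured field names.
record = {
    "instruction": "You are a Hyperswitch Rust code analyzer.",
    "input": "Add webhook subscription support",
    "output": "##OUTPUT\n...\n##SELECT\n...\n<EOS>",
}

# Render the training text exactly as the trainer would see it.
text = template.format(
    instruction=record["instruction"],
    input=record["input"],
    output=record["output"],
)
```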
  max_length: 2048
  shuffle: true
  num_proc: 4
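Downstream of training, the model's `##SELECT` lines follow the `action::path::kind::Name` shape defined in the system prompt above. A hedged parsing sketch (not part of the trainer-kit; the field layout is inferred from the prompt):

```python
# Illustrative parser for ##SELECT lines such as
#   modify::crates/path/to/file.rs::impl::ComponentName
# Returns None for anything that doesn't match the expected shape
# (e.g. the <EOS> terminator).
def parse_select_line(line: str):
    parts = line.strip().split("::")
    if len(parts) < 4 or parts[0] not in {"add", "modify", "remove"}:
        return None
    return {
        "action": parts[0],
        "path": parts[1],
        "kind": parts[2],          # function | struct | enum | impl | trait
        "name": "::".join(parts[3:]),  # names may themselves contain "::"
    }
```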
peft:
  enabled: true
  r: 8
  lora_alpha: 16
  lora_dropout: 0.05
  bias: "none"
  target_modules: "auto"
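These keys map onto peft's `LoraConfig`; the effective LoRA scaling is `lora_alpha / r` (here 16/8 = 2.0). A sketch of the mapping; the `"auto"` fallback to common attention/MLP projection names is an assumption about the trainer-kit, not confirmed behavior:

```python
# Assumed "auto" expansion: the projection modules commonly targeted
# in Llama/Mistral-family models. The kit's actual resolution may differ.
DEFAULT_TARGETS = ["q_proj", "k_proj", "v_proj", "o_proj",
                   "gate_proj", "up_proj", "down_proj"]

def build_lora_kwargs(cfg: dict) -> dict:
    targets = cfg.get("target_modules", "auto")
    if targets == "auto":
        targets = DEFAULT_TARGETS
    return {
        "r": cfg["r"],
        "lora_alpha": cfg["lora_alpha"],  # scaling = lora_alpha / r
        "lora_dropout": cfg["lora_dropout"],
        "bias": cfg["bias"],
        "target_modules": targets,
        "task_type": "CAUSAL_LM",
    }
```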
train:
  # max_steps: 10
  num_train_epochs: 6
  per_device_train_batch_size: 1
  per_device_eval_batch_size: 1
  gradient_accumulation_steps: 8
  learning_rate: 1e-4
  weight_decay: 0.0
  warmup_ratio: 0.08
  lr_scheduler_type: "cosine"
  optim: "adamw_torch"  # changed from paged_adamw_8bit, which requires use_4bit=true
  max_grad_norm: 0.8
  gradient_checkpointing: true
  logging_steps: 2
  save_strategy: "steps"
  save_steps: 500
  save_total_limit: 20
  evaluation_strategy: "steps"
  eval_steps: 100
  load_best_model_at_end: true
  # Early stopping
  early_stopping:
    enabled: true
    patience: 3  # Number of evaluations with no improvement before stopping
    min_delta: 0.001  # Minimum change to qualify as improvement
    metric: "eval_loss"  # Metric to monitor
    mode: "min"  # "min" for loss, "max" for accuracy/etc.
  resume_from_checkpoint: "auto"
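With `per_device_train_batch_size: 1` and `gradient_accumulation_steps: 8`, the effective batch size is 8 per device. The early-stopping semantics above (stop after `patience` evaluations with no `min_delta` improvement in `eval_loss`) can be sketched as a small tracker; this is illustrative, not the kit's implementation (transformers' `EarlyStoppingCallback` behaves similarly):

```python
# Minimal early-stopping tracker matching the config semantics above.
class EarlyStopper:
    def __init__(self, patience=3, min_delta=0.001, mode="min"):
        self.patience, self.min_delta, self.mode = patience, min_delta, mode
        self.best = None
        self.bad_evals = 0

    def update(self, value: float) -> bool:
        """Record one eval metric; return True when training should stop."""
        improved = (
            self.best is None
            or (self.mode == "min" and value < self.best - self.min_delta)
            or (self.mode == "max" and value > self.best + self.min_delta)
        )
        if improved:
            self.best, self.bad_evals = value, 0
        else:
            self.bad_evals += 1
        return self.bad_evals >= self.patience
```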
merge:
  enabled: true
  merged_dtype: "float16"
  max_shard_size: "2GB"
  output_dir: "./merged_24b_instruct_lora"
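The merge step conventionally loads the adapter onto the base model, calls peft's `merge_and_unload()`, and saves with `save_pretrained(output_dir, max_shard_size=...)`. As a small runnable sketch, here is how a size string like `"2GB"` is conventionally interpreted (decimal units, mirroring transformers' size parsing; this helper is illustrative, not the kit's code):

```python
# Parse shard-size strings such as "2GB" or "500MB" into a byte count.
# Decimal units (GB = 10**9) per the usual transformers convention;
# binary units (GiB = 2**30) are also accepted.
UNITS = {"KIB": 2**10, "MIB": 2**20, "GIB": 2**30,
         "KB": 10**3, "MB": 10**6, "GB": 10**9}

def parse_shard_size(s: str) -> int:
    s = s.strip().upper()
    # Check longer suffixes first so "GIB" isn't mistaken for "B"-less "GB".
    for unit, factor in sorted(UNITS.items(), key=lambda kv: -len(kv[0])):
        if s.endswith(unit):
            return int(float(s[: -len(unit)]) * factor)
    return int(s)  # a bare integer is taken as bytes
```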