dtadpole
/

KernelCoder-4B_20250621-071556

Generated from Trainer

Model card Files Files and versions

KernelCoder-4B_20250621-071556 / training_config.yaml

dtadpole's picture

Upload fine-tuned model - Fine-tuned model

5d74c3d verified 10 months ago

history blame contribute delete

1.37 kB

	data:
	collator:
	pad_to_multiple_of: 8
	dataloader:
	drop_last: true
	num_workers: 4
	pin_memory: true
	shuffle: true
	processed_dir: finetune_processed_experiences
	fsdp:
	activation_checkpointing: true
	mixed_precision: true
	sharding_strategy: FULL_SHARD
	gpu:
	data_parallel: true
	single_gpu: false
	huggingface:
	create_model_card: true
	repo_name: dtadpole/KernelCoder-4B_20250621-071556
	upload: true
	lora:
	alpha: 64
	bias: none
	dropout: 0.05
	r: 64
	target_modules:
	- q_proj
	- k_proj
	- v_proj
	- o_proj
	- gate_proj
	- down_proj
	- up_proj
	model:
	dtype: null
	load_in_4bit: true
	max_seq_length: 16384
	name: Qwen/Qwen3-4B
	test:
	default_prompt: '<\|im_start\|>system

	You are a helpful assistant.<\|im_end\|>

	<\|im_start\|>user

	What is machine learning?<\|im_end\|>

	<\|im_start\|>assistant

	'
	generation:
	do_sample: true
	max_new_tokens: 1024
	temperature: 0.7
	use_cache: true
	training:
	gradient_accumulation_steps: 1
	learning_rate: 5.0e-05
	logging_steps: 1
	lr_scheduler_type: cosine
	max_grad_norm: 0.75
	max_steps: -1
	num_train_epochs: 2
	num_workers: 4
	optim: paged_adamw_8bit
	output_dir: ../finetune_model_output
	per_device_batch_size: 1
	save_steps: 100
	save_total_limit: 3
	seed: 3407
	use_custom_loss_masking: true
	warmup_steps: 10
	weight_decay: 0.05