model: Qwen/Qwen2.5-7B-Instruct
split_dataset_ratio: 0.0
tuner_type: lora
target_modules:
  - q_proj
  - k_proj
  - v_proj
  - o_proj
torch_dtype: bfloat16
attn_impl: flash_attn
num_train_epochs: 5
per_device_train_batch_size: 1
per_device_eval_batch_size: 1
learning_rate: 1e-4
dataset: swift/self-cognition#1000
gradient_accumulation_steps: 8
eval_steps: 1000
save_steps: 1000
save_total_limit: 5
logging_steps: 5
warmup_ratio: 0.05
dataloader_num_workers: 0
dataset_num_proc: 8
deepspeed: zero3
model_name: swift-bot
model_author: swift
use_ray: true
device_groups:
  nproc_per_node: 4
  default:
    device: GPU
    ranks: list(range(0, 4))
    workers:
      - default