model: Qwen/Qwen2.5-7B-Instruct
split_dataset_ratio: 0.0
tuner_type: lora
target_modules:
  - q_proj
  - k_proj
  - v_proj
  - o_proj
torch_dtype: bfloat16
attn_impl: flash_attn
num_train_epochs: 5
per_device_train_batch_size: 1
per_device_eval_batch_size: 1
learning_rate: 1e-4
dataset: swift/self-cognition#1000
gradient_accumulation_steps: 8
eval_steps: 1000
save_steps: 1000
save_total_limit: 5
logging_steps: 5
warmup_ratio: 0.05
dataloader_num_workers: 0
dataset_num_proc: 8
deepspeed: zero3
model_name: swift-bot
model_author: swift
use_ray: true
device_groups:
  nproc_per_node: 4
  default:
    device: GPU
    ranks: list(range(0, 4))
    workers:
      - default