{
    "output_dir": "/root/autodl-tmp/liwugpt/",
    "model_name_or_path": "/root/autodl-tmp/qwen/",
    "deepspeed": "./train_args/ds_z3_config.json",
    "train_file": "/root/autodl-tmp/output_firefly.jsonl",
    "template_name": "qwen",
    "train_mode": "full",
    "num_train_epochs": 2,
    "per_device_train_batch_size": 4,
    "gradient_accumulation_steps": 1,
    "learning_rate": 1e-05,
    "max_seq_length": 3800,
    "logging_steps": 1,
    "save_steps": 20000,
    "save_total_limit": 1,
    "lr_scheduler_type": "cosine",
    "warmup_steps": 200,
    "gradient_checkpointing": true,
    "disable_tqdm": false,
    "optim": "adamw_hf",
    "seed": 42,
    "fp16": true,
    "report_to": "tensorboard",
    "dataloader_num_workers": 0,
    "save_strategy": "steps",
    "weight_decay": 0,
    "max_grad_norm": 1.0,
    "remove_unused_columns": false
}