{
  "output_dir": "/root/autodl-tmp/liwugpt/",
  "model_name_or_path": "/root/autodl-tmp/qwen/",
  "deepspeed": "./train_args/ds_z3_config.json",
  "train_file": "/root/autodl-tmp/output_firefly.jsonl",
  "template_name": "qwen",
  "train_mode": "full",
  "num_train_epochs": 2,
  "per_device_train_batch_size": 4,
  "gradient_accumulation_steps": 1,
  "learning_rate": 1e-05,
  "max_seq_length": 3800,
  "logging_steps": 1,
  "save_steps": 20000,
  "save_total_limit": 1,
  "lr_scheduler_type": "cosine",
  "warmup_steps": 200,
  "gradient_checkpointing": true,
  "disable_tqdm": false,
  "optim": "adamw_hf",
  "seed": 42,
  "fp16": true,
  "report_to": "tensorboard",
  "dataloader_num_workers": 0,
  "save_strategy": "steps",
  "weight_decay": 0,
  "max_grad_norm": 1.0,
  "remove_unused_columns": false
}