{
  "output_dir": "output",
  "model_name_or_path": "models/Qwen2.5-1.5B-Instruct",
  "deepspeed": "./train_args/ds_z3_config.json",
  "train_file": "train_GPT/conversations_20000_MASK.jsonl",
  "template_name": "qwen",
  "train_mode": "full",
  "num_train_epochs": 3,
  "per_device_train_batch_size": 4,
  "gradient_accumulation_steps": 16,
  "learning_rate": 1e-05,
  "max_seq_length": 4096,
  "logging_steps": 200,
  "save_steps": 200,
  "save_total_limit": 1,
  "lr_scheduler_type": "cosine",
  "warmup_steps": 50,
  "gradient_checkpointing": false,
  "disable_tqdm": false,
  "optim": "adamw_hf",
  "seed": 42,
  "fp16": true,
  "report_to": "tensorboard",
  "dataloader_num_workers": 0,
  "save_strategy": "steps",
  "weight_decay": 0,
  "max_grad_norm": 1.0,
  "remove_unused_columns": false
}