{
"output_dir": "/root/autodl-tmp/liwugpt/",
"model_name_or_path": "/root/autodl-tmp/qwen/",
"deepspeed": "./train_args/ds_z3_config.json",
"train_file": "/root/autodl-tmp/output_firefly.jsonl",
"template_name": "qwen",
"train_mode": "full",
"num_train_epochs": 2,
"per_device_train_batch_size": 4,
"gradient_accumulation_steps": 1,
"learning_rate": 1e-05,
"max_seq_length": 3800,
"logging_steps": 1,
"save_steps": 20000,
"save_total_limit": 1,
"lr_scheduler_type": "cosine",
"warmup_steps": 200,
"gradient_checkpointing": true,
"disable_tqdm": false,
"optim": "adamw_hf",
"seed": 42,
"fp16": true,
"report_to": "tensorboard",
"dataloader_num_workers": 0,
"save_strategy": "steps",
"weight_decay": 0,
"max_grad_norm": 1.0,
"remove_unused_columns": false
}