liwu
/

liwu_forum_post_2.0

Model card Files Files and versions

liwu_forum_post_2.0 / train_args.json

esbatmop's picture

Upload 14 files

5213cbb verified about 1 year ago

history blame contribute delete

863 Bytes

	{
	"output_dir": "/root/autodl-tmp/liwugpt/",
	"model_name_or_path": "/root/autodl-tmp/qwen/",
	"deepspeed": "./train_args/ds_z3_config.json",
	"train_file": "/root/autodl-tmp/output_firefly.jsonl",
	"template_name": "qwen",
	"train_mode": "full",
	"num_train_epochs": 2,
	"per_device_train_batch_size": 4,
	"gradient_accumulation_steps": 1,
	"learning_rate": 1e-05,
	"max_seq_length": 3800,
	"logging_steps": 1,
	"save_steps": 20000,
	"save_total_limit": 1,
	"lr_scheduler_type": "cosine",
	"warmup_steps": 200,
	"gradient_checkpointing": true,
	"disable_tqdm": false,
	"optim": "adamw_hf",
	"seed": 42,
	"fp16": true,
	"report_to": "tensorboard",
	"dataloader_num_workers": 0,
	"save_strategy": "steps",
	"weight_decay": 0,
	"max_grad_norm": 1.0,
	"remove_unused_columns": false
	}