{
  "model_config": {
    "model_name_or_path": "gpt2-medium",
    "model_size": "355M",
    "description": "GPT-2 Medium - 355M parameters"
  },
  "training_args": {
    "num_train_epochs": 2,
    "per_device_train_batch_size": 8,
    "per_device_eval_batch_size": 8,
    "gradient_accumulation_steps": 8,
    "effective_batch_size": 64,
    "learning_rate": 3e-5,
    "weight_decay": 0.01,
    "warmup_steps": 100,
    "max_grad_norm": 1.0,
    "lr_scheduler_type": "cosine",
    "fp16": true,
    "seed": 42,
    "block_size": 128
  },
  "evaluation_args": {
    "eval_strategy": "epoch",
    "eval_steps": null,
    "metric_for_best_model": "eval_loss",
    "greater_is_better": false,
    "load_best_model_at_end": true
  },
  "save_args": {
    "save_strategy": "epoch",
    "save_steps": null,
    "save_total_limit": 2
  },
  "logging_args": {
    "logging_dir": "./output/logs",
    "logging_steps": 50,
    "report_to": "wandb"
  },
  "lora_config": {
    "r": 8,
    "lora_alpha": 32,
    "target_modules": ["c_attn", "c_proj"],
    "lora_dropout": 0.05,
    "bias": "none",
    "task_type": "CAUSAL_LM"
  },
  "dataset_config": {
    "dataset_repo_id": "augustocsc/sintetico_natural",
    "data_dir": "700K",
    "data_columns": {
      "infix": "i_prompt_n",
      "prefix": "p_prompt_n"
    }
  },
  "hub_config": {
    "push_to_hub": true,
    "hub_model_id_template": "augustocsc/Se355M_700K_{format}",
    "formats": ["infix", "prefix"]
  },
  "estimated_time": {
    "per_epoch_minutes": 90,
    "total_hours": 3,
    "notes": "Estimated for AWS g5.xlarge with A10G GPU"
  }
}
|
|