test_base_infix_1epoch / configs / training_medium.json
augustocsc — "Test training flow - 1 epoch" (commit 2c4ca2f, verified)
{
"model_config": {
"model_name_or_path": "gpt2-medium",
"model_size": "355M",
"description": "GPT-2 Medium - 355M parameters"
},
"training_args": {
"num_train_epochs": 2,
"per_device_train_batch_size": 8,
"per_device_eval_batch_size": 8,
"gradient_accumulation_steps": 8,
"effective_batch_size": 64,
"learning_rate": 3e-5,
"weight_decay": 0.01,
"warmup_steps": 100,
"max_grad_norm": 1.0,
"lr_scheduler_type": "cosine",
"fp16": true,
"seed": 42,
"block_size": 128
},
"evaluation_args": {
"eval_strategy": "epoch",
"eval_steps": null,
"metric_for_best_model": "eval_loss",
"greater_is_better": false,
"load_best_model_at_end": true
},
"save_args": {
"save_strategy": "epoch",
"save_steps": null,
"save_total_limit": 2
},
"logging_args": {
"logging_dir": "./output/logs",
"logging_steps": 50,
"report_to": "wandb"
},
"lora_config": {
"r": 8,
"lora_alpha": 32,
"target_modules": ["c_attn", "c_proj"],
"lora_dropout": 0.05,
"bias": "none",
"task_type": "CAUSAL_LM"
},
"dataset_config": {
"dataset_repo_id": "augustocsc/sintetico_natural",
"data_dir": "700K",
"data_columns": {
"infix": "i_prompt_n",
"prefix": "p_prompt_n"
}
},
"hub_config": {
"push_to_hub": true,
"hub_model_id_template": "augustocsc/Se355M_700K_{format}",
"formats": ["infix", "prefix"]
},
"estimated_time": {
"per_epoch_minutes": 90,
"total_hours": 3,
"notes": "Estimated for AWS g5.xlarge with A10G GPU"
}
}