{
  "model_config": {
    "model_name_or_path": "gpt2-medium",
    "model_size": "355M",
    "description": "GPT-2 Medium - 355M parameters"
  },
  "training_args": {
    "num_train_epochs": 2,
    "per_device_train_batch_size": 8,
    "per_device_eval_batch_size": 8,
    "gradient_accumulation_steps": 8,
    "effective_batch_size": 64,
    "learning_rate": 3e-5,
    "weight_decay": 0.01,
    "warmup_steps": 100,
    "max_grad_norm": 1.0,
    "lr_scheduler_type": "cosine",
    "fp16": true,
    "seed": 42,
    "block_size": 128
  },
  "evaluation_args": {
    "eval_strategy": "epoch",
    "eval_steps": null,
    "metric_for_best_model": "eval_loss",
    "greater_is_better": false,
    "load_best_model_at_end": true
  },
  "save_args": {
    "save_strategy": "epoch",
    "save_steps": null,
    "save_total_limit": 2
  },
  "logging_args": {
    "logging_dir": "./output/logs",
    "logging_steps": 50,
    "report_to": "wandb"
  },
  "lora_config": {
    "r": 8,
    "lora_alpha": 32,
    "target_modules": ["c_attn", "c_proj"],
    "lora_dropout": 0.05,
    "bias": "none",
    "task_type": "CAUSAL_LM"
  },
  "dataset_config": {
    "dataset_repo_id": "augustocsc/sintetico_natural",
    "data_dir": "700K",
    "data_columns": {
      "infix": "i_prompt_n",
      "prefix": "p_prompt_n"
    }
  },
  "hub_config": {
    "push_to_hub": true,
    "hub_model_id_template": "augustocsc/Se355M_700K_{format}",
    "formats": ["infix", "prefix"]
  },
  "estimated_time": {
    "per_epoch_minutes": 90,
    "total_hours": 3,
    "notes": "Estimated for AWS g5.xlarge with A10G GPU"
  }
}
|
|