| { | |
| "output_dir": "./results", | |
| "overwrite_output_dir": true, | |
| "max_steps": 200000, | |
| "per_device_train_batch_size": 32, | |
| "per_device_eval_batch_size": 32, | |
| "gradient_accumulation_steps": 1, | |
| "learning_rate": 0.0001, | |
| "weight_decay": 0.0, | |
| "adam_beta1": 0.9, | |
| "adam_beta2": 0.999, | |
| "adam_epsilon": 1e-08, | |
| "max_grad_norm": 1.0, | |
| "num_train_epochs": 20, | |
| "warmup_steps": 500, | |
| "logging_steps": 1000, | |
| "save_steps": 5000, | |
| "eval_steps": 5000, | |
| "load_best_model_at_end": true, | |
| "metric_for_best_model": "eval_loss", | |
| "greater_is_better": false, | |
| "fp16": true, | |
| "fp16_backend": "auto", | |
| "seed": 42 | |
| } |