| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 6.990967741935484, | |
| "global_step": 336, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 9e-06, | |
| "loss": 0.4828, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_acc": 86.17021276595744, | |
| "eval_loss": 0.3562283217906952, | |
| "eval_runtime": 82.5249, | |
| "eval_samples_per_second": 4.556, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 0.1477, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "eval_acc": 85.63829787234043, | |
| "eval_loss": 0.3522704243659973, | |
| "eval_runtime": 82.6705, | |
| "eval_samples_per_second": 4.548, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 7e-06, | |
| "loss": 0.0398, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "eval_acc": 87.76595744680851, | |
| "eval_loss": 0.7263116836547852, | |
| "eval_runtime": 82.5944, | |
| "eval_samples_per_second": 4.552, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 3.99, | |
| "learning_rate": 6e-06, | |
| "loss": 0.0179, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 3.99, | |
| "eval_acc": 87.5, | |
| "eval_loss": 0.8694424033164978, | |
| "eval_runtime": 82.5077, | |
| "eval_samples_per_second": 4.557, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0047, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "eval_acc": 86.70212765957447, | |
| "eval_loss": 1.1555883884429932, | |
| "eval_runtime": 82.5664, | |
| "eval_samples_per_second": 4.554, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 5.99, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 0.0005, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 5.99, | |
| "eval_acc": 89.09574468085107, | |
| "eval_loss": 1.2453709840774536, | |
| "eval_runtime": 82.6312, | |
| "eval_samples_per_second": 4.55, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "learning_rate": 3e-06, | |
| "loss": 0.0004, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "eval_acc": 89.09574468085107, | |
| "eval_loss": 1.210463523864746, | |
| "eval_runtime": 82.502, | |
| "eval_samples_per_second": 4.557, | |
| "step": 336 | |
| } | |
| ], | |
| "max_steps": 480, | |
| "num_train_epochs": 10, | |
| "total_flos": 1.4028769825494106e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |