| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 6.731946144430845, | |
| "global_step": 5500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 9.288129560420003e-05, | |
| "loss": 0.6366, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "eval_accuracy": 0.89628166395538, | |
| "eval_f1": 0.3185402661735773, | |
| "eval_loss": 0.3915744125843048, | |
| "eval_runtime": 10.4747, | |
| "eval_samples_per_second": 314.091, | |
| "eval_steps_per_second": 39.333, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 8.398291510945009e-05, | |
| "loss": 0.3572, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "eval_accuracy": 0.908552172902626, | |
| "eval_f1": 0.4301535570388646, | |
| "eval_loss": 0.3250293731689453, | |
| "eval_runtime": 10.4843, | |
| "eval_samples_per_second": 313.802, | |
| "eval_steps_per_second": 39.297, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 7.508453461470013e-05, | |
| "loss": 0.2839, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "eval_accuracy": 0.9177550546130606, | |
| "eval_f1": 0.4733065481810558, | |
| "eval_loss": 0.29356610774993896, | |
| "eval_runtime": 10.3662, | |
| "eval_samples_per_second": 317.379, | |
| "eval_steps_per_second": 39.745, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 6.618615411995017e-05, | |
| "loss": 0.2231, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "eval_accuracy": 0.9202416918429003, | |
| "eval_f1": 0.5303373844974727, | |
| "eval_loss": 0.29578134417533875, | |
| "eval_runtime": 10.3862, | |
| "eval_samples_per_second": 316.768, | |
| "eval_steps_per_second": 39.668, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 5.728777362520021e-05, | |
| "loss": 0.1893, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "eval_accuracy": 0.9223100162677202, | |
| "eval_f1": 0.5747956508031589, | |
| "eval_loss": 0.32106420397758484, | |
| "eval_runtime": 10.402, | |
| "eval_samples_per_second": 316.284, | |
| "eval_steps_per_second": 39.608, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 3.67, | |
| "learning_rate": 4.838939313045026e-05, | |
| "loss": 0.1372, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 3.67, | |
| "eval_accuracy": 0.9265628631187544, | |
| "eval_f1": 0.5956050840740277, | |
| "eval_loss": 0.2856525480747223, | |
| "eval_runtime": 10.4359, | |
| "eval_samples_per_second": 315.259, | |
| "eval_steps_per_second": 39.479, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "learning_rate": 3.9491012635700305e-05, | |
| "loss": 0.1148, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "eval_accuracy": 0.9301417615617011, | |
| "eval_f1": 0.6182402434709684, | |
| "eval_loss": 0.2969909608364105, | |
| "eval_runtime": 10.3712, | |
| "eval_samples_per_second": 317.225, | |
| "eval_steps_per_second": 39.725, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "learning_rate": 3.059263214095035e-05, | |
| "loss": 0.092, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "eval_accuracy": 0.932396002788752, | |
| "eval_f1": 0.6418559187952009, | |
| "eval_loss": 0.28680795431137085, | |
| "eval_runtime": 10.3835, | |
| "eval_samples_per_second": 316.85, | |
| "eval_steps_per_second": 39.678, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 5.51, | |
| "learning_rate": 2.169425164620039e-05, | |
| "loss": 0.0585, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 5.51, | |
| "eval_accuracy": 0.932419242389031, | |
| "eval_f1": 0.6322022976473333, | |
| "eval_loss": 0.30797600746154785, | |
| "eval_runtime": 10.3681, | |
| "eval_samples_per_second": 317.319, | |
| "eval_steps_per_second": 39.737, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 6.12, | |
| "learning_rate": 1.2795871151450436e-05, | |
| "loss": 0.0492, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 6.12, | |
| "eval_accuracy": 0.9329769927957239, | |
| "eval_f1": 0.6370967443537814, | |
| "eval_loss": 0.31786486506462097, | |
| "eval_runtime": 10.4041, | |
| "eval_samples_per_second": 316.22, | |
| "eval_steps_per_second": 39.6, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 6.73, | |
| "learning_rate": 3.897490656700481e-06, | |
| "loss": 0.0295, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 6.73, | |
| "eval_accuracy": 0.934278410411341, | |
| "eval_f1": 0.6499060895478286, | |
| "eval_loss": 0.3314834535121918, | |
| "eval_runtime": 10.3769, | |
| "eval_samples_per_second": 317.051, | |
| "eval_steps_per_second": 39.704, | |
| "step": 5500 | |
| } | |
| ], | |
| "max_steps": 5719, | |
| "num_train_epochs": 7, | |
| "total_flos": 1.1503917613056e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |