{ "best_metric": null, "best_model_checkpoint": null, "epoch": 6.731946144430845, "global_step": 5500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.61, "learning_rate": 9.288129560420003e-05, "loss": 0.6366, "step": 500 }, { "epoch": 0.61, "eval_accuracy": 0.89628166395538, "eval_f1": 0.3185402661735773, "eval_loss": 0.3915744125843048, "eval_runtime": 10.4747, "eval_samples_per_second": 314.091, "eval_steps_per_second": 39.333, "step": 500 }, { "epoch": 1.22, "learning_rate": 8.398291510945009e-05, "loss": 0.3572, "step": 1000 }, { "epoch": 1.22, "eval_accuracy": 0.908552172902626, "eval_f1": 0.4301535570388646, "eval_loss": 0.3250293731689453, "eval_runtime": 10.4843, "eval_samples_per_second": 313.802, "eval_steps_per_second": 39.297, "step": 1000 }, { "epoch": 1.84, "learning_rate": 7.508453461470013e-05, "loss": 0.2839, "step": 1500 }, { "epoch": 1.84, "eval_accuracy": 0.9177550546130606, "eval_f1": 0.4733065481810558, "eval_loss": 0.29356610774993896, "eval_runtime": 10.3662, "eval_samples_per_second": 317.379, "eval_steps_per_second": 39.745, "step": 1500 }, { "epoch": 2.45, "learning_rate": 6.618615411995017e-05, "loss": 0.2231, "step": 2000 }, { "epoch": 2.45, "eval_accuracy": 0.9202416918429003, "eval_f1": 0.5303373844974727, "eval_loss": 0.29578134417533875, "eval_runtime": 10.3862, "eval_samples_per_second": 316.768, "eval_steps_per_second": 39.668, "step": 2000 }, { "epoch": 3.06, "learning_rate": 5.728777362520021e-05, "loss": 0.1893, "step": 2500 }, { "epoch": 3.06, "eval_accuracy": 0.9223100162677202, "eval_f1": 0.5747956508031589, "eval_loss": 0.32106420397758484, "eval_runtime": 10.402, "eval_samples_per_second": 316.284, "eval_steps_per_second": 39.608, "step": 2500 }, { "epoch": 3.67, "learning_rate": 4.838939313045026e-05, "loss": 0.1372, "step": 3000 }, { "epoch": 3.67, "eval_accuracy": 0.9265628631187544, "eval_f1": 0.5956050840740277, "eval_loss": 0.2856525480747223, "eval_runtime": 10.4359, "eval_samples_per_second": 315.259, "eval_steps_per_second": 39.479, "step": 3000 }, { "epoch": 4.28, "learning_rate": 3.9491012635700305e-05, "loss": 0.1148, "step": 3500 }, { "epoch": 4.28, "eval_accuracy": 0.9301417615617011, "eval_f1": 0.6182402434709684, "eval_loss": 0.2969909608364105, "eval_runtime": 10.3712, "eval_samples_per_second": 317.225, "eval_steps_per_second": 39.725, "step": 3500 }, { "epoch": 4.9, "learning_rate": 3.059263214095035e-05, "loss": 0.092, "step": 4000 }, { "epoch": 4.9, "eval_accuracy": 0.932396002788752, "eval_f1": 0.6418559187952009, "eval_loss": 0.28680795431137085, "eval_runtime": 10.3835, "eval_samples_per_second": 316.85, "eval_steps_per_second": 39.678, "step": 4000 }, { "epoch": 5.51, "learning_rate": 2.169425164620039e-05, "loss": 0.0585, "step": 4500 }, { "epoch": 5.51, "eval_accuracy": 0.932419242389031, "eval_f1": 0.6322022976473333, "eval_loss": 0.30797600746154785, "eval_runtime": 10.3681, "eval_samples_per_second": 317.319, "eval_steps_per_second": 39.737, "step": 4500 }, { "epoch": 6.12, "learning_rate": 1.2795871151450436e-05, "loss": 0.0492, "step": 5000 }, { "epoch": 6.12, "eval_accuracy": 0.9329769927957239, "eval_f1": 0.6370967443537814, "eval_loss": 0.31786486506462097, "eval_runtime": 10.4041, "eval_samples_per_second": 316.22, "eval_steps_per_second": 39.6, "step": 5000 }, { "epoch": 6.73, "learning_rate": 3.897490656700481e-06, "loss": 0.0295, "step": 5500 }, { "epoch": 6.73, "eval_accuracy": 0.934278410411341, "eval_f1": 0.6499060895478286, "eval_loss": 0.3314834535121918, "eval_runtime": 10.3769, "eval_samples_per_second": 317.051, "eval_steps_per_second": 39.704, "step": 5500 } ], "max_steps": 5719, "num_train_epochs": 7, "total_flos": 1.1503917613056e+16, "trial_name": null, "trial_params": null }