{ "best_global_step": 4400, "best_metric": 0.453235387802124, "best_model_checkpoint": "roberta-base-multi-head-4-directions/checkpoint-4400", "epoch": 3.259558164180321, "eval_steps": 200, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13043123828156844, "eval_accuracy": 0.15051240893937523, "eval_f1_macro": 0.0654110324103885, "eval_f1_micro": 0.15051240893937523, "eval_loss": 0.6699725985527039, "eval_precision_macro": 0.03762810223484381, "eval_recall_macro": 0.25, "eval_roc_auc": 0.5065070998981289, "eval_runtime": 58.3231, "eval_samples_per_second": 138.864, "eval_steps_per_second": 69.441, "step": 200 }, { "epoch": 0.2608624765631369, "eval_accuracy": 0.3306581059390048, "eval_f1_macro": 0.21851269600524115, "eval_f1_micro": 0.3306581059390048, "eval_loss": 0.5907890796661377, "eval_precision_macro": 0.3477712230149137, "eval_recall_macro": 0.2628129027541947, "eval_roc_auc": 0.5264776449526523, "eval_runtime": 58.515, "eval_samples_per_second": 138.409, "eval_steps_per_second": 69.213, "step": 400 }, { "epoch": 0.3260780957039211, "grad_norm": 1.910775899887085, "learning_rate": 8.132333767926989e-07, "loss": 0.6418, "step": 500 }, { "epoch": 0.3912937148447053, "eval_accuracy": 0.39646869983948635, "eval_f1_macro": 0.24202854764905962, "eval_f1_micro": 0.39646869983948635, "eval_loss": 0.5681799650192261, "eval_precision_macro": 0.31314716750672944, "eval_recall_macro": 0.29579537486279056, "eval_roc_auc": 0.5825887308747437, "eval_runtime": 58.253, "eval_samples_per_second": 139.031, "eval_steps_per_second": 69.524, "step": 600 }, { "epoch": 0.5217249531262738, "eval_accuracy": 0.4446227929373997, "eval_f1_macro": 0.2752091664099515, "eval_f1_micro": 0.4446227929373997, "eval_loss": 0.5510929226875305, "eval_precision_macro": 0.3725228720549576, "eval_recall_macro": 0.3347772936726068, "eval_roc_auc": 0.6511260809997589, "eval_runtime": 58.5957, "eval_samples_per_second": 138.218, "eval_steps_per_second": 69.118, "step": 800 }, { "epoch": 0.6521561914078422, "grad_norm": 3.2853586673736572, "learning_rate": 1.6280964797913951e-06, "loss": 0.56, "step": 1000 }, { "epoch": 0.6521561914078422, "eval_accuracy": 0.47956537844178293, "eval_f1_macro": 0.339724330856578, "eval_f1_micro": 0.47956537844178293, "eval_loss": 0.5183268189430237, "eval_precision_macro": 0.5998469679206377, "eval_recall_macro": 0.37515425621657034, "eval_roc_auc": 0.7004256590184301, "eval_runtime": 58.4596, "eval_samples_per_second": 138.54, "eval_steps_per_second": 69.279, "step": 1000 }, { "epoch": 0.7825874296894106, "eval_accuracy": 0.4951228546734165, "eval_f1_macro": 0.3960300119997172, "eval_f1_micro": 0.4951228546734165, "eval_loss": 0.5027472972869873, "eval_precision_macro": 0.47649685990985047, "eval_recall_macro": 0.42432070581801545, "eval_roc_auc": 0.7210076403173139, "eval_runtime": 59.2756, "eval_samples_per_second": 136.633, "eval_steps_per_second": 68.325, "step": 1200 }, { "epoch": 0.9130186679709791, "eval_accuracy": 0.49364119027040376, "eval_f1_macro": 0.4202868285905695, "eval_f1_micro": 0.49364119027040376, "eval_loss": 0.5012013912200928, "eval_precision_macro": 0.476097069319352, "eval_recall_macro": 0.44606990421659504, "eval_roc_auc": 0.7341331293715618, "eval_runtime": 58.8986, "eval_samples_per_second": 137.507, "eval_steps_per_second": 68.762, "step": 1400 }, { "epoch": 0.9782342871117633, "grad_norm": 6.466639041900635, "learning_rate": 2.4429595827900914e-06, "loss": 0.5136, "step": 1500 }, { "epoch": 1.0430423086329177, "eval_accuracy": 0.5227805901963205, "eval_f1_macro": 0.4272500725974152, "eval_f1_micro": 0.5227805901963205, "eval_loss": 0.4897969961166382, "eval_precision_macro": 0.511088683081628, "eval_recall_macro": 0.4450094829273592, "eval_roc_auc": 0.7377316174079231, "eval_runtime": 58.3834, "eval_samples_per_second": 138.721, "eval_steps_per_second": 69.369, "step": 1600 }, { "epoch": 1.1734735469144861, "eval_accuracy": 0.5304358562785529, "eval_f1_macro": 0.47228705140051763, "eval_f1_micro": 0.5304358562785529, "eval_loss": 0.48175758123397827, "eval_precision_macro": 0.49865343258508565, "eval_recall_macro": 0.4751571425140743, "eval_roc_auc": 0.7504044589822995, "eval_runtime": 59.0784, "eval_samples_per_second": 137.089, "eval_steps_per_second": 68.553, "step": 1800 }, { "epoch": 1.3039047851960546, "grad_norm": 7.449667453765869, "learning_rate": 3.257822685788788e-06, "loss": 0.4842, "step": 2000 }, { "epoch": 1.3039047851960546, "eval_accuracy": 0.5330287689838251, "eval_f1_macro": 0.49309253039408907, "eval_f1_micro": 0.5330287689838251, "eval_loss": 0.4809001386165619, "eval_precision_macro": 0.5034709810584846, "eval_recall_macro": 0.4966387871579345, "eval_roc_auc": 0.758014108468557, "eval_runtime": 58.3487, "eval_samples_per_second": 138.803, "eval_steps_per_second": 69.41, "step": 2000 }, { "epoch": 1.434336023477623, "eval_accuracy": 0.5411779232003952, "eval_f1_macro": 0.5090942441371813, "eval_f1_micro": 0.5411779232003952, "eval_loss": 0.47537535429000854, "eval_precision_macro": 0.5108825390909519, "eval_recall_macro": 0.5109581830122815, "eval_roc_auc": 0.7651162037942221, "eval_runtime": 58.7575, "eval_samples_per_second": 137.838, "eval_steps_per_second": 68.927, "step": 2200 }, { "epoch": 1.5647672617591915, "eval_accuracy": 0.5522904062229905, "eval_f1_macro": 0.5071849375719386, "eval_f1_micro": 0.5522904062229905, "eval_loss": 0.468904048204422, "eval_precision_macro": 0.5307207892146724, "eval_recall_macro": 0.5061303129931647, "eval_roc_auc": 0.7695441321425415, "eval_runtime": 58.5621, "eval_samples_per_second": 138.298, "eval_steps_per_second": 69.157, "step": 2400 }, { "epoch": 1.6299828808999757, "grad_norm": 6.51912260055542, "learning_rate": 4.072685788787484e-06, "loss": 0.4695, "step": 2500 }, { "epoch": 1.69519850004076, "eval_accuracy": 0.5379676503272008, "eval_f1_macro": 0.48087910671865897, "eval_f1_micro": 0.5379676503272008, "eval_loss": 0.4785490036010742, "eval_precision_macro": 0.5346246897655283, "eval_recall_macro": 0.4906654493419952, "eval_roc_auc": 0.7663202674809741, "eval_runtime": 58.4801, "eval_samples_per_second": 138.491, "eval_steps_per_second": 69.254, "step": 2600 }, { "epoch": 1.8256297383223283, "eval_accuracy": 0.5545129028275095, "eval_f1_macro": 0.5144296310304717, "eval_f1_micro": 0.5545129028275095, "eval_loss": 0.46492108702659607, "eval_precision_macro": 0.5301331529844335, "eval_recall_macro": 0.5080649601410273, "eval_roc_auc": 0.7745130299239165, "eval_runtime": 58.563, "eval_samples_per_second": 138.295, "eval_steps_per_second": 69.156, "step": 2800 }, { "epoch": 1.9560609766038968, "grad_norm": 6.679250717163086, "learning_rate": 4.88754889178618e-06, "loss": 0.4655, "step": 3000 }, { "epoch": 1.9560609766038968, "eval_accuracy": 0.5494505494505495, "eval_f1_macro": 0.5210019068338745, "eval_f1_micro": 0.5494505494505495, "eval_loss": 0.46518564224243164, "eval_precision_macro": 0.5276079708493782, "eval_recall_macro": 0.5245104582824609, "eval_roc_auc": 0.7757459310311635, "eval_runtime": 58.703, "eval_samples_per_second": 137.966, "eval_steps_per_second": 68.991, "step": 3000 }, { "epoch": 2.0860846172658354, "eval_accuracy": 0.5653784417829362, "eval_f1_macro": 0.528637637687615, "eval_f1_micro": 0.5653784417829362, "eval_loss": 0.46096640825271606, "eval_precision_macro": 0.5443547583807372, "eval_recall_macro": 0.5223821110706193, "eval_roc_auc": 0.781316894157911, "eval_runtime": 58.6208, "eval_samples_per_second": 138.159, "eval_steps_per_second": 69.088, "step": 3200 }, { "epoch": 2.216515855547404, "eval_accuracy": 0.5498209655513027, "eval_f1_macro": 0.5186060540296338, "eval_f1_micro": 0.5498209655513027, "eval_loss": 0.46535903215408325, "eval_precision_macro": 0.5399973417347204, "eval_recall_macro": 0.5255215009903015, "eval_roc_auc": 0.7808947543899514, "eval_runtime": 58.434, "eval_samples_per_second": 138.601, "eval_steps_per_second": 69.309, "step": 3400 }, { "epoch": 2.281731474688188, "grad_norm": 7.935737609863281, "learning_rate": 4.963030947642902e-06, "loss": 0.4414, "step": 3500 }, { "epoch": 2.3469470938289723, "eval_accuracy": 0.5530312384244969, "eval_f1_macro": 0.5192037737144601, "eval_f1_micro": 0.5530312384244969, "eval_loss": 0.4659627676010132, "eval_precision_macro": 0.542720199525297, "eval_recall_macro": 0.5266595526270865, "eval_roc_auc": 0.7817212427653404, "eval_runtime": 58.8426, "eval_samples_per_second": 137.638, "eval_steps_per_second": 68.828, "step": 3600 }, { "epoch": 2.4773783321105407, "eval_accuracy": 0.564390665514261, "eval_f1_macro": 0.5393273157888367, "eval_f1_micro": 0.564390665514261, "eval_loss": 0.45927414298057556, "eval_precision_macro": 0.5430622655929049, "eval_recall_macro": 0.5388167782580582, "eval_roc_auc": 0.785497056666471, "eval_runtime": 60.3532, "eval_samples_per_second": 134.193, "eval_steps_per_second": 67.105, "step": 3800 }, { "epoch": 2.607809570392109, "grad_norm": 6.252172946929932, "learning_rate": 4.920143415906128e-06, "loss": 0.433, "step": 4000 }, { "epoch": 2.607809570392109, "eval_accuracy": 0.5583405358686258, "eval_f1_macro": 0.5308754875566113, "eval_f1_micro": 0.5583405358686258, "eval_loss": 0.4604816436767578, "eval_precision_macro": 0.5405007742715062, "eval_recall_macro": 0.5347270499333006, "eval_roc_auc": 0.7849083537765603, "eval_runtime": 58.4178, "eval_samples_per_second": 138.639, "eval_steps_per_second": 69.328, "step": 4000 }, { "epoch": 2.7382408086736776, "eval_accuracy": 0.5485862452154587, "eval_f1_macro": 0.5244598378559915, "eval_f1_micro": 0.5485862452154587, "eval_loss": 0.46733224391937256, "eval_precision_macro": 0.5429571340400278, "eval_recall_macro": 0.5359068311148909, "eval_roc_auc": 0.7850860010461288, "eval_runtime": 58.5291, "eval_samples_per_second": 138.376, "eval_steps_per_second": 69.196, "step": 4200 }, { "epoch": 2.868672046955246, "eval_accuracy": 0.5685887146561304, "eval_f1_macro": 0.5358137721542776, "eval_f1_micro": 0.5685887146561304, "eval_loss": 0.453235387802124, "eval_precision_macro": 0.5498552760907015, "eval_recall_macro": 0.531919882945018, "eval_roc_auc": 0.7869039137806253, "eval_runtime": 58.2827, "eval_samples_per_second": 138.961, "eval_steps_per_second": 69.489, "step": 4400 }, { "epoch": 2.9338876660960302, "grad_norm": 7.882796287536621, "learning_rate": 4.8772558841693544e-06, "loss": 0.435, "step": 4500 }, { "epoch": 2.9991032852368145, "eval_accuracy": 0.5695764909248056, "eval_f1_macro": 0.5394199448286808, "eval_f1_micro": 0.5695764909248056, "eval_loss": 0.4586756229400635, "eval_precision_macro": 0.5469928436264997, "eval_recall_macro": 0.5385537439613218, "eval_roc_auc": 0.7865007504045092, "eval_runtime": 58.5001, "eval_samples_per_second": 138.444, "eval_steps_per_second": 69.231, "step": 4600 }, { "epoch": 3.1291269258987526, "eval_accuracy": 0.5659958019508581, "eval_f1_macro": 0.5352002738500827, "eval_f1_micro": 0.5659958019508581, "eval_loss": 0.46012088656425476, "eval_precision_macro": 0.5485092746099708, "eval_recall_macro": 0.537293222573151, "eval_roc_auc": 0.7868513938139832, "eval_runtime": 58.3581, "eval_samples_per_second": 138.781, "eval_steps_per_second": 69.399, "step": 4800 }, { "epoch": 3.259558164180321, "grad_norm": 6.890503406524658, "learning_rate": 4.834368352432582e-06, "loss": 0.4056, "step": 5000 }, { "epoch": 3.259558164180321, "eval_accuracy": 0.5674774663538709, "eval_f1_macro": 0.5441156714097114, "eval_f1_micro": 0.5674774663538709, "eval_loss": 0.47018805146217346, "eval_precision_macro": 0.5454139749449086, "eval_recall_macro": 0.5461421293276678, "eval_roc_auc": 0.7877411895075941, "eval_runtime": 58.8315, "eval_samples_per_second": 137.664, "eval_steps_per_second": 68.841, "step": 5000 }, { "epoch": 3.259558164180321, "step": 5000, "total_flos": 1.052068446713856e+16, "train_loss": 0.48495769653320314, "train_runtime": 3435.9427, "train_samples_per_second": 285.616, "train_steps_per_second": 17.858 } ], "logging_steps": 500, "max_steps": 61360, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 400, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 2 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.052068446713856e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }