| { | |
| "best_global_step": 4400, | |
| "best_metric": 0.453235387802124, | |
| "best_model_checkpoint": "roberta-base-multi-head-4-directions/checkpoint-4400", | |
| "epoch": 3.259558164180321, | |
| "eval_steps": 200, | |
| "global_step": 5000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.13043123828156844, | |
| "eval_accuracy": 0.15051240893937523, | |
| "eval_f1_macro": 0.0654110324103885, | |
| "eval_f1_micro": 0.15051240893937523, | |
| "eval_loss": 0.6699725985527039, | |
| "eval_precision_macro": 0.03762810223484381, | |
| "eval_recall_macro": 0.25, | |
| "eval_roc_auc": 0.5065070998981289, | |
| "eval_runtime": 58.3231, | |
| "eval_samples_per_second": 138.864, | |
| "eval_steps_per_second": 69.441, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.2608624765631369, | |
| "eval_accuracy": 0.3306581059390048, | |
| "eval_f1_macro": 0.21851269600524115, | |
| "eval_f1_micro": 0.3306581059390048, | |
| "eval_loss": 0.5907890796661377, | |
| "eval_precision_macro": 0.3477712230149137, | |
| "eval_recall_macro": 0.2628129027541947, | |
| "eval_roc_auc": 0.5264776449526523, | |
| "eval_runtime": 58.515, | |
| "eval_samples_per_second": 138.409, | |
| "eval_steps_per_second": 69.213, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.3260780957039211, | |
| "grad_norm": 1.910775899887085, | |
| "learning_rate": 8.132333767926989e-07, | |
| "loss": 0.6418, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.3912937148447053, | |
| "eval_accuracy": 0.39646869983948635, | |
| "eval_f1_macro": 0.24202854764905962, | |
| "eval_f1_micro": 0.39646869983948635, | |
| "eval_loss": 0.5681799650192261, | |
| "eval_precision_macro": 0.31314716750672944, | |
| "eval_recall_macro": 0.29579537486279056, | |
| "eval_roc_auc": 0.5825887308747437, | |
| "eval_runtime": 58.253, | |
| "eval_samples_per_second": 139.031, | |
| "eval_steps_per_second": 69.524, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.5217249531262738, | |
| "eval_accuracy": 0.4446227929373997, | |
| "eval_f1_macro": 0.2752091664099515, | |
| "eval_f1_micro": 0.4446227929373997, | |
| "eval_loss": 0.5510929226875305, | |
| "eval_precision_macro": 0.3725228720549576, | |
| "eval_recall_macro": 0.3347772936726068, | |
| "eval_roc_auc": 0.6511260809997589, | |
| "eval_runtime": 58.5957, | |
| "eval_samples_per_second": 138.218, | |
| "eval_steps_per_second": 69.118, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.6521561914078422, | |
| "grad_norm": 3.2853586673736572, | |
| "learning_rate": 1.6280964797913951e-06, | |
| "loss": 0.56, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.6521561914078422, | |
| "eval_accuracy": 0.47956537844178293, | |
| "eval_f1_macro": 0.339724330856578, | |
| "eval_f1_micro": 0.47956537844178293, | |
| "eval_loss": 0.5183268189430237, | |
| "eval_precision_macro": 0.5998469679206377, | |
| "eval_recall_macro": 0.37515425621657034, | |
| "eval_roc_auc": 0.7004256590184301, | |
| "eval_runtime": 58.4596, | |
| "eval_samples_per_second": 138.54, | |
| "eval_steps_per_second": 69.279, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.7825874296894106, | |
| "eval_accuracy": 0.4951228546734165, | |
| "eval_f1_macro": 0.3960300119997172, | |
| "eval_f1_micro": 0.4951228546734165, | |
| "eval_loss": 0.5027472972869873, | |
| "eval_precision_macro": 0.47649685990985047, | |
| "eval_recall_macro": 0.42432070581801545, | |
| "eval_roc_auc": 0.7210076403173139, | |
| "eval_runtime": 59.2756, | |
| "eval_samples_per_second": 136.633, | |
| "eval_steps_per_second": 68.325, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.9130186679709791, | |
| "eval_accuracy": 0.49364119027040376, | |
| "eval_f1_macro": 0.4202868285905695, | |
| "eval_f1_micro": 0.49364119027040376, | |
| "eval_loss": 0.5012013912200928, | |
| "eval_precision_macro": 0.476097069319352, | |
| "eval_recall_macro": 0.44606990421659504, | |
| "eval_roc_auc": 0.7341331293715618, | |
| "eval_runtime": 58.8986, | |
| "eval_samples_per_second": 137.507, | |
| "eval_steps_per_second": 68.762, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.9782342871117633, | |
| "grad_norm": 6.466639041900635, | |
| "learning_rate": 2.4429595827900914e-06, | |
| "loss": 0.5136, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.0430423086329177, | |
| "eval_accuracy": 0.5227805901963205, | |
| "eval_f1_macro": 0.4272500725974152, | |
| "eval_f1_micro": 0.5227805901963205, | |
| "eval_loss": 0.4897969961166382, | |
| "eval_precision_macro": 0.511088683081628, | |
| "eval_recall_macro": 0.4450094829273592, | |
| "eval_roc_auc": 0.7377316174079231, | |
| "eval_runtime": 58.3834, | |
| "eval_samples_per_second": 138.721, | |
| "eval_steps_per_second": 69.369, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.1734735469144861, | |
| "eval_accuracy": 0.5304358562785529, | |
| "eval_f1_macro": 0.47228705140051763, | |
| "eval_f1_micro": 0.5304358562785529, | |
| "eval_loss": 0.48175758123397827, | |
| "eval_precision_macro": 0.49865343258508565, | |
| "eval_recall_macro": 0.4751571425140743, | |
| "eval_roc_auc": 0.7504044589822995, | |
| "eval_runtime": 59.0784, | |
| "eval_samples_per_second": 137.089, | |
| "eval_steps_per_second": 68.553, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.3039047851960546, | |
| "grad_norm": 7.449667453765869, | |
| "learning_rate": 3.257822685788788e-06, | |
| "loss": 0.4842, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.3039047851960546, | |
| "eval_accuracy": 0.5330287689838251, | |
| "eval_f1_macro": 0.49309253039408907, | |
| "eval_f1_micro": 0.5330287689838251, | |
| "eval_loss": 0.4809001386165619, | |
| "eval_precision_macro": 0.5034709810584846, | |
| "eval_recall_macro": 0.4966387871579345, | |
| "eval_roc_auc": 0.758014108468557, | |
| "eval_runtime": 58.3487, | |
| "eval_samples_per_second": 138.803, | |
| "eval_steps_per_second": 69.41, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.434336023477623, | |
| "eval_accuracy": 0.5411779232003952, | |
| "eval_f1_macro": 0.5090942441371813, | |
| "eval_f1_micro": 0.5411779232003952, | |
| "eval_loss": 0.47537535429000854, | |
| "eval_precision_macro": 0.5108825390909519, | |
| "eval_recall_macro": 0.5109581830122815, | |
| "eval_roc_auc": 0.7651162037942221, | |
| "eval_runtime": 58.7575, | |
| "eval_samples_per_second": 137.838, | |
| "eval_steps_per_second": 68.927, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.5647672617591915, | |
| "eval_accuracy": 0.5522904062229905, | |
| "eval_f1_macro": 0.5071849375719386, | |
| "eval_f1_micro": 0.5522904062229905, | |
| "eval_loss": 0.468904048204422, | |
| "eval_precision_macro": 0.5307207892146724, | |
| "eval_recall_macro": 0.5061303129931647, | |
| "eval_roc_auc": 0.7695441321425415, | |
| "eval_runtime": 58.5621, | |
| "eval_samples_per_second": 138.298, | |
| "eval_steps_per_second": 69.157, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.6299828808999757, | |
| "grad_norm": 6.51912260055542, | |
| "learning_rate": 4.072685788787484e-06, | |
| "loss": 0.4695, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.69519850004076, | |
| "eval_accuracy": 0.5379676503272008, | |
| "eval_f1_macro": 0.48087910671865897, | |
| "eval_f1_micro": 0.5379676503272008, | |
| "eval_loss": 0.4785490036010742, | |
| "eval_precision_macro": 0.5346246897655283, | |
| "eval_recall_macro": 0.4906654493419952, | |
| "eval_roc_auc": 0.7663202674809741, | |
| "eval_runtime": 58.4801, | |
| "eval_samples_per_second": 138.491, | |
| "eval_steps_per_second": 69.254, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.8256297383223283, | |
| "eval_accuracy": 0.5545129028275095, | |
| "eval_f1_macro": 0.5144296310304717, | |
| "eval_f1_micro": 0.5545129028275095, | |
| "eval_loss": 0.46492108702659607, | |
| "eval_precision_macro": 0.5301331529844335, | |
| "eval_recall_macro": 0.5080649601410273, | |
| "eval_roc_auc": 0.7745130299239165, | |
| "eval_runtime": 58.563, | |
| "eval_samples_per_second": 138.295, | |
| "eval_steps_per_second": 69.156, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.9560609766038968, | |
| "grad_norm": 6.679250717163086, | |
| "learning_rate": 4.88754889178618e-06, | |
| "loss": 0.4655, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.9560609766038968, | |
| "eval_accuracy": 0.5494505494505495, | |
| "eval_f1_macro": 0.5210019068338745, | |
| "eval_f1_micro": 0.5494505494505495, | |
| "eval_loss": 0.46518564224243164, | |
| "eval_precision_macro": 0.5276079708493782, | |
| "eval_recall_macro": 0.5245104582824609, | |
| "eval_roc_auc": 0.7757459310311635, | |
| "eval_runtime": 58.703, | |
| "eval_samples_per_second": 137.966, | |
| "eval_steps_per_second": 68.991, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.0860846172658354, | |
| "eval_accuracy": 0.5653784417829362, | |
| "eval_f1_macro": 0.528637637687615, | |
| "eval_f1_micro": 0.5653784417829362, | |
| "eval_loss": 0.46096640825271606, | |
| "eval_precision_macro": 0.5443547583807372, | |
| "eval_recall_macro": 0.5223821110706193, | |
| "eval_roc_auc": 0.781316894157911, | |
| "eval_runtime": 58.6208, | |
| "eval_samples_per_second": 138.159, | |
| "eval_steps_per_second": 69.088, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 2.216515855547404, | |
| "eval_accuracy": 0.5498209655513027, | |
| "eval_f1_macro": 0.5186060540296338, | |
| "eval_f1_micro": 0.5498209655513027, | |
| "eval_loss": 0.46535903215408325, | |
| "eval_precision_macro": 0.5399973417347204, | |
| "eval_recall_macro": 0.5255215009903015, | |
| "eval_roc_auc": 0.7808947543899514, | |
| "eval_runtime": 58.434, | |
| "eval_samples_per_second": 138.601, | |
| "eval_steps_per_second": 69.309, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 2.281731474688188, | |
| "grad_norm": 7.935737609863281, | |
| "learning_rate": 4.963030947642902e-06, | |
| "loss": 0.4414, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.3469470938289723, | |
| "eval_accuracy": 0.5530312384244969, | |
| "eval_f1_macro": 0.5192037737144601, | |
| "eval_f1_micro": 0.5530312384244969, | |
| "eval_loss": 0.4659627676010132, | |
| "eval_precision_macro": 0.542720199525297, | |
| "eval_recall_macro": 0.5266595526270865, | |
| "eval_roc_auc": 0.7817212427653404, | |
| "eval_runtime": 58.8426, | |
| "eval_samples_per_second": 137.638, | |
| "eval_steps_per_second": 68.828, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 2.4773783321105407, | |
| "eval_accuracy": 0.564390665514261, | |
| "eval_f1_macro": 0.5393273157888367, | |
| "eval_f1_micro": 0.564390665514261, | |
| "eval_loss": 0.45927414298057556, | |
| "eval_precision_macro": 0.5430622655929049, | |
| "eval_recall_macro": 0.5388167782580582, | |
| "eval_roc_auc": 0.785497056666471, | |
| "eval_runtime": 60.3532, | |
| "eval_samples_per_second": 134.193, | |
| "eval_steps_per_second": 67.105, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 2.607809570392109, | |
| "grad_norm": 6.252172946929932, | |
| "learning_rate": 4.920143415906128e-06, | |
| "loss": 0.433, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.607809570392109, | |
| "eval_accuracy": 0.5583405358686258, | |
| "eval_f1_macro": 0.5308754875566113, | |
| "eval_f1_micro": 0.5583405358686258, | |
| "eval_loss": 0.4604816436767578, | |
| "eval_precision_macro": 0.5405007742715062, | |
| "eval_recall_macro": 0.5347270499333006, | |
| "eval_roc_auc": 0.7849083537765603, | |
| "eval_runtime": 58.4178, | |
| "eval_samples_per_second": 138.639, | |
| "eval_steps_per_second": 69.328, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.7382408086736776, | |
| "eval_accuracy": 0.5485862452154587, | |
| "eval_f1_macro": 0.5244598378559915, | |
| "eval_f1_micro": 0.5485862452154587, | |
| "eval_loss": 0.46733224391937256, | |
| "eval_precision_macro": 0.5429571340400278, | |
| "eval_recall_macro": 0.5359068311148909, | |
| "eval_roc_auc": 0.7850860010461288, | |
| "eval_runtime": 58.5291, | |
| "eval_samples_per_second": 138.376, | |
| "eval_steps_per_second": 69.196, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 2.868672046955246, | |
| "eval_accuracy": 0.5685887146561304, | |
| "eval_f1_macro": 0.5358137721542776, | |
| "eval_f1_micro": 0.5685887146561304, | |
| "eval_loss": 0.453235387802124, | |
| "eval_precision_macro": 0.5498552760907015, | |
| "eval_recall_macro": 0.531919882945018, | |
| "eval_roc_auc": 0.7869039137806253, | |
| "eval_runtime": 58.2827, | |
| "eval_samples_per_second": 138.961, | |
| "eval_steps_per_second": 69.489, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 2.9338876660960302, | |
| "grad_norm": 7.882796287536621, | |
| "learning_rate": 4.8772558841693544e-06, | |
| "loss": 0.435, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.9991032852368145, | |
| "eval_accuracy": 0.5695764909248056, | |
| "eval_f1_macro": 0.5394199448286808, | |
| "eval_f1_micro": 0.5695764909248056, | |
| "eval_loss": 0.4586756229400635, | |
| "eval_precision_macro": 0.5469928436264997, | |
| "eval_recall_macro": 0.5385537439613218, | |
| "eval_roc_auc": 0.7865007504045092, | |
| "eval_runtime": 58.5001, | |
| "eval_samples_per_second": 138.444, | |
| "eval_steps_per_second": 69.231, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 3.1291269258987526, | |
| "eval_accuracy": 0.5659958019508581, | |
| "eval_f1_macro": 0.5352002738500827, | |
| "eval_f1_micro": 0.5659958019508581, | |
| "eval_loss": 0.46012088656425476, | |
| "eval_precision_macro": 0.5485092746099708, | |
| "eval_recall_macro": 0.537293222573151, | |
| "eval_roc_auc": 0.7868513938139832, | |
| "eval_runtime": 58.3581, | |
| "eval_samples_per_second": 138.781, | |
| "eval_steps_per_second": 69.399, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 3.259558164180321, | |
| "grad_norm": 6.890503406524658, | |
| "learning_rate": 4.834368352432582e-06, | |
| "loss": 0.4056, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 3.259558164180321, | |
| "eval_accuracy": 0.5674774663538709, | |
| "eval_f1_macro": 0.5441156714097114, | |
| "eval_f1_micro": 0.5674774663538709, | |
| "eval_loss": 0.47018805146217346, | |
| "eval_precision_macro": 0.5454139749449086, | |
| "eval_recall_macro": 0.5461421293276678, | |
| "eval_roc_auc": 0.7877411895075941, | |
| "eval_runtime": 58.8315, | |
| "eval_samples_per_second": 137.664, | |
| "eval_steps_per_second": 68.841, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 3.259558164180321, | |
| "step": 5000, | |
| "total_flos": 1.052068446713856e+16, | |
| "train_loss": 0.48495769653320314, | |
| "train_runtime": 3435.9427, | |
| "train_samples_per_second": 285.616, | |
| "train_steps_per_second": 17.858 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 61360, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 40, | |
| "save_steps": 400, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 3, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 2 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.052068446713856e+16, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |