{ "best_global_step": 7500, "best_metric": 0.5938410920950099, "best_model_checkpoint": "bert-finetuned-sem_eval-english/checkpoint-7500", "epoch": 3.0, "eval_steps": 500, "global_step": 9210, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16286644951140064, "grad_norm": 1.7576472759246826, "learning_rate": 2.8374592833876224e-05, "loss": 0.675, "step": 500 }, { "epoch": 0.16286644951140064, "eval_accuracy": 0.30349433900790096, "eval_f1": 0.5384447288733494, "eval_loss": 0.666650652885437, "eval_roc_auc": 0.5939593680164127, "eval_runtime": 39.771, "eval_samples_per_second": 308.692, "eval_steps_per_second": 9.655, "step": 500 }, { "epoch": 0.3257328990228013, "grad_norm": 0.9124096035957336, "learning_rate": 2.6745928338762215e-05, "loss": 0.6672, "step": 1000 }, { "epoch": 0.3257328990228013, "eval_accuracy": 0.30292416714180986, "eval_f1": 0.5496491926835994, "eval_loss": 0.6651883125305176, "eval_roc_auc": 0.5970308146607535, "eval_runtime": 39.484, "eval_samples_per_second": 310.936, "eval_steps_per_second": 9.725, "step": 1000 }, { "epoch": 0.48859934853420195, "grad_norm": 0.8060895204544067, "learning_rate": 2.5117263843648207e-05, "loss": 0.6629, "step": 1500 }, { "epoch": 0.48859934853420195, "eval_accuracy": 0.28720371426244196, "eval_f1": 0.5836376083979918, "eval_loss": 0.6616196632385254, "eval_roc_auc": 0.6027518264410479, "eval_runtime": 39.4217, "eval_samples_per_second": 311.427, "eval_steps_per_second": 9.741, "step": 1500 }, { "epoch": 0.6514657980456026, "grad_norm": 0.725496232509613, "learning_rate": 2.34885993485342e-05, "loss": 0.6598, "step": 2000 }, { "epoch": 0.6514657980456026, "eval_accuracy": 0.28924004235562434, "eval_f1": 0.5710412307329581, "eval_loss": 0.6627053022384644, "eval_roc_auc": 0.6025085911036706, "eval_runtime": 39.5324, "eval_samples_per_second": 310.555, "eval_steps_per_second": 9.714, "step": 2000 }, { "epoch": 0.8143322475570033, "grad_norm": 1.2766971588134766, "learning_rate": 2.1859934853420196e-05, "loss": 0.6602, "step": 2500 }, { "epoch": 0.8143322475570033, "eval_accuracy": 0.3039016046265374, "eval_f1": 0.5510751858822077, "eval_loss": 0.6646136045455933, "eval_roc_auc": 0.6007841702704798, "eval_runtime": 39.5668, "eval_samples_per_second": 310.285, "eval_steps_per_second": 9.705, "step": 2500 }, { "epoch": 0.9771986970684039, "grad_norm": 1.1923062801361084, "learning_rate": 2.023127035830619e-05, "loss": 0.6591, "step": 3000 }, { "epoch": 0.9771986970684039, "eval_accuracy": 0.30471613586381036, "eval_f1": 0.5650302501279234, "eval_loss": 0.6584209203720093, "eval_roc_auc": 0.604482846236001, "eval_runtime": 39.6014, "eval_samples_per_second": 310.014, "eval_steps_per_second": 9.697, "step": 3000 }, { "epoch": 1.1400651465798046, "grad_norm": 0.9901787638664246, "learning_rate": 1.8602605863192182e-05, "loss": 0.6503, "step": 3500 }, { "epoch": 1.1400651465798046, "eval_accuracy": 0.3002362140588092, "eval_f1": 0.5821069843494511, "eval_loss": 0.6575685143470764, "eval_roc_auc": 0.6095021521656233, "eval_runtime": 39.6519, "eval_samples_per_second": 309.619, "eval_steps_per_second": 9.684, "step": 3500 }, { "epoch": 1.3029315960912053, "grad_norm": 1.4162893295288086, "learning_rate": 1.6973941368078176e-05, "loss": 0.6481, "step": 4000 }, { "epoch": 1.3029315960912053, "eval_accuracy": 0.29371996416062557, "eval_f1": 0.5829980759139409, "eval_loss": 0.6544238328933716, "eval_roc_auc": 0.6097850034326124, "eval_runtime": 39.6514, "eval_samples_per_second": 309.624, "eval_steps_per_second": 9.684, "step": 4000 }, { "epoch": 1.4657980456026058, "grad_norm": 1.1837143898010254, "learning_rate": 1.534527687296417e-05, "loss": 0.6407, "step": 4500 }, { "epoch": 1.4657980456026058, "eval_accuracy": 0.30854443267899323, "eval_f1": 0.5742496763900178, "eval_loss": 0.6540513038635254, "eval_roc_auc": 0.6128570483710014, "eval_runtime": 39.6455, "eval_samples_per_second": 309.669, "eval_steps_per_second": 9.686, "step": 4500 }, { "epoch": 1.6286644951140063, "grad_norm": 1.213715672492981, "learning_rate": 1.3716612377850164e-05, "loss": 0.6455, "step": 5000 }, { "epoch": 1.6286644951140063, "eval_accuracy": 0.30553066710108334, "eval_f1": 0.5805761805761805, "eval_loss": 0.6516779065132141, "eval_roc_auc": 0.6139730041570889, "eval_runtime": 39.6664, "eval_samples_per_second": 309.507, "eval_steps_per_second": 9.681, "step": 5000 }, { "epoch": 1.791530944625407, "grad_norm": 1.0912699699401855, "learning_rate": 1.2087947882736157e-05, "loss": 0.6424, "step": 5500 }, { "epoch": 1.791530944625407, "eval_accuracy": 0.3078928076891749, "eval_f1": 0.5807968103781725, "eval_loss": 0.6519107222557068, "eval_roc_auc": 0.6147970899473854, "eval_runtime": 39.6062, "eval_samples_per_second": 309.977, "eval_steps_per_second": 9.695, "step": 5500 }, { "epoch": 1.9543973941368078, "grad_norm": 1.0267040729522705, "learning_rate": 1.045928338762215e-05, "loss": 0.6421, "step": 6000 }, { "epoch": 1.9543973941368078, "eval_accuracy": 0.3128614482365399, "eval_f1": 0.5795434118281503, "eval_loss": 0.6518534421920776, "eval_roc_auc": 0.6160143745862132, "eval_runtime": 39.5531, "eval_samples_per_second": 310.393, "eval_steps_per_second": 9.708, "step": 6000 }, { "epoch": 2.1172638436482085, "grad_norm": 1.4254558086395264, "learning_rate": 8.830618892508145e-06, "loss": 0.6287, "step": 6500 }, { "epoch": 2.1172638436482085, "eval_accuracy": 0.30447177649262847, "eval_f1": 0.5908640680533707, "eval_loss": 0.6530047059059143, "eval_roc_auc": 0.616887196966502, "eval_runtime": 39.557, "eval_samples_per_second": 310.363, "eval_steps_per_second": 9.708, "step": 6500 }, { "epoch": 2.2801302931596092, "grad_norm": 1.6338706016540527, "learning_rate": 7.2019543973941374e-06, "loss": 0.6249, "step": 7000 }, { "epoch": 2.2801302931596092, "eval_accuracy": 0.30626374521462896, "eval_f1": 0.584855470828161, "eval_loss": 0.6531878113746643, "eval_roc_auc": 0.6162386151955095, "eval_runtime": 39.5812, "eval_samples_per_second": 310.172, "eval_steps_per_second": 9.702, "step": 7000 }, { "epoch": 2.44299674267101, "grad_norm": 1.2057968378067017, "learning_rate": 5.57328990228013e-06, "loss": 0.6236, "step": 7500 }, { "epoch": 2.44299674267101, "eval_accuracy": 0.30039912030626376, "eval_f1": 0.5938410920950099, "eval_loss": 0.6521313786506653, "eval_roc_auc": 0.6164883386994549, "eval_runtime": 39.6768, "eval_samples_per_second": 309.425, "eval_steps_per_second": 9.678, "step": 7500 }, { "epoch": 2.6058631921824107, "grad_norm": 1.49629807472229, "learning_rate": 3.944625407166123e-06, "loss": 0.6225, "step": 8000 }, { "epoch": 2.6058631921824107, "eval_accuracy": 0.31041785452472104, "eval_f1": 0.5854554068459078, "eval_loss": 0.6527394652366638, "eval_roc_auc": 0.6174151537937455, "eval_runtime": 39.706, "eval_samples_per_second": 309.197, "eval_steps_per_second": 9.671, "step": 8000 }, { "epoch": 2.768729641693811, "grad_norm": 1.7087451219558716, "learning_rate": 2.315960912052117e-06, "loss": 0.6223, "step": 8500 }, { "epoch": 2.768729641693811, "eval_accuracy": 0.30805571393662945, "eval_f1": 0.5916076167147815, "eval_loss": 0.6518492102622986, "eval_roc_auc": 0.6179083253618407, "eval_runtime": 39.5356, "eval_samples_per_second": 310.53, "eval_steps_per_second": 9.713, "step": 8500 }, { "epoch": 2.9315960912052117, "grad_norm": 1.7263160943984985, "learning_rate": 6.872964169381108e-07, "loss": 0.6224, "step": 9000 }, { "epoch": 2.9315960912052117, "eval_accuracy": 0.30813716706035676, "eval_f1": 0.5910419833937551, "eval_loss": 0.6514719128608704, "eval_roc_auc": 0.6182633220274373, "eval_runtime": 39.7919, "eval_samples_per_second": 308.53, "eval_steps_per_second": 9.65, "step": 9000 } ], "logging_steps": 500, "max_steps": 9210, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9772985975869440.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }