{
  "best_global_step": 17000,
  "best_metric": 0.8946104799920928,
  "best_model_checkpoint": "banBERT-Base-pos/checkpoint-17000",
  "epoch": 0.9946755602363817,
  "eval_steps": 1000,
  "global_step": 17000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05851032707272834,
      "grad_norm": 4.870177745819092,
      "learning_rate": 4.952324665090623e-05,
      "loss": 0.5681,
      "step": 1000
    },
    {
      "epoch": 0.05851032707272834,
      "eval_f1": 0.8762604390800369,
      "eval_loss": 0.26431742310523987,
      "eval_precision": 0.8742650838051628,
      "eval_recall": 0.8796261556029517,
      "eval_runtime": 28.9994,
      "eval_samples_per_second": 261.178,
      "eval_steps_per_second": 32.656,
      "step": 1000
    },
    {
      "epoch": 0.11702065414545668,
      "grad_norm": 6.504812240600586,
      "learning_rate": 4.853821907013397e-05,
      "loss": 0.2821,
      "step": 2000
    },
    {
      "epoch": 0.11702065414545668,
      "eval_f1": 0.8747857038906649,
      "eval_loss": 0.2531687021255493,
      "eval_precision": 0.8826666555035545,
      "eval_recall": 0.8742214967913846,
      "eval_runtime": 29.3663,
      "eval_samples_per_second": 257.915,
      "eval_steps_per_second": 32.248,
      "step": 2000
    },
    {
      "epoch": 0.175530981218185,
      "grad_norm": 2.781933307647705,
      "learning_rate": 4.7553191489361704e-05,
      "loss": 0.2697,
      "step": 3000
    },
    {
      "epoch": 0.175530981218185,
      "eval_f1": 0.8666467093588278,
      "eval_loss": 0.2597614824771881,
      "eval_precision": 0.8736361925736554,
      "eval_recall": 0.8661208831213065,
      "eval_runtime": 29.0115,
      "eval_samples_per_second": 261.068,
      "eval_steps_per_second": 32.642,
      "step": 3000
    },
    {
      "epoch": 0.23404130829091335,
      "grad_norm": 3.3094711303710938,
      "learning_rate": 4.656816390858944e-05,
      "loss": 0.2659,
      "step": 4000
    },
    {
      "epoch": 0.23404130829091335,
      "eval_f1": 0.8798149861336466,
      "eval_loss": 0.24752835929393768,
      "eval_precision": 0.8764034593493383,
      "eval_recall": 0.8851085241178042,
      "eval_runtime": 29.6951,
      "eval_samples_per_second": 255.059,
      "eval_steps_per_second": 31.891,
      "step": 4000
    },
    {
      "epoch": 0.2925516353636417,
      "grad_norm": 2.068115234375,
      "learning_rate": 4.5583136327817186e-05,
      "loss": 0.2608,
      "step": 5000
    },
    {
      "epoch": 0.2925516353636417,
      "eval_f1": 0.8836360762583084,
      "eval_loss": 0.23901157081127167,
      "eval_precision": 0.8866831487439992,
      "eval_recall": 0.8816467698684325,
      "eval_runtime": 29.6967,
      "eval_samples_per_second": 255.046,
      "eval_steps_per_second": 31.889,
      "step": 5000
    },
    {
      "epoch": 0.35106196243637,
      "grad_norm": 2.984527111053467,
      "learning_rate": 4.459810874704492e-05,
      "loss": 0.2548,
      "step": 6000
    },
    {
      "epoch": 0.35106196243637,
      "eval_f1": 0.8786857503892376,
      "eval_loss": 0.24374601244926453,
      "eval_precision": 0.8808030269623512,
      "eval_recall": 0.878183658913097,
      "eval_runtime": 29.5386,
      "eval_samples_per_second": 256.41,
      "eval_steps_per_second": 32.06,
      "step": 6000
    },
    {
      "epoch": 0.40957228950909835,
      "grad_norm": 3.5354621410369873,
      "learning_rate": 4.3613081166272655e-05,
      "loss": 0.2535,
      "step": 7000
    },
    {
      "epoch": 0.40957228950909835,
      "eval_f1": 0.8885007809186033,
      "eval_loss": 0.23431703448295593,
      "eval_precision": 0.8823384881830838,
      "eval_recall": 0.8961195227790774,
      "eval_runtime": 28.9987,
      "eval_samples_per_second": 261.184,
      "eval_steps_per_second": 32.657,
      "step": 7000
    },
    {
      "epoch": 0.4680826165818267,
      "grad_norm": 3.906360626220703,
      "learning_rate": 4.2628053585500396e-05,
      "loss": 0.255,
      "step": 8000
    },
    {
      "epoch": 0.4680826165818267,
      "eval_f1": 0.8796937707178186,
      "eval_loss": 0.23554810881614685,
      "eval_precision": 0.8812208287855526,
      "eval_recall": 0.880771461292737,
      "eval_runtime": 30.6899,
      "eval_samples_per_second": 246.791,
      "eval_steps_per_second": 30.857,
      "step": 8000
    },
    {
      "epoch": 0.526592943654555,
      "grad_norm": 3.616974353790283,
      "learning_rate": 4.1644011032308906e-05,
      "loss": 0.2457,
      "step": 9000
    },
    {
      "epoch": 0.526592943654555,
      "eval_f1": 0.8853991026903275,
      "eval_loss": 0.2313537746667862,
      "eval_precision": 0.8879722004510241,
      "eval_recall": 0.8847937369742619,
      "eval_runtime": 29.3159,
      "eval_samples_per_second": 258.358,
      "eval_steps_per_second": 32.303,
      "step": 9000
    },
    {
      "epoch": 0.5851032707272834,
      "grad_norm": 3.81512713432312,
      "learning_rate": 4.065898345153665e-05,
      "loss": 0.2527,
      "step": 10000
    },
    {
      "epoch": 0.5851032707272834,
      "eval_f1": 0.8797925096491191,
      "eval_loss": 0.2423781007528305,
      "eval_precision": 0.8750872391747755,
      "eval_recall": 0.8881566716696347,
      "eval_runtime": 30.0187,
      "eval_samples_per_second": 252.31,
      "eval_steps_per_second": 31.547,
      "step": 10000
    },
    {
      "epoch": 0.6436135978000117,
      "grad_norm": 2.189429521560669,
      "learning_rate": 3.967494089834515e-05,
      "loss": 0.2446,
      "step": 11000
    },
    {
      "epoch": 0.6436135978000117,
      "eval_f1": 0.8923392397004614,
      "eval_loss": 0.22613154351711273,
      "eval_precision": 0.8840821992199377,
      "eval_recall": 0.9019369581982146,
      "eval_runtime": 30.4237,
      "eval_samples_per_second": 248.951,
      "eval_steps_per_second": 31.127,
      "step": 11000
    },
    {
      "epoch": 0.70212392487274,
      "grad_norm": 3.8916683197021484,
      "learning_rate": 3.86899133175729e-05,
      "loss": 0.2496,
      "step": 12000
    },
    {
      "epoch": 0.70212392487274,
      "eval_f1": 0.8838883711908031,
      "eval_loss": 0.2282320261001587,
      "eval_precision": 0.8870449139456895,
      "eval_recall": 0.881842969912727,
      "eval_runtime": 28.9189,
      "eval_samples_per_second": 261.905,
      "eval_steps_per_second": 32.747,
      "step": 12000
    },
    {
      "epoch": 0.7606342519454684,
      "grad_norm": 2.145176887512207,
      "learning_rate": 3.770587076438141e-05,
      "loss": 0.2439,
      "step": 13000
    },
    {
      "epoch": 0.7606342519454684,
      "eval_f1": 0.8769622250324842,
      "eval_loss": 0.2252449244260788,
      "eval_precision": 0.887928073773863,
      "eval_recall": 0.8695732334120527,
      "eval_runtime": 29.4593,
      "eval_samples_per_second": 257.1,
      "eval_steps_per_second": 32.146,
      "step": 13000
    },
    {
      "epoch": 0.8191445790181967,
      "grad_norm": 1.7703274488449097,
      "learning_rate": 3.672084318360914e-05,
      "loss": 0.243,
      "step": 14000
    },
    {
      "epoch": 0.8191445790181967,
      "eval_f1": 0.879454001823476,
      "eval_loss": 0.22599145770072937,
      "eval_precision": 0.8828080832134976,
      "eval_recall": 0.8786664905760042,
      "eval_runtime": 29.753,
      "eval_samples_per_second": 254.562,
      "eval_steps_per_second": 31.829,
      "step": 14000
    },
    {
      "epoch": 0.877654906090925,
      "grad_norm": 4.403445243835449,
      "learning_rate": 3.573581560283688e-05,
      "loss": 0.2427,
      "step": 15000
    },
    {
      "epoch": 0.877654906090925,
      "eval_f1": 0.8888557192580028,
      "eval_loss": 0.22790007293224335,
      "eval_precision": 0.8901524013282429,
      "eval_recall": 0.8884489266872372,
      "eval_runtime": 31.5334,
      "eval_samples_per_second": 240.19,
      "eval_steps_per_second": 30.032,
      "step": 15000
    },
    {
      "epoch": 0.9361652331636534,
      "grad_norm": 6.978273868560791,
      "learning_rate": 3.4751773049645395e-05,
      "loss": 0.2371,
      "step": 16000
    },
    {
      "epoch": 0.9361652331636534,
      "eval_f1": 0.8933984344595972,
      "eval_loss": 0.22917409241199493,
      "eval_precision": 0.8891663922870625,
      "eval_recall": 0.8998411225843156,
      "eval_runtime": 29.6746,
      "eval_samples_per_second": 255.235,
      "eval_steps_per_second": 31.913,
      "step": 16000
    },
    {
      "epoch": 0.9946755602363817,
      "grad_norm": 4.8312859535217285,
      "learning_rate": 3.376674546887313e-05,
      "loss": 0.2405,
      "step": 17000
    },
    {
      "epoch": 0.9946755602363817,
      "eval_f1": 0.8946104799920928,
      "eval_loss": 0.2225894331932068,
      "eval_precision": 0.8872567105766707,
      "eval_recall": 0.903381031095196,
      "eval_runtime": 29.592,
      "eval_samples_per_second": 255.948,
      "eval_steps_per_second": 32.002,
      "step": 17000
    }
  ],
  "logging_steps": 1000,
  "max_steps": 51273,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 4442566947840000.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}