| { | |
| "best_metric": 0.04727424308657646, | |
| "best_model_checkpoint": "results_simple-latin-bert-uncased/epoch20_bs64/checkpoint-36", | |
| "epoch": 11.0, | |
| "eval_steps": 500, | |
| "global_step": 99, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.7604417204856873, | |
| "learning_rate": 4.75e-05, | |
| "loss": 0.2645, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accurracy": { | |
| "accuracy": 0.9775494672754946 | |
| }, | |
| "eval_f1": [ | |
| 0.9749575551782682, | |
| 0.7978910369068541, | |
| 0.9994264958898872 | |
| ], | |
| "eval_loss": 0.07480967044830322, | |
| "eval_precision": [ | |
| 0.968381112984823, | |
| 0.850187265917603, | |
| 0.9988536492166603 | |
| ], | |
| "eval_recall": [ | |
| 0.9816239316239316, | |
| 0.7516556291390728, | |
| 1.0 | |
| ], | |
| "eval_runtime": 2.8335, | |
| "eval_samples_per_second": 25.41, | |
| "eval_steps_per_second": 0.706, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.30367013812065125, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.0564, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accurracy": { | |
| "accuracy": 0.9813546423135464 | |
| }, | |
| "eval_f1": [ | |
| 0.9791400595998297, | |
| 0.8327645051194539, | |
| 1.0 | |
| ], | |
| "eval_loss": 0.053640153259038925, | |
| "eval_precision": [ | |
| 0.9754028837998303, | |
| 0.8591549295774648, | |
| 1.0 | |
| ], | |
| "eval_recall": [ | |
| 0.9829059829059829, | |
| 0.8079470198675497, | |
| 1.0 | |
| ], | |
| "eval_runtime": 2.7042, | |
| "eval_samples_per_second": 26.625, | |
| "eval_steps_per_second": 0.74, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.28803956508636475, | |
| "learning_rate": 4.25e-05, | |
| "loss": 0.0447, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accurracy": { | |
| "accuracy": 0.9823059360730594 | |
| }, | |
| "eval_f1": [ | |
| 0.9801916932907349, | |
| 0.8421052631578947, | |
| 1.0 | |
| ], | |
| "eval_loss": 0.052741460502147675, | |
| "eval_precision": [ | |
| 0.9770700636942675, | |
| 0.8641114982578397, | |
| 1.0 | |
| ], | |
| "eval_recall": [ | |
| 0.9833333333333333, | |
| 0.8211920529801324, | |
| 1.0 | |
| ], | |
| "eval_runtime": 2.6628, | |
| "eval_samples_per_second": 27.04, | |
| "eval_steps_per_second": 0.751, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.2971172630786896, | |
| "learning_rate": 4e-05, | |
| "loss": 0.0377, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accurracy": { | |
| "accuracy": 0.9811643835616438 | |
| }, | |
| "eval_f1": [ | |
| 0.9789227166276346, | |
| 0.8313458262350937, | |
| 1.0 | |
| ], | |
| "eval_loss": 0.04727424308657646, | |
| "eval_precision": [ | |
| 0.9753924480271532, | |
| 0.856140350877193, | |
| 1.0 | |
| ], | |
| "eval_recall": [ | |
| 0.9824786324786324, | |
| 0.8079470198675497, | |
| 1.0 | |
| ], | |
| "eval_runtime": 2.6802, | |
| "eval_samples_per_second": 26.864, | |
| "eval_steps_per_second": 0.746, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.32125240564346313, | |
| "learning_rate": 3.7500000000000003e-05, | |
| "loss": 0.0325, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accurracy": { | |
| "accuracy": 0.9826864535768646 | |
| }, | |
| "eval_f1": [ | |
| 0.9806176783812567, | |
| 0.8455008488964346, | |
| 1.0 | |
| ], | |
| "eval_loss": 0.04757271707057953, | |
| "eval_precision": [ | |
| 0.9774946921443737, | |
| 0.867595818815331, | |
| 1.0 | |
| ], | |
| "eval_recall": [ | |
| 0.9837606837606837, | |
| 0.8245033112582781, | |
| 1.0 | |
| ], | |
| "eval_runtime": 2.6737, | |
| "eval_samples_per_second": 26.929, | |
| "eval_steps_per_second": 0.748, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 0.23932930827140808, | |
| "learning_rate": 3.5e-05, | |
| "loss": 0.0276, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accurracy": { | |
| "accuracy": 0.9828767123287672 | |
| }, | |
| "eval_f1": [ | |
| 0.98080204778157, | |
| 0.8489932885906041, | |
| 1.0 | |
| ], | |
| "eval_loss": 0.04851287603378296, | |
| "eval_precision": [ | |
| 0.9791311754684838, | |
| 0.8605442176870748, | |
| 1.0 | |
| ], | |
| "eval_recall": [ | |
| 0.9824786324786324, | |
| 0.8377483443708609, | |
| 1.0 | |
| ], | |
| "eval_runtime": 2.7322, | |
| "eval_samples_per_second": 26.352, | |
| "eval_steps_per_second": 0.732, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 0.2584969997406006, | |
| "learning_rate": 3.2500000000000004e-05, | |
| "loss": 0.0235, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accurracy": { | |
| "accuracy": 0.9834474885844748 | |
| }, | |
| "eval_f1": [ | |
| 0.9814300960512273, | |
| 0.8547579298831386, | |
| 1.0 | |
| ], | |
| "eval_loss": 0.04932190850377083, | |
| "eval_precision": [ | |
| 0.9803837953091684, | |
| 0.8619528619528619, | |
| 1.0 | |
| ], | |
| "eval_recall": [ | |
| 0.9824786324786324, | |
| 0.847682119205298, | |
| 1.0 | |
| ], | |
| "eval_runtime": 2.7945, | |
| "eval_samples_per_second": 25.765, | |
| "eval_steps_per_second": 0.716, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 0.22960752248764038, | |
| "learning_rate": 3e-05, | |
| "loss": 0.0206, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accurracy": { | |
| "accuracy": 0.9836377473363774 | |
| }, | |
| "eval_f1": [ | |
| 0.9816160752458315, | |
| 0.858085808580858, | |
| 1.0 | |
| ], | |
| "eval_loss": 0.05238157883286476, | |
| "eval_precision": [ | |
| 0.9820359281437125, | |
| 0.8552631578947368, | |
| 1.0 | |
| ], | |
| "eval_recall": [ | |
| 0.9811965811965812, | |
| 0.8609271523178808, | |
| 1.0 | |
| ], | |
| "eval_runtime": 2.6883, | |
| "eval_samples_per_second": 26.783, | |
| "eval_steps_per_second": 0.744, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 0.19476890563964844, | |
| "learning_rate": 2.7500000000000004e-05, | |
| "loss": 0.0169, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accurracy": { | |
| "accuracy": 0.9828767123287672 | |
| }, | |
| "eval_f1": [ | |
| 0.9807527801539777, | |
| 0.8519736842105263, | |
| 1.0 | |
| ], | |
| "eval_loss": 0.055463388562202454, | |
| "eval_precision": [ | |
| 0.9815924657534246, | |
| 0.8464052287581699, | |
| 1.0 | |
| ], | |
| "eval_recall": [ | |
| 0.9799145299145299, | |
| 0.8576158940397351, | |
| 1.0 | |
| ], | |
| "eval_runtime": 2.6823, | |
| "eval_samples_per_second": 26.843, | |
| "eval_steps_per_second": 0.746, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 0.2635619640350342, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.0146, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accurracy": { | |
| "accuracy": 0.9836377473363774 | |
| }, | |
| "eval_f1": [ | |
| 0.9816160752458315, | |
| 0.858085808580858, | |
| 1.0 | |
| ], | |
| "eval_loss": 0.05970200523734093, | |
| "eval_precision": [ | |
| 0.9820359281437125, | |
| 0.8552631578947368, | |
| 1.0 | |
| ], | |
| "eval_recall": [ | |
| 0.9811965811965812, | |
| 0.8609271523178808, | |
| 1.0 | |
| ], | |
| "eval_runtime": 2.7111, | |
| "eval_samples_per_second": 26.558, | |
| "eval_steps_per_second": 0.738, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "grad_norm": 0.23059502243995667, | |
| "learning_rate": 2.25e-05, | |
| "loss": 0.0125, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accurracy": { | |
| "accuracy": 0.9834474885844748 | |
| }, | |
| "eval_f1": [ | |
| 0.9813824095869891, | |
| 0.8576104746317512, | |
| 1.0 | |
| ], | |
| "eval_loss": 0.06442799419164658, | |
| "eval_precision": [ | |
| 0.9828546935276468, | |
| 0.8478964401294499, | |
| 1.0 | |
| ], | |
| "eval_recall": [ | |
| 0.9799145299145299, | |
| 0.8675496688741722, | |
| 1.0 | |
| ], | |
| "eval_runtime": 2.6437, | |
| "eval_samples_per_second": 27.234, | |
| "eval_steps_per_second": 0.757, | |
| "step": 99 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 180, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 267219177271200.0, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |