| { |
| "best_metric": 0.9155424324186734, |
| "best_model_checkpoint": "/nfs/production/literature/amina-mardiyyah/new_data/OT-Entity-Extraction-Pipeline/model_outputs/hf/PDBE_V1/base/variant_tapt_freeze_llrd_LR_5e/checkpoint-5984", |
| "epoch": 20.0, |
| "eval_steps": 500, |
| "global_step": 7040, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "grad_norm": 5.500258922576904, |
| "learning_rate": 9.886363636363637e-06, |
| "loss": 1.9616, |
| "step": 352 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.7942997075028909, |
| "eval_f1": 0.4435347694826792, |
| "eval_loss": 0.8090189695358276, |
| "eval_precision": 0.44396640032780166, |
| "eval_recall": 0.44310397709845617, |
| "eval_runtime": 4.7106, |
| "eval_samples_per_second": 256.019, |
| "eval_steps_per_second": 16.134, |
| "step": 352 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 3.6966164112091064, |
| "learning_rate": 1.9886363636363638e-05, |
| "loss": 0.5637, |
| "step": 704 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.907670679999093, |
| "eval_f1": 0.7394873848618342, |
| "eval_loss": 0.3523786664009094, |
| "eval_precision": 0.7244727807748896, |
| "eval_recall": 0.7551375115018915, |
| "eval_runtime": 4.7198, |
| "eval_samples_per_second": 255.517, |
| "eval_steps_per_second": 16.102, |
| "step": 704 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 3.4363410472869873, |
| "learning_rate": 1.8901515151515153e-05, |
| "loss": 0.2766, |
| "step": 1056 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.9382581683785683, |
| "eval_f1": 0.8332320162107396, |
| "eval_loss": 0.2291891872882843, |
| "eval_precision": 0.8257857214579777, |
| "eval_recall": 0.8408138227175136, |
| "eval_runtime": 4.7342, |
| "eval_samples_per_second": 254.741, |
| "eval_steps_per_second": 16.053, |
| "step": 1056 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 7.272336006164551, |
| "learning_rate": 1.779040404040404e-05, |
| "loss": 0.1736, |
| "step": 1408 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.9463301816202979, |
| "eval_f1": 0.8636762504516595, |
| "eval_loss": 0.21017228066921234, |
| "eval_precision": 0.872185154295246, |
| "eval_recall": 0.8553317656681321, |
| "eval_runtime": 4.6969, |
| "eval_samples_per_second": 256.763, |
| "eval_steps_per_second": 16.181, |
| "step": 1408 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 4.899765491485596, |
| "learning_rate": 1.667929292929293e-05, |
| "loss": 0.1209, |
| "step": 1760 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.9505929301861551, |
| "eval_f1": 0.8776264492937066, |
| "eval_loss": 0.19507275521755219, |
| "eval_precision": 0.8693079237713139, |
| "eval_recall": 0.8861057151620488, |
| "eval_runtime": 4.7971, |
| "eval_samples_per_second": 251.404, |
| "eval_steps_per_second": 15.843, |
| "step": 1760 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 6.236667156219482, |
| "learning_rate": 1.5568181818181822e-05, |
| "loss": 0.0846, |
| "step": 2112 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.9544248690565268, |
| "eval_f1": 0.8910820208631622, |
| "eval_loss": 0.1981223076581955, |
| "eval_precision": 0.8913554987212277, |
| "eval_recall": 0.8908087107657704, |
| "eval_runtime": 4.7584, |
| "eval_samples_per_second": 253.448, |
| "eval_steps_per_second": 15.972, |
| "step": 2112 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 2.797636032104492, |
| "learning_rate": 1.4457070707070708e-05, |
| "loss": 0.0641, |
| "step": 2464 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.9560120626714737, |
| "eval_f1": 0.8992974238875878, |
| "eval_loss": 0.1916956752538681, |
| "eval_precision": 0.8956495284453909, |
| "eval_recall": 0.9029751559145281, |
| "eval_runtime": 4.6977, |
| "eval_samples_per_second": 256.723, |
| "eval_steps_per_second": 16.178, |
| "step": 2464 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 0.630720317363739, |
| "learning_rate": 1.3349116161616163e-05, |
| "loss": 0.0485, |
| "step": 2816 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.9574858853139242, |
| "eval_f1": 0.9026530715771318, |
| "eval_loss": 0.20014619827270508, |
| "eval_precision": 0.8957913813934756, |
| "eval_recall": 0.9096206931806564, |
| "eval_runtime": 4.7071, |
| "eval_samples_per_second": 256.208, |
| "eval_steps_per_second": 16.146, |
| "step": 2816 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 1.6925524473190308, |
| "learning_rate": 1.223800505050505e-05, |
| "loss": 0.0374, |
| "step": 3168 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.9577353014534159, |
| "eval_f1": 0.9029548674357417, |
| "eval_loss": 0.19619333744049072, |
| "eval_precision": 0.8938194931383352, |
| "eval_recall": 0.9122789080871077, |
| "eval_runtime": 4.7068, |
| "eval_samples_per_second": 256.224, |
| "eval_steps_per_second": 16.147, |
| "step": 3168 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 2.707470178604126, |
| "learning_rate": 1.1126893939393941e-05, |
| "loss": 0.0299, |
| "step": 3520 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.9590050563453734, |
| "eval_f1": 0.9061852020431902, |
| "eval_loss": 0.20695683360099792, |
| "eval_precision": 0.896617293835068, |
| "eval_recall": 0.915959513342194, |
| "eval_runtime": 4.8969, |
| "eval_samples_per_second": 246.279, |
| "eval_steps_per_second": 15.52, |
| "step": 3520 |
| }, |
| { |
| "epoch": 11.0, |
| "grad_norm": 1.546488881111145, |
| "learning_rate": 1.001578282828283e-05, |
| "loss": 0.0238, |
| "step": 3872 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_accuracy": 0.9591411015123688, |
| "eval_f1": 0.909967030180066, |
| "eval_loss": 0.2103826105594635, |
| "eval_precision": 0.9029595329172538, |
| "eval_recall": 0.9170841427256927, |
| "eval_runtime": 4.7212, |
| "eval_samples_per_second": 255.442, |
| "eval_steps_per_second": 16.098, |
| "step": 3872 |
| }, |
| { |
| "epoch": 12.0, |
| "grad_norm": 3.5837290287017822, |
| "learning_rate": 8.904671717171718e-06, |
| "loss": 0.0195, |
| "step": 4224 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_accuracy": 0.9592998208738635, |
| "eval_f1": 0.9081518987341772, |
| "eval_loss": 0.21696116030216217, |
| "eval_precision": 0.8995887250476478, |
| "eval_recall": 0.9168796646559656, |
| "eval_runtime": 4.7316, |
| "eval_samples_per_second": 254.88, |
| "eval_steps_per_second": 16.062, |
| "step": 4224 |
| }, |
| { |
| "epoch": 13.0, |
| "grad_norm": 1.4384242296218872, |
| "learning_rate": 7.793560606060607e-06, |
| "loss": 0.0158, |
| "step": 4576 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_accuracy": 0.9607509693218148, |
| "eval_f1": 0.911646790154783, |
| "eval_loss": 0.21840226650238037, |
| "eval_precision": 0.9050785973397824, |
| "eval_recall": 0.9183110111440548, |
| "eval_runtime": 4.7576, |
| "eval_samples_per_second": 253.492, |
| "eval_steps_per_second": 15.975, |
| "step": 4576 |
| }, |
| { |
| "epoch": 14.0, |
| "grad_norm": 0.07870839536190033, |
| "learning_rate": 6.685606060606061e-06, |
| "loss": 0.013, |
| "step": 4928 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_accuracy": 0.9601614402648346, |
| "eval_f1": 0.9112886910782925, |
| "eval_loss": 0.225505068898201, |
| "eval_precision": 0.9017115403863477, |
| "eval_recall": 0.9210714650853696, |
| "eval_runtime": 4.7173, |
| "eval_samples_per_second": 255.653, |
| "eval_steps_per_second": 16.111, |
| "step": 4928 |
| }, |
| { |
| "epoch": 15.0, |
| "grad_norm": 0.22939985990524292, |
| "learning_rate": 5.574494949494949e-06, |
| "loss": 0.011, |
| "step": 5280 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_accuracy": 0.9600027209033399, |
| "eval_f1": 0.911293592862936, |
| "eval_loss": 0.23664474487304688, |
| "eval_precision": 0.9036895546395898, |
| "eval_recall": 0.9190266843880994, |
| "eval_runtime": 4.6625, |
| "eval_samples_per_second": 258.659, |
| "eval_steps_per_second": 16.3, |
| "step": 5280 |
| }, |
| { |
| "epoch": 16.0, |
| "grad_norm": 1.5021331310272217, |
| "learning_rate": 4.463383838383838e-06, |
| "loss": 0.0101, |
| "step": 5632 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_accuracy": 0.9613404983787951, |
| "eval_f1": 0.9153041535493043, |
| "eval_loss": 0.23169253766536713, |
| "eval_precision": 0.909210128114597, |
| "eval_recall": 0.9214804212248237, |
| "eval_runtime": 4.7345, |
| "eval_samples_per_second": 254.724, |
| "eval_steps_per_second": 16.052, |
| "step": 5632 |
| }, |
| { |
| "epoch": 17.0, |
| "grad_norm": 2.7792775630950928, |
| "learning_rate": 3.352272727272727e-06, |
| "loss": 0.0083, |
| "step": 5984 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_accuracy": 0.9616579371017845, |
| "eval_f1": 0.9155424324186734, |
| "eval_loss": 0.2343681901693344, |
| "eval_precision": 0.9117825998783208, |
| "eval_recall": 0.9193334014926899, |
| "eval_runtime": 4.7707, |
| "eval_samples_per_second": 252.794, |
| "eval_steps_per_second": 15.931, |
| "step": 5984 |
| }, |
| { |
| "epoch": 18.0, |
| "grad_norm": 0.13295000791549683, |
| "learning_rate": 2.2411616161616163e-06, |
| "loss": 0.007, |
| "step": 6336 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_accuracy": 0.9614538693512913, |
| "eval_f1": 0.9152025997765817, |
| "eval_loss": 0.23998132348060608, |
| "eval_precision": 0.9091092504791688, |
| "eval_recall": 0.9213781821899601, |
| "eval_runtime": 4.7197, |
| "eval_samples_per_second": 255.526, |
| "eval_steps_per_second": 16.103, |
| "step": 6336 |
| }, |
| { |
| "epoch": 19.0, |
| "grad_norm": 0.09591899812221527, |
| "learning_rate": 1.1300505050505053e-06, |
| "loss": 0.0067, |
| "step": 6688 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_accuracy": 0.9611364306283019, |
| "eval_f1": 0.9143581938102486, |
| "eval_loss": 0.2395462840795517, |
| "eval_precision": 0.9075435592708229, |
| "eval_recall": 0.9212759431550966, |
| "eval_runtime": 4.7907, |
| "eval_samples_per_second": 251.737, |
| "eval_steps_per_second": 15.864, |
| "step": 6688 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 0.8892824649810791, |
| "learning_rate": 1.893939393939394e-08, |
| "loss": 0.0063, |
| "step": 7040 |
| }, |
| { |
| "epoch": 20.0, |
| "eval_accuracy": 0.9611364306283019, |
| "eval_f1": 0.9145468392993145, |
| "eval_loss": 0.24203987419605255, |
| "eval_precision": 0.9084123461771233, |
| "eval_recall": 0.920764747980779, |
| "eval_runtime": 4.8931, |
| "eval_samples_per_second": 246.467, |
| "eval_steps_per_second": 15.532, |
| "step": 7040 |
| }, |
| { |
| "epoch": 20.0, |
| "step": 7040, |
| "total_flos": 2304092004793320.0, |
| "train_loss": 0.1741130460731008, |
| "train_runtime": 917.4844, |
| "train_samples_per_second": 122.705, |
| "train_steps_per_second": 7.673 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 7040, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 20, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2304092004793320.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|