| { | |
| "best_global_step": null, | |
| "best_metric": 0.9240172831045173, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.263157894736842, | |
| "eval_steps": 16, | |
| "global_step": 96, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.05263157894736842, | |
| "grad_norm": 82.16344451904297, | |
| "learning_rate": 5e-06, | |
| "loss": 3.8674, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.10526315789473684, | |
| "grad_norm": 242.86280822753906, | |
| "learning_rate": 1.1666666666666668e-05, | |
| "loss": 3.6908, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.10526315789473684, | |
| "eval_F1_err_corr": 0.5920247495103442, | |
| "eval_accuracy": 0.6310377809660449, | |
| "eval_correct_accuracy": 0.5115720882394246, | |
| "eval_error_accuracy": 0.7025044997996679, | |
| "eval_f1": 0.5395404356908385, | |
| "eval_loss": 0.8285790681838989, | |
| "eval_pr_auc": 0.5723630396883582, | |
| "eval_precision": 0.3904967602591793, | |
| "eval_recall": 0.8725868725868726, | |
| "eval_runtime": 23.3512, | |
| "eval_samples_per_second": 49.976, | |
| "eval_steps_per_second": 0.428, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.15789473684210525, | |
| "grad_norm": 74.22534942626953, | |
| "learning_rate": 1.8333333333333333e-05, | |
| "loss": 2.5223, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.21052631578947367, | |
| "grad_norm": 64.61672973632812, | |
| "learning_rate": 1.99967206113942e-05, | |
| "loss": 1.9821, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.21052631578947367, | |
| "eval_F1_err_corr": 0.8456369286942332, | |
| "eval_accuracy": 0.8366810138689622, | |
| "eval_correct_accuracy": 0.898511262557064, | |
| "eval_error_accuracy": 0.7986396970115458, | |
| "eval_f1": 0.6812879141390574, | |
| "eval_loss": 0.3859586715698242, | |
| "eval_pr_auc": 0.7321071153896515, | |
| "eval_precision": 0.6594399277326106, | |
| "eval_recall": 0.7046332046332047, | |
| "eval_runtime": 23.1639, | |
| "eval_samples_per_second": 50.38, | |
| "eval_steps_per_second": 0.432, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.2631578947368421, | |
| "grad_norm": 56.03799057006836, | |
| "learning_rate": 1.9982149887948264e-05, | |
| "loss": 1.787, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.3157894736842105, | |
| "grad_norm": 43.37980651855469, | |
| "learning_rate": 1.995594042425798e-05, | |
| "loss": 1.4845, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.3157894736842105, | |
| "eval_F1_err_corr": 0.8872989542610511, | |
| "eval_accuracy": 0.8730272596843616, | |
| "eval_correct_accuracy": 0.9604554888142675, | |
| "eval_error_accuracy": 0.8244981013888578, | |
| "eval_f1": 0.730593607305936, | |
| "eval_loss": 0.30173778533935547, | |
| "eval_pr_auc": 0.7963278967873059, | |
| "eval_precision": 0.7700534759358288, | |
| "eval_recall": 0.694980694980695, | |
| "eval_runtime": 23.2199, | |
| "eval_samples_per_second": 50.259, | |
| "eval_steps_per_second": 0.431, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.3684210526315789, | |
| "grad_norm": 10.989681243896484, | |
| "learning_rate": 1.99181227793856e-05, | |
| "loss": 1.2391, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.42105263157894735, | |
| "grad_norm": 19.56873893737793, | |
| "learning_rate": 1.9868741047013382e-05, | |
| "loss": 1.4107, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.42105263157894735, | |
| "eval_F1_err_corr": 0.8722315874998673, | |
| "eval_accuracy": 0.8766140602582496, | |
| "eval_correct_accuracy": 0.9661095393156461, | |
| "eval_error_accuracy": 0.7949822655705009, | |
| "eval_f1": 0.7024221453287197, | |
| "eval_loss": 0.29268908500671387, | |
| "eval_pr_auc": 0.8449069976179278, | |
| "eval_precision": 0.8724928366762178, | |
| "eval_recall": 0.5878378378378378, | |
| "eval_runtime": 23.2018, | |
| "eval_samples_per_second": 50.298, | |
| "eval_steps_per_second": 0.431, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.47368421052631576, | |
| "grad_norm": 26.890634536743164, | |
| "learning_rate": 1.9807852804032306e-05, | |
| "loss": 1.4247, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.5263157894736842, | |
| "grad_norm": 22.178682327270508, | |
| "learning_rate": 1.9735529043410012e-05, | |
| "loss": 1.1194, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.5263157894736842, | |
| "eval_F1_err_corr": 0.9029221553098332, | |
| "eval_accuracy": 0.9050693448110951, | |
| "eval_correct_accuracy": 0.9454399276155002, | |
| "eval_error_accuracy": 0.8640639748307817, | |
| "eval_f1": 0.7975522692503825, | |
| "eval_loss": 0.239300936460495, | |
| "eval_pr_auc": 0.872941982874099, | |
| "eval_precision": 0.8454054054054054, | |
| "eval_recall": 0.7548262548262549, | |
| "eval_runtime": 23.1821, | |
| "eval_samples_per_second": 50.341, | |
| "eval_steps_per_second": 0.431, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.5789473684210527, | |
| "grad_norm": 8.334005355834961, | |
| "learning_rate": 1.9651854091416175e-05, | |
| "loss": 1.0329, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.631578947368421, | |
| "grad_norm": 20.910919189453125, | |
| "learning_rate": 1.9556925509301844e-05, | |
| "loss": 0.9956, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.631578947368421, | |
| "eval_F1_err_corr": 0.8796785116369793, | |
| "eval_accuracy": 0.8935915829746532, | |
| "eval_correct_accuracy": 0.9475924160656984, | |
| "eval_error_accuracy": 0.8208483486319621, | |
| "eval_f1": 0.7593293672255274, | |
| "eval_loss": 0.25272682309150696, | |
| "eval_pr_auc": 0.8738089681286181, | |
| "eval_precision": 0.8634686346863468, | |
| "eval_recall": 0.6776061776061776, | |
| "eval_runtime": 23.1794, | |
| "eval_samples_per_second": 50.346, | |
| "eval_steps_per_second": 0.431, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.6842105263157895, | |
| "grad_norm": 22.64931869506836, | |
| "learning_rate": 1.9450853979547384e-05, | |
| "loss": 0.9445, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.7368421052631579, | |
| "grad_norm": 9.232457160949707, | |
| "learning_rate": 1.9333763176811663e-05, | |
| "loss": 0.9956, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.7368421052631579, | |
| "eval_F1_err_corr": 0.9132652179840324, | |
| "eval_accuracy": 0.9122429459588713, | |
| "eval_correct_accuracy": 0.930750326857197, | |
| "eval_error_accuracy": 0.8964249469238965, | |
| "eval_f1": 0.8259838786154575, | |
| "eval_loss": 0.21841835975646973, | |
| "eval_pr_auc": 0.8865448688581825, | |
| "eval_precision": 0.8117427772600186, | |
| "eval_recall": 0.8407335907335908, | |
| "eval_runtime": 23.1656, | |
| "eval_samples_per_second": 50.376, | |
| "eval_steps_per_second": 0.432, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.7894736842105263, | |
| "grad_norm": 11.506294250488281, | |
| "learning_rate": 1.9205789623732923e-05, | |
| "loss": 0.9999, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.8421052631578947, | |
| "grad_norm": 14.85898494720459, | |
| "learning_rate": 1.9067082531749496e-05, | |
| "loss": 0.9873, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.8421052631578947, | |
| "eval_F1_err_corr": 0.9074790316918309, | |
| "eval_accuracy": 0.9057867049258728, | |
| "eval_correct_accuracy": 0.9265510387266113, | |
| "eval_error_accuracy": 0.8891763406206684, | |
| "eval_f1": 0.8187672493100276, | |
| "eval_loss": 0.23048239946365356, | |
| "eval_pr_auc": 0.8888016792056976, | |
| "eval_precision": 0.7820738137082601, | |
| "eval_recall": 0.859073359073359, | |
| "eval_runtime": 23.1547, | |
| "eval_samples_per_second": 50.4, | |
| "eval_steps_per_second": 0.432, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.8947368421052632, | |
| "grad_norm": 12.407254219055176, | |
| "learning_rate": 1.891780362712594e-05, | |
| "loss": 0.9402, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.9473684210526315, | |
| "grad_norm": 5.819011211395264, | |
| "learning_rate": 1.875812696238745e-05, | |
| "loss": 0.9055, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.9473684210526315, | |
| "eval_F1_err_corr": 0.9134620670871267, | |
| "eval_accuracy": 0.9120038259206121, | |
| "eval_correct_accuracy": 0.9335912631332478, | |
| "eval_error_accuracy": 0.894182563484034, | |
| "eval_f1": 0.8270676691729323, | |
| "eval_loss": 0.2183985859155655, | |
| "eval_pr_auc": 0.8924188619420418, | |
| "eval_precision": 0.8058608058608059, | |
| "eval_recall": 0.8494208494208494, | |
| "eval_runtime": 23.3258, | |
| "eval_samples_per_second": 50.03, | |
| "eval_steps_per_second": 0.429, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 12.771453857421875, | |
| "learning_rate": 1.85882387133824e-05, | |
| "loss": 0.9761, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 1.0526315789473684, | |
| "grad_norm": 7.193549156188965, | |
| "learning_rate": 1.840833696220963e-05, | |
| "loss": 0.499, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.0526315789473684, | |
| "eval_F1_err_corr": 0.9079950528722031, | |
| "eval_accuracy": 0.9098517455762793, | |
| "eval_correct_accuracy": 0.9236024182207389, | |
| "eval_error_accuracy": 0.8929063998654336, | |
| "eval_f1": 0.824406148113647, | |
| "eval_loss": 0.2152077704668045, | |
| "eval_pr_auc": 0.8973152787211761, | |
| "eval_precision": 0.7965796579657966, | |
| "eval_recall": 0.8542471042471043, | |
| "eval_runtime": 23.1673, | |
| "eval_samples_per_second": 50.373, | |
| "eval_steps_per_second": 0.432, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.1052631578947367, | |
| "grad_norm": 5.080722332000732, | |
| "learning_rate": 1.8218631466263584e-05, | |
| "loss": 0.4177, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 1.1578947368421053, | |
| "grad_norm": 5.394437313079834, | |
| "learning_rate": 1.801934341366655e-05, | |
| "loss": 0.3945, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 1.1578947368421053, | |
| "eval_F1_err_corr": 0.9097585355242964, | |
| "eval_accuracy": 0.9158297465327594, | |
| "eval_correct_accuracy": 0.9179299538459843, | |
| "eval_error_accuracy": 0.9017313175664017, | |
| "eval_f1": 0.8367346938775511, | |
| "eval_loss": 0.23682163655757904, | |
| "eval_pr_auc": 0.8999781537818953, | |
| "eval_precision": 0.8053571428571429, | |
| "eval_recall": 0.8706563706563707, | |
| "eval_runtime": 23.1454, | |
| "eval_samples_per_second": 50.42, | |
| "eval_steps_per_second": 0.432, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 1.2105263157894737, | |
| "grad_norm": 5.82737398147583, | |
| "learning_rate": 1.7810705165373245e-05, | |
| "loss": 0.3592, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 1.263157894736842, | |
| "grad_norm": 7.944681167602539, | |
| "learning_rate": 1.75929599842483e-05, | |
| "loss": 0.395, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 1.263157894736842, | |
| "eval_F1_err_corr": 0.9240172831045173, | |
| "eval_accuracy": 0.9179818268770923, | |
| "eval_correct_accuracy": 0.9473624612073788, | |
| "eval_error_accuracy": 0.9017949916846976, | |
| "eval_f1": 0.8381311939594148, | |
| "eval_loss": 0.2285495400428772, | |
| "eval_pr_auc": 0.8914048907254241, | |
| "eval_precision": 0.8199445983379502, | |
| "eval_recall": 0.8571428571428571, | |
| "eval_runtime": 23.1568, | |
| "eval_samples_per_second": 50.396, | |
| "eval_steps_per_second": 0.432, | |
| "step": 96 | |
| } | |
| ], | |
| "logging_steps": 4, | |
| "max_steps": 380, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 16, | |
| "stateful_callbacks": { | |
| "MinEpochEarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 5, | |
| "early_stopping_threshold": 0.001 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 0 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 7.380768046815642e+16, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |