{ "best_global_step": null, "best_metric": 0.9240172831045173, "best_model_checkpoint": null, "epoch": 1.263157894736842, "eval_steps": 16, "global_step": 96, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05263157894736842, "grad_norm": 82.16344451904297, "learning_rate": 5e-06, "loss": 3.8674, "step": 4 }, { "epoch": 0.10526315789473684, "grad_norm": 242.86280822753906, "learning_rate": 1.1666666666666668e-05, "loss": 3.6908, "step": 8 }, { "epoch": 0.10526315789473684, "eval_F1_err_corr": 0.5920247495103442, "eval_accuracy": 0.6310377809660449, "eval_correct_accuracy": 0.5115720882394246, "eval_error_accuracy": 0.7025044997996679, "eval_f1": 0.5395404356908385, "eval_loss": 0.8285790681838989, "eval_pr_auc": 0.5723630396883582, "eval_precision": 0.3904967602591793, "eval_recall": 0.8725868725868726, "eval_runtime": 23.3512, "eval_samples_per_second": 49.976, "eval_steps_per_second": 0.428, "step": 8 }, { "epoch": 0.15789473684210525, "grad_norm": 74.22534942626953, "learning_rate": 1.8333333333333333e-05, "loss": 2.5223, "step": 12 }, { "epoch": 0.21052631578947367, "grad_norm": 64.61672973632812, "learning_rate": 1.99967206113942e-05, "loss": 1.9821, "step": 16 }, { "epoch": 0.21052631578947367, "eval_F1_err_corr": 0.8456369286942332, "eval_accuracy": 0.8366810138689622, "eval_correct_accuracy": 0.898511262557064, "eval_error_accuracy": 0.7986396970115458, "eval_f1": 0.6812879141390574, "eval_loss": 0.3859586715698242, "eval_pr_auc": 0.7321071153896515, "eval_precision": 0.6594399277326106, "eval_recall": 0.7046332046332047, "eval_runtime": 23.1639, "eval_samples_per_second": 50.38, "eval_steps_per_second": 0.432, "step": 16 }, { "epoch": 0.2631578947368421, "grad_norm": 56.03799057006836, "learning_rate": 1.9982149887948264e-05, "loss": 1.787, "step": 20 }, { "epoch": 0.3157894736842105, "grad_norm": 43.37980651855469, "learning_rate": 1.995594042425798e-05, "loss": 1.4845, "step": 24 }, { "epoch": 0.3157894736842105, "eval_F1_err_corr": 0.8872989542610511, "eval_accuracy": 0.8730272596843616, "eval_correct_accuracy": 0.9604554888142675, "eval_error_accuracy": 0.8244981013888578, "eval_f1": 0.730593607305936, "eval_loss": 0.30173778533935547, "eval_pr_auc": 0.7963278967873059, "eval_precision": 0.7700534759358288, "eval_recall": 0.694980694980695, "eval_runtime": 23.2199, "eval_samples_per_second": 50.259, "eval_steps_per_second": 0.431, "step": 24 }, { "epoch": 0.3684210526315789, "grad_norm": 10.989681243896484, "learning_rate": 1.99181227793856e-05, "loss": 1.2391, "step": 28 }, { "epoch": 0.42105263157894735, "grad_norm": 19.56873893737793, "learning_rate": 1.9868741047013382e-05, "loss": 1.4107, "step": 32 }, { "epoch": 0.42105263157894735, "eval_F1_err_corr": 0.8722315874998673, "eval_accuracy": 0.8766140602582496, "eval_correct_accuracy": 0.9661095393156461, "eval_error_accuracy": 0.7949822655705009, "eval_f1": 0.7024221453287197, "eval_loss": 0.29268908500671387, "eval_pr_auc": 0.8449069976179278, "eval_precision": 0.8724928366762178, "eval_recall": 0.5878378378378378, "eval_runtime": 23.2018, "eval_samples_per_second": 50.298, "eval_steps_per_second": 0.431, "step": 32 }, { "epoch": 0.47368421052631576, "grad_norm": 26.890634536743164, "learning_rate": 1.9807852804032306e-05, "loss": 1.4247, "step": 36 }, { "epoch": 0.5263157894736842, "grad_norm": 22.178682327270508, "learning_rate": 1.9735529043410012e-05, "loss": 1.1194, "step": 40 }, { "epoch": 0.5263157894736842, "eval_F1_err_corr": 0.9029221553098332, "eval_accuracy": 0.9050693448110951, "eval_correct_accuracy": 0.9454399276155002, "eval_error_accuracy": 0.8640639748307817, "eval_f1": 0.7975522692503825, "eval_loss": 0.239300936460495, "eval_pr_auc": 0.872941982874099, "eval_precision": 0.8454054054054054, "eval_recall": 0.7548262548262549, "eval_runtime": 23.1821, "eval_samples_per_second": 50.341, "eval_steps_per_second": 0.431, "step": 40 }, { "epoch": 0.5789473684210527, "grad_norm": 8.334005355834961, "learning_rate": 1.9651854091416175e-05, "loss": 1.0329, "step": 44 }, { "epoch": 0.631578947368421, "grad_norm": 20.910919189453125, "learning_rate": 1.9556925509301844e-05, "loss": 0.9956, "step": 48 }, { "epoch": 0.631578947368421, "eval_F1_err_corr": 0.8796785116369793, "eval_accuracy": 0.8935915829746532, "eval_correct_accuracy": 0.9475924160656984, "eval_error_accuracy": 0.8208483486319621, "eval_f1": 0.7593293672255274, "eval_loss": 0.25272682309150696, "eval_pr_auc": 0.8738089681286181, "eval_precision": 0.8634686346863468, "eval_recall": 0.6776061776061776, "eval_runtime": 23.1794, "eval_samples_per_second": 50.346, "eval_steps_per_second": 0.431, "step": 48 }, { "epoch": 0.6842105263157895, "grad_norm": 22.64931869506836, "learning_rate": 1.9450853979547384e-05, "loss": 0.9445, "step": 52 }, { "epoch": 0.7368421052631579, "grad_norm": 9.232457160949707, "learning_rate": 1.9333763176811663e-05, "loss": 0.9956, "step": 56 }, { "epoch": 0.7368421052631579, "eval_F1_err_corr": 0.9132652179840324, "eval_accuracy": 0.9122429459588713, "eval_correct_accuracy": 0.930750326857197, "eval_error_accuracy": 0.8964249469238965, "eval_f1": 0.8259838786154575, "eval_loss": 0.21841835975646973, "eval_pr_auc": 0.8865448688581825, "eval_precision": 0.8117427772600186, "eval_recall": 0.8407335907335908, "eval_runtime": 23.1656, "eval_samples_per_second": 50.376, "eval_steps_per_second": 0.432, "step": 56 }, { "epoch": 0.7894736842105263, "grad_norm": 11.506294250488281, "learning_rate": 1.9205789623732923e-05, "loss": 0.9999, "step": 60 }, { "epoch": 0.8421052631578947, "grad_norm": 14.85898494720459, "learning_rate": 1.9067082531749496e-05, "loss": 0.9873, "step": 64 }, { "epoch": 0.8421052631578947, "eval_F1_err_corr": 0.9074790316918309, "eval_accuracy": 0.9057867049258728, "eval_correct_accuracy": 0.9265510387266113, "eval_error_accuracy": 0.8891763406206684, "eval_f1": 0.8187672493100276, "eval_loss": 0.23048239946365356, "eval_pr_auc": 0.8888016792056976, "eval_precision": 0.7820738137082601, "eval_recall": 0.859073359073359, "eval_runtime": 23.1547, "eval_samples_per_second": 50.4, "eval_steps_per_second": 0.432, "step": 64 }, { "epoch": 0.8947368421052632, "grad_norm": 12.407254219055176, "learning_rate": 1.891780362712594e-05, "loss": 0.9402, "step": 68 }, { "epoch": 0.9473684210526315, "grad_norm": 5.819011211395264, "learning_rate": 1.875812696238745e-05, "loss": 0.9055, "step": 72 }, { "epoch": 0.9473684210526315, "eval_F1_err_corr": 0.9134620670871267, "eval_accuracy": 0.9120038259206121, "eval_correct_accuracy": 0.9335912631332478, "eval_error_accuracy": 0.894182563484034, "eval_f1": 0.8270676691729323, "eval_loss": 0.2183985859155655, "eval_pr_auc": 0.8924188619420418, "eval_precision": 0.8058608058608059, "eval_recall": 0.8494208494208494, "eval_runtime": 23.3258, "eval_samples_per_second": 50.03, "eval_steps_per_second": 0.429, "step": 72 }, { "epoch": 1.0, "grad_norm": 12.771453857421875, "learning_rate": 1.85882387133824e-05, "loss": 0.9761, "step": 76 }, { "epoch": 1.0526315789473684, "grad_norm": 7.193549156188965, "learning_rate": 1.840833696220963e-05, "loss": 0.499, "step": 80 }, { "epoch": 1.0526315789473684, "eval_F1_err_corr": 0.9079950528722031, "eval_accuracy": 0.9098517455762793, "eval_correct_accuracy": 0.9236024182207389, "eval_error_accuracy": 0.8929063998654336, "eval_f1": 0.824406148113647, "eval_loss": 0.2152077704668045, "eval_pr_auc": 0.8973152787211761, "eval_precision": 0.7965796579657966, "eval_recall": 0.8542471042471043, "eval_runtime": 23.1673, "eval_samples_per_second": 50.373, "eval_steps_per_second": 0.432, "step": 80 }, { "epoch": 1.1052631578947367, "grad_norm": 5.080722332000732, "learning_rate": 1.8218631466263584e-05, "loss": 0.4177, "step": 84 }, { "epoch": 1.1578947368421053, "grad_norm": 5.394437313079834, "learning_rate": 1.801934341366655e-05, "loss": 0.3945, "step": 88 }, { "epoch": 1.1578947368421053, "eval_F1_err_corr": 0.9097585355242964, "eval_accuracy": 0.9158297465327594, "eval_correct_accuracy": 0.9179299538459843, "eval_error_accuracy": 0.9017313175664017, "eval_f1": 0.8367346938775511, "eval_loss": 0.23682163655757904, "eval_pr_auc": 0.8999781537818953, "eval_precision": 0.8053571428571429, "eval_recall": 0.8706563706563707, "eval_runtime": 23.1454, "eval_samples_per_second": 50.42, "eval_steps_per_second": 0.432, "step": 88 }, { "epoch": 1.2105263157894737, "grad_norm": 5.82737398147583, "learning_rate": 1.7810705165373245e-05, "loss": 0.3592, "step": 92 }, { "epoch": 1.263157894736842, "grad_norm": 7.944681167602539, "learning_rate": 1.75929599842483e-05, "loss": 0.395, "step": 96 }, { "epoch": 1.263157894736842, "eval_F1_err_corr": 0.9240172831045173, "eval_accuracy": 0.9179818268770923, "eval_correct_accuracy": 0.9473624612073788, "eval_error_accuracy": 0.9017949916846976, "eval_f1": 0.8381311939594148, "eval_loss": 0.2285495400428772, "eval_pr_auc": 0.8914048907254241, "eval_precision": 0.8199445983379502, "eval_recall": 0.8571428571428571, "eval_runtime": 23.1568, "eval_samples_per_second": 50.396, "eval_steps_per_second": 0.432, "step": 96 } ], "logging_steps": 4, "max_steps": 380, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 16, "stateful_callbacks": { "MinEpochEarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.001 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.380768046815642e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }