| { |
| "best_global_step": null, |
| "best_metric": 0.9240172831045173, |
| "best_model_checkpoint": null, |
| "epoch": 1.263157894736842, |
| "eval_steps": 16, |
| "global_step": 96, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.05263157894736842, |
| "grad_norm": 82.16344451904297, |
| "learning_rate": 5e-06, |
| "loss": 3.8674, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.10526315789473684, |
| "grad_norm": 242.86280822753906, |
| "learning_rate": 1.1666666666666668e-05, |
| "loss": 3.6908, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.10526315789473684, |
| "eval_F1_err_corr": 0.5920247495103442, |
| "eval_accuracy": 0.6310377809660449, |
| "eval_correct_accuracy": 0.5115720882394246, |
| "eval_error_accuracy": 0.7025044997996679, |
| "eval_f1": 0.5395404356908385, |
| "eval_loss": 0.8285790681838989, |
| "eval_pr_auc": 0.5723630396883582, |
| "eval_precision": 0.3904967602591793, |
| "eval_recall": 0.8725868725868726, |
| "eval_runtime": 23.3512, |
| "eval_samples_per_second": 49.976, |
| "eval_steps_per_second": 0.428, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.15789473684210525, |
| "grad_norm": 74.22534942626953, |
| "learning_rate": 1.8333333333333333e-05, |
| "loss": 2.5223, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.21052631578947367, |
| "grad_norm": 64.61672973632812, |
| "learning_rate": 1.99967206113942e-05, |
| "loss": 1.9821, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.21052631578947367, |
| "eval_F1_err_corr": 0.8456369286942332, |
| "eval_accuracy": 0.8366810138689622, |
| "eval_correct_accuracy": 0.898511262557064, |
| "eval_error_accuracy": 0.7986396970115458, |
| "eval_f1": 0.6812879141390574, |
| "eval_loss": 0.3859586715698242, |
| "eval_pr_auc": 0.7321071153896515, |
| "eval_precision": 0.6594399277326106, |
| "eval_recall": 0.7046332046332047, |
| "eval_runtime": 23.1639, |
| "eval_samples_per_second": 50.38, |
| "eval_steps_per_second": 0.432, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.2631578947368421, |
| "grad_norm": 56.03799057006836, |
| "learning_rate": 1.9982149887948264e-05, |
| "loss": 1.787, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.3157894736842105, |
| "grad_norm": 43.37980651855469, |
| "learning_rate": 1.995594042425798e-05, |
| "loss": 1.4845, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.3157894736842105, |
| "eval_F1_err_corr": 0.8872989542610511, |
| "eval_accuracy": 0.8730272596843616, |
| "eval_correct_accuracy": 0.9604554888142675, |
| "eval_error_accuracy": 0.8244981013888578, |
| "eval_f1": 0.730593607305936, |
| "eval_loss": 0.30173778533935547, |
| "eval_pr_auc": 0.7963278967873059, |
| "eval_precision": 0.7700534759358288, |
| "eval_recall": 0.694980694980695, |
| "eval_runtime": 23.2199, |
| "eval_samples_per_second": 50.259, |
| "eval_steps_per_second": 0.431, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.3684210526315789, |
| "grad_norm": 10.989681243896484, |
| "learning_rate": 1.99181227793856e-05, |
| "loss": 1.2391, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.42105263157894735, |
| "grad_norm": 19.56873893737793, |
| "learning_rate": 1.9868741047013382e-05, |
| "loss": 1.4107, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.42105263157894735, |
| "eval_F1_err_corr": 0.8722315874998673, |
| "eval_accuracy": 0.8766140602582496, |
| "eval_correct_accuracy": 0.9661095393156461, |
| "eval_error_accuracy": 0.7949822655705009, |
| "eval_f1": 0.7024221453287197, |
| "eval_loss": 0.29268908500671387, |
| "eval_pr_auc": 0.8449069976179278, |
| "eval_precision": 0.8724928366762178, |
| "eval_recall": 0.5878378378378378, |
| "eval_runtime": 23.2018, |
| "eval_samples_per_second": 50.298, |
| "eval_steps_per_second": 0.431, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.47368421052631576, |
| "grad_norm": 26.890634536743164, |
| "learning_rate": 1.9807852804032306e-05, |
| "loss": 1.4247, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.5263157894736842, |
| "grad_norm": 22.178682327270508, |
| "learning_rate": 1.9735529043410012e-05, |
| "loss": 1.1194, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.5263157894736842, |
| "eval_F1_err_corr": 0.9029221553098332, |
| "eval_accuracy": 0.9050693448110951, |
| "eval_correct_accuracy": 0.9454399276155002, |
| "eval_error_accuracy": 0.8640639748307817, |
| "eval_f1": 0.7975522692503825, |
| "eval_loss": 0.239300936460495, |
| "eval_pr_auc": 0.872941982874099, |
| "eval_precision": 0.8454054054054054, |
| "eval_recall": 0.7548262548262549, |
| "eval_runtime": 23.1821, |
| "eval_samples_per_second": 50.341, |
| "eval_steps_per_second": 0.431, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.5789473684210527, |
| "grad_norm": 8.334005355834961, |
| "learning_rate": 1.9651854091416175e-05, |
| "loss": 1.0329, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.631578947368421, |
| "grad_norm": 20.910919189453125, |
| "learning_rate": 1.9556925509301844e-05, |
| "loss": 0.9956, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.631578947368421, |
| "eval_F1_err_corr": 0.8796785116369793, |
| "eval_accuracy": 0.8935915829746532, |
| "eval_correct_accuracy": 0.9475924160656984, |
| "eval_error_accuracy": 0.8208483486319621, |
| "eval_f1": 0.7593293672255274, |
| "eval_loss": 0.25272682309150696, |
| "eval_pr_auc": 0.8738089681286181, |
| "eval_precision": 0.8634686346863468, |
| "eval_recall": 0.6776061776061776, |
| "eval_runtime": 23.1794, |
| "eval_samples_per_second": 50.346, |
| "eval_steps_per_second": 0.431, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.6842105263157895, |
| "grad_norm": 22.64931869506836, |
| "learning_rate": 1.9450853979547384e-05, |
| "loss": 0.9445, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.7368421052631579, |
| "grad_norm": 9.232457160949707, |
| "learning_rate": 1.9333763176811663e-05, |
| "loss": 0.9956, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.7368421052631579, |
| "eval_F1_err_corr": 0.9132652179840324, |
| "eval_accuracy": 0.9122429459588713, |
| "eval_correct_accuracy": 0.930750326857197, |
| "eval_error_accuracy": 0.8964249469238965, |
| "eval_f1": 0.8259838786154575, |
| "eval_loss": 0.21841835975646973, |
| "eval_pr_auc": 0.8865448688581825, |
| "eval_precision": 0.8117427772600186, |
| "eval_recall": 0.8407335907335908, |
| "eval_runtime": 23.1656, |
| "eval_samples_per_second": 50.376, |
| "eval_steps_per_second": 0.432, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.7894736842105263, |
| "grad_norm": 11.506294250488281, |
| "learning_rate": 1.9205789623732923e-05, |
| "loss": 0.9999, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.8421052631578947, |
| "grad_norm": 14.85898494720459, |
| "learning_rate": 1.9067082531749496e-05, |
| "loss": 0.9873, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.8421052631578947, |
| "eval_F1_err_corr": 0.9074790316918309, |
| "eval_accuracy": 0.9057867049258728, |
| "eval_correct_accuracy": 0.9265510387266113, |
| "eval_error_accuracy": 0.8891763406206684, |
| "eval_f1": 0.8187672493100276, |
| "eval_loss": 0.23048239946365356, |
| "eval_pr_auc": 0.8888016792056976, |
| "eval_precision": 0.7820738137082601, |
| "eval_recall": 0.859073359073359, |
| "eval_runtime": 23.1547, |
| "eval_samples_per_second": 50.4, |
| "eval_steps_per_second": 0.432, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.8947368421052632, |
| "grad_norm": 12.407254219055176, |
| "learning_rate": 1.891780362712594e-05, |
| "loss": 0.9402, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.9473684210526315, |
| "grad_norm": 5.819011211395264, |
| "learning_rate": 1.875812696238745e-05, |
| "loss": 0.9055, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.9473684210526315, |
| "eval_F1_err_corr": 0.9134620670871267, |
| "eval_accuracy": 0.9120038259206121, |
| "eval_correct_accuracy": 0.9335912631332478, |
| "eval_error_accuracy": 0.894182563484034, |
| "eval_f1": 0.8270676691729323, |
| "eval_loss": 0.2183985859155655, |
| "eval_pr_auc": 0.8924188619420418, |
| "eval_precision": 0.8058608058608059, |
| "eval_recall": 0.8494208494208494, |
| "eval_runtime": 23.3258, |
| "eval_samples_per_second": 50.03, |
| "eval_steps_per_second": 0.429, |
| "step": 72 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 12.771453857421875, |
| "learning_rate": 1.85882387133824e-05, |
| "loss": 0.9761, |
| "step": 76 |
| }, |
| { |
| "epoch": 1.0526315789473684, |
| "grad_norm": 7.193549156188965, |
| "learning_rate": 1.840833696220963e-05, |
| "loss": 0.499, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.0526315789473684, |
| "eval_F1_err_corr": 0.9079950528722031, |
| "eval_accuracy": 0.9098517455762793, |
| "eval_correct_accuracy": 0.9236024182207389, |
| "eval_error_accuracy": 0.8929063998654336, |
| "eval_f1": 0.824406148113647, |
| "eval_loss": 0.2152077704668045, |
| "eval_pr_auc": 0.8973152787211761, |
| "eval_precision": 0.7965796579657966, |
| "eval_recall": 0.8542471042471043, |
| "eval_runtime": 23.1673, |
| "eval_samples_per_second": 50.373, |
| "eval_steps_per_second": 0.432, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.1052631578947367, |
| "grad_norm": 5.080722332000732, |
| "learning_rate": 1.8218631466263584e-05, |
| "loss": 0.4177, |
| "step": 84 |
| }, |
| { |
| "epoch": 1.1578947368421053, |
| "grad_norm": 5.394437313079834, |
| "learning_rate": 1.801934341366655e-05, |
| "loss": 0.3945, |
| "step": 88 |
| }, |
| { |
| "epoch": 1.1578947368421053, |
| "eval_F1_err_corr": 0.9097585355242964, |
| "eval_accuracy": 0.9158297465327594, |
| "eval_correct_accuracy": 0.9179299538459843, |
| "eval_error_accuracy": 0.9017313175664017, |
| "eval_f1": 0.8367346938775511, |
| "eval_loss": 0.23682163655757904, |
| "eval_pr_auc": 0.8999781537818953, |
| "eval_precision": 0.8053571428571429, |
| "eval_recall": 0.8706563706563707, |
| "eval_runtime": 23.1454, |
| "eval_samples_per_second": 50.42, |
| "eval_steps_per_second": 0.432, |
| "step": 88 |
| }, |
| { |
| "epoch": 1.2105263157894737, |
| "grad_norm": 5.82737398147583, |
| "learning_rate": 1.7810705165373245e-05, |
| "loss": 0.3592, |
| "step": 92 |
| }, |
| { |
| "epoch": 1.263157894736842, |
| "grad_norm": 7.944681167602539, |
| "learning_rate": 1.75929599842483e-05, |
| "loss": 0.395, |
| "step": 96 |
| }, |
| { |
| "epoch": 1.263157894736842, |
| "eval_F1_err_corr": 0.9240172831045173, |
| "eval_accuracy": 0.9179818268770923, |
| "eval_correct_accuracy": 0.9473624612073788, |
| "eval_error_accuracy": 0.9017949916846976, |
| "eval_f1": 0.8381311939594148, |
| "eval_loss": 0.2285495400428772, |
| "eval_pr_auc": 0.8914048907254241, |
| "eval_precision": 0.8199445983379502, |
| "eval_recall": 0.8571428571428571, |
| "eval_runtime": 23.1568, |
| "eval_samples_per_second": 50.396, |
| "eval_steps_per_second": 0.432, |
| "step": 96 |
| } |
| ], |
| "logging_steps": 4, |
| "max_steps": 380, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 16, |
| "stateful_callbacks": { |
| "MinEpochEarlyStoppingCallback": { |
| "args": { |
| "early_stopping_patience": 5, |
| "early_stopping_threshold": 0.001 |
| }, |
| "attributes": { |
| "early_stopping_patience_counter": 0 |
| } |
| }, |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.380768046815642e+16, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|