{ "best_global_step": null, "best_metric": 0.9054671281010055, "best_model_checkpoint": null, "epoch": 1.1578947368421053, "eval_steps": 16, "global_step": 88, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05263157894736842, "grad_norm": 291.3879089355469, "learning_rate": 5e-06, "loss": 4.5409, "step": 4 }, { "epoch": 0.10526315789473684, "grad_norm": 203.68991088867188, "learning_rate": 1.1666666666666668e-05, "loss": 3.7189, "step": 8 }, { "epoch": 0.10526315789473684, "eval_F1_err_corr": 0.6871469086338724, "eval_accuracy": 0.7513151602104257, "eval_correct_accuracy": 0.9983222883986244, "eval_error_accuracy": 0.523860477930856, "eval_f1": 0.0057361376673040155, "eval_loss": 1.093567132949829, "eval_pr_auc": 0.3045333732522034, "eval_precision": 0.3, "eval_recall": 0.0028957528957528956, "eval_runtime": 7.2032, "eval_samples_per_second": 162.011, "eval_steps_per_second": 1.388, "step": 8 }, { "epoch": 0.15789473684210525, "grad_norm": 187.05014038085938, "learning_rate": 1.8333333333333333e-05, "loss": 4.2318, "step": 12 }, { "epoch": 0.21052631578947367, "grad_norm": 146.46641540527344, "learning_rate": 1.99967206113942e-05, "loss": 2.7212, "step": 16 }, { "epoch": 0.21052631578947367, "eval_F1_err_corr": 0.6332040138458732, "eval_accuracy": 0.6743185078909613, "eval_correct_accuracy": 0.5579058876929509, "eval_error_accuracy": 0.7319986862791484, "eval_f1": 0.5659655831739961, "eval_loss": 0.7970147132873535, "eval_pr_auc": 0.5909991957581218, "eval_precision": 0.4224548049476689, "eval_recall": 0.8571428571428571, "eval_runtime": 7.138, "eval_samples_per_second": 163.491, "eval_steps_per_second": 1.401, "step": 16 }, { "epoch": 0.2631578947368421, "grad_norm": 28.303598403930664, "learning_rate": 1.9982149887948264e-05, "loss": 2.6877, "step": 20 }, { "epoch": 0.3157894736842105, "grad_norm": 63.692806243896484, "learning_rate": 1.995594042425798e-05, "loss": 1.8805, "step": 24 }, { "epoch": 0.3157894736842105, "eval_F1_err_corr": 0.8195852671712573, "eval_accuracy": 0.8283118125298901, "eval_correct_accuracy": 0.9559562730173418, "eval_error_accuracy": 0.7172645896385392, "eval_f1": 0.5815850815850816, "eval_loss": 0.39416325092315674, "eval_pr_auc": 0.693011369968824, "eval_precision": 0.7338235294117647, "eval_recall": 0.48166023166023164, "eval_runtime": 7.1406, "eval_samples_per_second": 163.433, "eval_steps_per_second": 1.4, "step": 24 }, { "epoch": 0.3684210526315789, "grad_norm": 53.82140350341797, "learning_rate": 1.99181227793856e-05, "loss": 1.6956, "step": 28 }, { "epoch": 0.42105263157894735, "grad_norm": 38.888492584228516, "learning_rate": 1.9868741047013382e-05, "loss": 1.5156, "step": 32 }, { "epoch": 0.42105263157894735, "eval_F1_err_corr": 0.8118976385967145, "eval_accuracy": 0.8354854136776662, "eval_correct_accuracy": 0.9372372100616376, "eval_error_accuracy": 0.71612771037666, "eval_f1": 0.5651074589127687, "eval_loss": 0.36986085772514343, "eval_pr_auc": 0.7616394776557907, "eval_precision": 0.8186813186813187, "eval_recall": 0.4314671814671815, "eval_runtime": 7.1716, "eval_samples_per_second": 162.725, "eval_steps_per_second": 1.394, "step": 32 }, { "epoch": 0.47368421052631576, "grad_norm": 22.607288360595703, "learning_rate": 1.9807852804032306e-05, "loss": 1.4892, "step": 36 }, { "epoch": 0.5263157894736842, "grad_norm": 13.400656700134277, "learning_rate": 1.9735529043410012e-05, "loss": 1.2424, "step": 40 }, { "epoch": 0.5263157894736842, "eval_F1_err_corr": 0.8711215240506508, "eval_accuracy": 0.8627450980392157, "eval_correct_accuracy": 0.9620440825784337, "eval_error_accuracy": 0.7959010995250491, "eval_f1": 0.6835722160970231, "eval_loss": 0.3136674463748932, "eval_pr_auc": 0.7987692721100079, "eval_precision": 0.7969151670951157, "eval_recall": 0.5984555984555985, "eval_runtime": 7.1521, "eval_samples_per_second": 163.17, "eval_steps_per_second": 1.398, "step": 40 }, { "epoch": 0.5789473684210527, "grad_norm": 16.472789764404297, "learning_rate": 1.9651854091416175e-05, "loss": 1.2995, "step": 44 }, { "epoch": 0.631578947368421, "grad_norm": 7.745436668395996, "learning_rate": 1.9556925509301844e-05, "loss": 1.1904, "step": 48 }, { "epoch": 0.631578947368421, "eval_F1_err_corr": 0.8839638940186375, "eval_accuracy": 0.8842659014825442, "eval_correct_accuracy": 0.9484455013462648, "eval_error_accuracy": 0.8276919152969574, "eval_f1": 0.7520491803278688, "eval_loss": 0.2730002999305725, "eval_pr_auc": 0.8327722297335601, "eval_precision": 0.8013100436681223, "eval_recall": 0.7084942084942085, "eval_runtime": 7.1492, "eval_samples_per_second": 163.234, "eval_steps_per_second": 1.399, "step": 48 }, { "epoch": 0.6842105263157895, "grad_norm": 22.183454513549805, "learning_rate": 1.9450853979547384e-05, "loss": 1.0703, "step": 52 }, { "epoch": 0.7368421052631579, "grad_norm": 10.418429374694824, "learning_rate": 1.9333763176811663e-05, "loss": 1.1166, "step": 56 }, { "epoch": 0.7368421052631579, "eval_F1_err_corr": 0.8963518458095878, "eval_accuracy": 0.8854615016738403, "eval_correct_accuracy": 0.9502566249131135, "eval_error_accuracy": 0.8482344335548118, "eval_f1": 0.7603801900950475, "eval_loss": 0.26140278577804565, "eval_pr_auc": 0.8394726873391715, "eval_precision": 0.7892004153686397, "eval_recall": 0.7335907335907336, "eval_runtime": 7.1393, "eval_samples_per_second": 163.461, "eval_steps_per_second": 1.401, "step": 56 }, { "epoch": 0.7894736842105263, "grad_norm": 24.996362686157227, "learning_rate": 1.9205789623732923e-05, "loss": 1.107, "step": 60 }, { "epoch": 0.8421052631578947, "grad_norm": 10.622379302978516, "learning_rate": 1.9067082531749496e-05, "loss": 1.0344, "step": 64 }, { "epoch": 0.8421052631578947, "eval_F1_err_corr": 0.8971597384167123, "eval_accuracy": 0.8928742228598756, "eval_correct_accuracy": 0.9511252134916257, "eval_error_accuracy": 0.8489893075764926, "eval_f1": 0.7764471057884231, "eval_loss": 0.25267964601516724, "eval_pr_auc": 0.8508672146171146, "eval_precision": 0.8037190082644629, "eval_recall": 0.750965250965251, "eval_runtime": 7.1659, "eval_samples_per_second": 162.854, "eval_steps_per_second": 1.395, "step": 64 }, { "epoch": 0.8947368421052632, "grad_norm": 4.826870441436768, "learning_rate": 1.891780362712594e-05, "loss": 0.984, "step": 68 }, { "epoch": 0.9473684210526315, "grad_norm": 8.766145706176758, "learning_rate": 1.875812696238745e-05, "loss": 0.9791, "step": 72 }, { "epoch": 0.9473684210526315, "eval_F1_err_corr": 0.89776260256973, "eval_accuracy": 0.8940698230511717, "eval_correct_accuracy": 0.9191008849787475, "eval_error_accuracy": 0.8773926386058739, "eval_f1": 0.7932804479701353, "eval_loss": 0.25162529945373535, "eval_pr_auc": 0.8580493064471059, "eval_precision": 0.7678410117434508, "eval_recall": 0.8204633204633205, "eval_runtime": 7.1451, "eval_samples_per_second": 163.329, "eval_steps_per_second": 1.4, "step": 72 }, { "epoch": 1.0, "grad_norm": 15.486649513244629, "learning_rate": 1.85882387133824e-05, "loss": 1.0899, "step": 76 }, { "epoch": 1.0526315789473684, "grad_norm": 9.650471687316895, "learning_rate": 1.840833696220963e-05, "loss": 0.61, "step": 80 }, { "epoch": 1.0526315789473684, "eval_F1_err_corr": 0.8951485557028362, "eval_accuracy": 0.8904830224772836, "eval_correct_accuracy": 0.9052260320962611, "eval_error_accuracy": 0.8852929850041195, "eval_f1": 0.7973451327433628, "eval_loss": 0.2587549686431885, "eval_pr_auc": 0.8644362216080673, "eval_precision": 0.7361111111111112, "eval_recall": 0.8696911196911197, "eval_runtime": 7.1493, "eval_samples_per_second": 163.233, "eval_steps_per_second": 1.399, "step": 80 }, { "epoch": 1.1052631578947367, "grad_norm": 7.722207546234131, "learning_rate": 1.8218631466263584e-05, "loss": 0.6206, "step": 84 }, { "epoch": 1.1578947368421053, "grad_norm": 23.517024993896484, "learning_rate": 1.801934341366655e-05, "loss": 0.6597, "step": 88 }, { "epoch": 1.1578947368421053, "eval_F1_err_corr": 0.9054671281010055, "eval_accuracy": 0.9033955045432808, "eval_correct_accuracy": 0.9527881095438347, "eval_error_accuracy": 0.8626242087113936, "eval_f1": 0.7973921765295887, "eval_loss": 0.25677669048309326, "eval_pr_auc": 0.8667069010917992, "eval_precision": 0.8298538622129437, "eval_recall": 0.7673745173745173, "eval_runtime": 7.1469, "eval_samples_per_second": 163.288, "eval_steps_per_second": 1.399, "step": 88 } ], "logging_steps": 4, "max_steps": 380, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 16, "stateful_callbacks": { "MinEpochEarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.001 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.358268710518784e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }