{ "best_metric": 0.9648942420681551, "best_model_checkpoint": ".//debugged_ru_gsd_ses_udpipe_16_0.01_0.00005_15_04-22-24_21-06/checkpoint-2772", "epoch": 15.0, "global_step": 3780, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.964818463270476e-05, "loss": 2.2858, "step": 252 }, { "epoch": 1.0, "eval_accuracy": 0.8843166071803485, "eval_f1": 0.8188400388146599, "eval_loss": 0.5495285391807556, "eval_precision": 0.8330801944106926, "eval_recall": 0.8050785263466902, "eval_runtime": 3.87, "eval_samples_per_second": 130.234, "eval_steps_per_second": 16.279, "step": 252 }, { "epoch": 2.0, "learning_rate": 4.6101885730368706e-05, "loss": 0.4328, "step": 504 }, { "epoch": 2.0, "eval_accuracy": 0.9476170480789419, "eval_f1": 0.9214275223968277, "eval_loss": 0.25436148047447205, "eval_precision": 0.9219691403379868, "eval_recall": 0.920886540437399, "eval_runtime": 3.854, "eval_samples_per_second": 130.773, "eval_steps_per_second": 16.347, "step": 504 }, { "epoch": 3.0, "learning_rate": 4.255558682803265e-05, "loss": 0.2312, "step": 756 }, { "epoch": 3.0, "eval_accuracy": 0.9588494646231367, "eval_f1": 0.9394761939696281, "eval_loss": 0.20127034187316895, "eval_precision": 0.9391317101789382, "eval_recall": 0.9398209305739028, "eval_runtime": 3.8428, "eval_samples_per_second": 131.156, "eval_steps_per_second": 16.394, "step": 756 }, { "epoch": 4.0, "learning_rate": 3.9009287925696596e-05, "loss": 0.1473, "step": 1008 }, { "epoch": 4.0, "eval_accuracy": 0.9625236195674994, "eval_f1": 0.9416176470588237, "eval_loss": 0.1672007292509079, "eval_precision": 0.9434212465006631, "eval_recall": 0.9398209305739028, "eval_runtime": 3.8423, "eval_samples_per_second": 131.172, "eval_steps_per_second": 16.396, "step": 1008 }, { "epoch": 5.0, "learning_rate": 3.5462989023360544e-05, "loss": 0.1049, "step": 1260 }, { "epoch": 5.0, "eval_accuracy": 0.9709216880117573, "eval_f1": 0.9564258946285545, "eval_loss": 0.15631963312625885, "eval_precision": 0.9576221306650972, "eval_recall": 0.9552326434757082, "eval_runtime": 3.8547, "eval_samples_per_second": 130.751, "eval_steps_per_second": 16.344, "step": 1260 }, { "epoch": 6.0, "learning_rate": 3.191669012102449e-05, "loss": 0.0836, "step": 1512 }, { "epoch": 6.0, "eval_accuracy": 0.9724963258450556, "eval_f1": 0.9572322414427678, "eval_loss": 0.15163131058216095, "eval_precision": 0.9601299468399291, "eval_recall": 0.9543519741670337, "eval_runtime": 3.8588, "eval_samples_per_second": 130.609, "eval_steps_per_second": 16.326, "step": 1512 }, { "epoch": 7.0, "learning_rate": 2.8370391218688434e-05, "loss": 0.0506, "step": 1764 }, { "epoch": 7.0, "eval_accuracy": 0.9718664707117363, "eval_f1": 0.9565600882028665, "eval_loss": 0.1509512960910797, "eval_precision": 0.9580388692579506, "eval_recall": 0.9550858652575958, "eval_runtime": 3.8475, "eval_samples_per_second": 130.995, "eval_steps_per_second": 16.374, "step": 1764 }, { "epoch": 8.0, "learning_rate": 2.482409231635238e-05, "loss": 0.0364, "step": 2016 }, { "epoch": 8.0, "eval_accuracy": 0.973126180978375, "eval_f1": 0.9582385321100917, "eval_loss": 0.15088674426078796, "eval_precision": 0.9583088667058133, "eval_recall": 0.9581682078379569, "eval_runtime": 3.8432, "eval_samples_per_second": 131.139, "eval_steps_per_second": 16.392, "step": 2016 }, { "epoch": 9.0, "learning_rate": 2.1277793414016324e-05, "loss": 0.0273, "step": 2268 }, { "epoch": 9.0, "eval_accuracy": 0.9728112534117153, "eval_f1": 0.9574937490807472, "eval_loss": 0.15090352296829224, "eval_precision": 0.9594694178334562, "eval_recall": 0.9555261999119331, "eval_runtime": 3.8454, "eval_samples_per_second": 131.066, "eval_steps_per_second": 16.383, "step": 2268 }, { "epoch": 10.0, "learning_rate": 1.7731494511680272e-05, "loss": 0.0214, "step": 2520 }, { "epoch": 10.0, "eval_accuracy": 0.9751207222338862, "eval_f1": 0.9623209695189129, "eval_loss": 0.1550149917602539, "eval_precision": 0.9630990885033813, "eval_recall": 0.9615441068545428, "eval_runtime": 3.9336, "eval_samples_per_second": 128.128, "eval_steps_per_second": 16.016, "step": 2520 }, { "epoch": 11.0, "learning_rate": 1.4185195609344217e-05, "loss": 0.0138, "step": 2772 }, { "epoch": 11.0, "eval_accuracy": 0.976905311778291, "eval_f1": 0.9648942420681551, "eval_loss": 0.14702539145946503, "eval_precision": 0.9656034102601794, "eval_recall": 0.9641861147805666, "eval_runtime": 3.9253, "eval_samples_per_second": 128.398, "eval_steps_per_second": 16.05, "step": 2772 }, { "epoch": 12.0, "learning_rate": 1.0638896707008162e-05, "loss": 0.0102, "step": 3024 }, { "epoch": 12.0, "eval_accuracy": 0.9754356498005459, "eval_f1": 0.9619005589879376, "eval_loss": 0.15176355838775635, "eval_precision": 0.9640277163496977, "eval_recall": 0.9597827682371936, "eval_runtime": 3.9451, "eval_samples_per_second": 127.755, "eval_steps_per_second": 15.969, "step": 3024 }, { "epoch": 13.0, "learning_rate": 7.0925978046721085e-06, "loss": 0.0084, "step": 3276 }, { "epoch": 13.0, "eval_accuracy": 0.9764854083560781, "eval_f1": 0.9637526652452025, "eval_loss": 0.15135051310062408, "eval_precision": 0.9655274012964055, "eval_recall": 0.9619844415088801, "eval_runtime": 3.9363, "eval_samples_per_second": 128.038, "eval_steps_per_second": 16.005, "step": 3276 }, { "epoch": 14.0, "learning_rate": 3.5462989023360543e-06, "loss": 0.0069, "step": 3528 }, { "epoch": 14.0, "eval_accuracy": 0.9765903842116314, "eval_f1": 0.9642909625275533, "eval_loss": 0.15399284660816193, "eval_precision": 0.9654259232014124, "eval_recall": 0.9631586672537795, "eval_runtime": 3.9275, "eval_samples_per_second": 128.326, "eval_steps_per_second": 16.041, "step": 3528 }, { "epoch": 15.0, "learning_rate": 0.0, "loss": 0.0061, "step": 3780 }, { "epoch": 15.0, "eval_accuracy": 0.9766953600671845, "eval_f1": 0.9647421771705598, "eval_loss": 0.15449927747249603, "eval_precision": 0.9655932951036612, "eval_recall": 0.9638925583443417, "eval_runtime": 3.8888, "eval_samples_per_second": 129.601, "eval_steps_per_second": 16.2, "step": 3780 }, { "epoch": 15.0, "step": 3780, "total_flos": 7859492504967600.0, "train_loss": 0.23112083008049658, "train_runtime": 2555.6695, "train_samples_per_second": 23.624, "train_steps_per_second": 1.479 } ], "max_steps": 3780, "num_train_epochs": 15, "total_flos": 7859492504967600.0, "trial_name": null, "trial_params": null }