| { | |
| "best_metric": 0.9050816297531128, | |
| "best_model_checkpoint": "./mbert_ar_ur/checkpoint-5000", | |
| "epoch": 4.0, | |
| "global_step": 5000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 1.885714285714286e-05, | |
| "loss": 0.4454, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.7714285714285717e-05, | |
| "loss": 0.2955, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_LOC_f1": 0.09510682288077188, | |
| "eval_ORG_f1": 0.24583663758921492, | |
| "eval_PER_f1": 0.3119353304802663, | |
| "eval_loss": 1.4924763441085815, | |
| "eval_overall_accuracy": 0.4886179434773416, | |
| "eval_overall_f1": 0.1982125758059368, | |
| "eval_overall_precision": 0.16568836712913554, | |
| "eval_overall_recall": 0.2466243050039714, | |
| "eval_runtime": 2.7088, | |
| "eval_samples_per_second": 369.163, | |
| "eval_steps_per_second": 23.257, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 1.6571428571428574e-05, | |
| "loss": 0.2182, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 1.542857142857143e-05, | |
| "loss": 0.1877, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.4285714285714287e-05, | |
| "loss": 0.181, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_LOC_f1": 0.16472203157172272, | |
| "eval_ORG_f1": 0.26800929512006194, | |
| "eval_PER_f1": 0.6124661246612466, | |
| "eval_loss": 1.1303032636642456, | |
| "eval_overall_accuracy": 0.6376770737895553, | |
| "eval_overall_f1": 0.3045238514346066, | |
| "eval_overall_precision": 0.2734745494783433, | |
| "eval_overall_recall": 0.3435266084193805, | |
| "eval_runtime": 2.7853, | |
| "eval_samples_per_second": 359.026, | |
| "eval_steps_per_second": 22.619, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 1.3142857142857145e-05, | |
| "loss": 0.1295, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 1.2e-05, | |
| "loss": 0.1253, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_LOC_f1": 0.15284677111196027, | |
| "eval_ORG_f1": 0.3176020408163266, | |
| "eval_PER_f1": 0.6216577540106952, | |
| "eval_loss": 1.348933458328247, | |
| "eval_overall_accuracy": 0.6324617661568821, | |
| "eval_overall_f1": 0.3217743355043126, | |
| "eval_overall_precision": 0.2889661713563073, | |
| "eval_overall_recall": 0.36298649722001586, | |
| "eval_runtime": 2.5633, | |
| "eval_samples_per_second": 390.124, | |
| "eval_steps_per_second": 24.578, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 1.0857142857142858e-05, | |
| "loss": 0.098, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "learning_rate": 9.714285714285715e-06, | |
| "loss": 0.0916, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 8.571428571428571e-06, | |
| "loss": 0.0866, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_LOC_f1": 0.42877697841726614, | |
| "eval_ORG_f1": 0.4028497409326424, | |
| "eval_PER_f1": 0.6109282422646478, | |
| "eval_loss": 0.9050816297531128, | |
| "eval_overall_accuracy": 0.7715836211149482, | |
| "eval_overall_f1": 0.47474747474747475, | |
| "eval_overall_precision": 0.464638783269962, | |
| "eval_overall_recall": 0.4853057982525814, | |
| "eval_runtime": 2.9178, | |
| "eval_samples_per_second": 342.725, | |
| "eval_steps_per_second": 21.592, | |
| "step": 5000 | |
| } | |
| ], | |
| "max_steps": 8750, | |
| "num_train_epochs": 7, | |
| "total_flos": 1363663262792160.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |