{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 30.0, "eval_steps": 500, "global_step": 8520, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.5455477237701416, "learning_rate": 4.8333333333333334e-05, "loss": 0.4048, "step": 284 }, { "epoch": 1.0, "eval_srl_loss": 0.21862658858299255, "eval_srl_runtime": 17.5334, "eval_srl_samples_per_second": 129.467, "eval_srl_steps_per_second": 16.198, "step": 284 }, { "epoch": 1.0, "eval_ner_loss": 0.12236960977315903, "eval_ner_runtime": 17.5432, "eval_ner_samples_per_second": 129.395, "eval_ner_steps_per_second": 16.189, "step": 284 }, { "epoch": 2.0, "grad_norm": 1.0813124179840088, "learning_rate": 4.666666666666667e-05, "loss": 0.1086, "step": 568 }, { "epoch": 2.0, "eval_srl_loss": 0.15843605995178223, "eval_srl_runtime": 17.5383, "eval_srl_samples_per_second": 129.431, "eval_srl_steps_per_second": 16.193, "step": 568 }, { "epoch": 2.0, "eval_ner_loss": 0.020286982879042625, "eval_ner_runtime": 17.5444, "eval_ner_samples_per_second": 129.386, "eval_ner_steps_per_second": 16.188, "step": 568 }, { "epoch": 3.0, "grad_norm": 1.0206300020217896, "learning_rate": 4.5e-05, "loss": 0.0721, "step": 852 }, { "epoch": 3.0, "eval_srl_loss": 0.115463025867939, "eval_srl_runtime": 17.55, "eval_srl_samples_per_second": 129.345, "eval_srl_steps_per_second": 16.182, "step": 852 }, { "epoch": 3.0, "eval_ner_loss": 0.012386705726385117, "eval_ner_runtime": 17.5609, "eval_ner_samples_per_second": 129.264, "eval_ner_steps_per_second": 16.172, "step": 852 }, { "epoch": 4.0, "grad_norm": 0.18648108839988708, "learning_rate": 4.3333333333333334e-05, "loss": 0.0537, "step": 1136 }, { "epoch": 4.0, "eval_srl_loss": 0.08971945941448212, "eval_srl_runtime": 17.5456, "eval_srl_samples_per_second": 129.377, "eval_srl_steps_per_second": 16.186, "step": 1136 }, { "epoch": 4.0, "eval_ner_loss": 0.005814700853079557, "eval_ner_runtime": 17.5524, "eval_ner_samples_per_second": 129.327, "eval_ner_steps_per_second": 16.18, "step": 1136 }, { "epoch": 5.0, "grad_norm": 1.4305367469787598, "learning_rate": 4.166666666666667e-05, "loss": 0.0426, "step": 1420 }, { "epoch": 5.0, "eval_srl_loss": 0.06870203465223312, "eval_srl_runtime": 17.536, "eval_srl_samples_per_second": 129.448, "eval_srl_steps_per_second": 16.195, "step": 1420 }, { "epoch": 5.0, "eval_ner_loss": 0.004500186070799828, "eval_ner_runtime": 17.5488, "eval_ner_samples_per_second": 129.353, "eval_ner_steps_per_second": 16.183, "step": 1420 }, { "epoch": 6.0, "grad_norm": 0.16283409297466278, "learning_rate": 4e-05, "loss": 0.0339, "step": 1704 }, { "epoch": 6.0, "eval_srl_loss": 0.052619677037000656, "eval_srl_runtime": 17.5537, "eval_srl_samples_per_second": 129.317, "eval_srl_steps_per_second": 16.179, "step": 1704 }, { "epoch": 6.0, "eval_ner_loss": 0.0033325471449643373, "eval_ner_runtime": 17.574, "eval_ner_samples_per_second": 129.168, "eval_ner_steps_per_second": 16.16, "step": 1704 }, { "epoch": 7.0, "grad_norm": 0.07186176627874374, "learning_rate": 3.8333333333333334e-05, "loss": 0.0275, "step": 1988 }, { "epoch": 7.0, "eval_srl_loss": 0.0378902293741703, "eval_srl_runtime": 17.5438, "eval_srl_samples_per_second": 129.391, "eval_srl_steps_per_second": 16.188, "step": 1988 }, { "epoch": 7.0, "eval_ner_loss": 0.0033688645344227552, "eval_ner_runtime": 17.5486, "eval_ner_samples_per_second": 129.355, "eval_ner_steps_per_second": 16.184, "step": 1988 }, { "epoch": 8.0, "grad_norm": 0.3217866122722626, "learning_rate": 3.6666666666666666e-05, "loss": 0.0227, "step": 2272 }, { "epoch": 8.0, "eval_srl_loss": 0.03359847143292427, "eval_srl_runtime": 17.5486, "eval_srl_samples_per_second": 129.355, "eval_srl_steps_per_second": 16.184, "step": 2272 }, { "epoch": 8.0, "eval_ner_loss": 0.003445760579779744, "eval_ner_runtime": 17.5536, "eval_ner_samples_per_second": 129.318, "eval_ner_steps_per_second": 16.179, "step": 2272 }, { "epoch": 9.0, "grad_norm": 0.697796642780304, "learning_rate": 3.5e-05, "loss": 0.0192, "step": 2556 }, { "epoch": 9.0, "eval_srl_loss": 0.02817477658390999, "eval_srl_runtime": 17.5377, "eval_srl_samples_per_second": 129.435, "eval_srl_steps_per_second": 16.194, "step": 2556 }, { "epoch": 9.0, "eval_ner_loss": 0.0022896770387887955, "eval_ner_runtime": 17.5429, "eval_ner_samples_per_second": 129.397, "eval_ner_steps_per_second": 16.189, "step": 2556 }, { "epoch": 10.0, "grad_norm": 0.3553692698478699, "learning_rate": 3.3333333333333335e-05, "loss": 0.0158, "step": 2840 }, { "epoch": 10.0, "eval_srl_loss": 0.02144758589565754, "eval_srl_runtime": 17.5443, "eval_srl_samples_per_second": 129.387, "eval_srl_steps_per_second": 16.188, "step": 2840 }, { "epoch": 10.0, "eval_ner_loss": 0.0018517740536481142, "eval_ner_runtime": 17.554, "eval_ner_samples_per_second": 129.316, "eval_ner_steps_per_second": 16.179, "step": 2840 }, { "epoch": 11.0, "grad_norm": 0.35990962386131287, "learning_rate": 3.1666666666666666e-05, "loss": 0.013, "step": 3124 }, { "epoch": 11.0, "eval_srl_loss": 0.018339334055781364, "eval_srl_runtime": 17.5523, "eval_srl_samples_per_second": 129.327, "eval_srl_steps_per_second": 16.18, "step": 3124 }, { "epoch": 11.0, "eval_ner_loss": 0.001874973881058395, "eval_ner_runtime": 17.5544, "eval_ner_samples_per_second": 129.313, "eval_ner_steps_per_second": 16.178, "step": 3124 }, { "epoch": 12.0, "grad_norm": 0.5990183353424072, "learning_rate": 3e-05, "loss": 0.0105, "step": 3408 }, { "epoch": 12.0, "eval_srl_loss": 0.013035450130701065, "eval_srl_runtime": 17.5468, "eval_srl_samples_per_second": 129.368, "eval_srl_steps_per_second": 16.185, "step": 3408 }, { "epoch": 12.0, "eval_ner_loss": 0.0015207786345854402, "eval_ner_runtime": 17.562, "eval_ner_samples_per_second": 129.257, "eval_ner_steps_per_second": 16.171, "step": 3408 }, { "epoch": 13.0, "grad_norm": 0.9393383264541626, "learning_rate": 2.8333333333333335e-05, "loss": 0.0093, "step": 3692 }, { "epoch": 13.0, "eval_srl_loss": 0.010261264629662037, "eval_srl_runtime": 17.5461, "eval_srl_samples_per_second": 129.373, "eval_srl_steps_per_second": 16.186, "step": 3692 }, { "epoch": 13.0, "eval_ner_loss": 0.0013053927104920149, "eval_ner_runtime": 17.5578, "eval_ner_samples_per_second": 129.287, "eval_ner_steps_per_second": 16.175, "step": 3692 }, { "epoch": 14.0, "grad_norm": 0.442676305770874, "learning_rate": 2.6666666666666667e-05, "loss": 0.0079, "step": 3976 }, { "epoch": 14.0, "eval_srl_loss": 0.009250137023627758, "eval_srl_runtime": 17.548, "eval_srl_samples_per_second": 129.359, "eval_srl_steps_per_second": 16.184, "step": 3976 }, { "epoch": 14.0, "eval_ner_loss": 0.0014766417443752289, "eval_ner_runtime": 17.5576, "eval_ner_samples_per_second": 129.288, "eval_ner_steps_per_second": 16.175, "step": 3976 }, { "epoch": 15.0, "grad_norm": 0.3012068569660187, "learning_rate": 2.5e-05, "loss": 0.0066, "step": 4260 }, { "epoch": 15.0, "eval_srl_loss": 0.008085786364972591, "eval_srl_runtime": 17.5552, "eval_srl_samples_per_second": 129.306, "eval_srl_steps_per_second": 16.178, "step": 4260 }, { "epoch": 15.0, "eval_ner_loss": 0.0010674420045688748, "eval_ner_runtime": 17.5619, "eval_ner_samples_per_second": 129.257, "eval_ner_steps_per_second": 16.171, "step": 4260 }, { "epoch": 16.0, "grad_norm": 0.19097836315631866, "learning_rate": 2.3333333333333336e-05, "loss": 0.0056, "step": 4544 }, { "epoch": 16.0, "eval_srl_loss": 0.006198651157319546, "eval_srl_runtime": 17.5404, "eval_srl_samples_per_second": 129.416, "eval_srl_steps_per_second": 16.191, "step": 4544 }, { "epoch": 16.0, "eval_ner_loss": 0.0009344189311377704, "eval_ner_runtime": 17.5502, "eval_ner_samples_per_second": 129.343, "eval_ner_steps_per_second": 16.182, "step": 4544 }, { "epoch": 17.0, "grad_norm": 0.15881049633026123, "learning_rate": 2.1666666666666667e-05, "loss": 0.0052, "step": 4828 }, { "epoch": 17.0, "eval_srl_loss": 0.00588291697204113, "eval_srl_runtime": 17.5504, "eval_srl_samples_per_second": 129.342, "eval_srl_steps_per_second": 16.182, "step": 4828 }, { "epoch": 17.0, "eval_ner_loss": 0.0009248966816812754, "eval_ner_runtime": 17.555, "eval_ner_samples_per_second": 129.308, "eval_ner_steps_per_second": 16.178, "step": 4828 }, { "epoch": 18.0, "grad_norm": 0.054942790418863297, "learning_rate": 2e-05, "loss": 0.0045, "step": 5112 }, { "epoch": 18.0, "eval_srl_loss": 0.005140448454767466, "eval_srl_runtime": 17.5307, "eval_srl_samples_per_second": 129.487, "eval_srl_steps_per_second": 16.2, "step": 5112 }, { "epoch": 18.0, "eval_ner_loss": 0.0009396191453561187, "eval_ner_runtime": 17.532, "eval_ner_samples_per_second": 129.477, "eval_ner_steps_per_second": 16.199, "step": 5112 }, { "epoch": 19.0, "grad_norm": 0.02979622595012188, "learning_rate": 1.8333333333333333e-05, "loss": 0.0038, "step": 5396 }, { "epoch": 19.0, "eval_srl_loss": 0.005267995875328779, "eval_srl_runtime": 17.5559, "eval_srl_samples_per_second": 129.301, "eval_srl_steps_per_second": 16.177, "step": 5396 }, { "epoch": 19.0, "eval_ner_loss": 0.0010794103145599365, "eval_ner_runtime": 17.5581, "eval_ner_samples_per_second": 129.285, "eval_ner_steps_per_second": 16.175, "step": 5396 }, { "epoch": 20.0, "grad_norm": 0.34328535199165344, "learning_rate": 1.6666666666666667e-05, "loss": 0.0036, "step": 5680 }, { "epoch": 20.0, "eval_srl_loss": 0.005443137139081955, "eval_srl_runtime": 17.5496, "eval_srl_samples_per_second": 129.348, "eval_srl_steps_per_second": 16.183, "step": 5680 }, { "epoch": 20.0, "eval_ner_loss": 0.0007795258425176144, "eval_ner_runtime": 17.557, "eval_ner_samples_per_second": 129.293, "eval_ner_steps_per_second": 16.176, "step": 5680 }, { "epoch": 21.0, "grad_norm": 0.2540510594844818, "learning_rate": 1.5e-05, "loss": 0.0032, "step": 5964 }, { "epoch": 21.0, "eval_srl_loss": 0.00396768469363451, "eval_srl_runtime": 17.5412, "eval_srl_samples_per_second": 129.41, "eval_srl_steps_per_second": 16.19, "step": 5964 }, { "epoch": 21.0, "eval_ner_loss": 0.0007386294892057776, "eval_ner_runtime": 17.5472, "eval_ner_samples_per_second": 129.365, "eval_ner_steps_per_second": 16.185, "step": 5964 }, { "epoch": 22.0, "grad_norm": 0.1889248788356781, "learning_rate": 1.3333333333333333e-05, "loss": 0.0029, "step": 6248 }, { "epoch": 22.0, "eval_srl_loss": 0.004316597245633602, "eval_srl_runtime": 17.5421, "eval_srl_samples_per_second": 129.403, "eval_srl_steps_per_second": 16.19, "step": 6248 }, { "epoch": 22.0, "eval_ner_loss": 0.0006567554082721472, "eval_ner_runtime": 17.5545, "eval_ner_samples_per_second": 129.311, "eval_ner_steps_per_second": 16.178, "step": 6248 }, { "epoch": 23.0, "grad_norm": 0.13729548454284668, "learning_rate": 1.1666666666666668e-05, "loss": 0.0028, "step": 6532 }, { "epoch": 23.0, "eval_srl_loss": 0.0036644088104367256, "eval_srl_runtime": 17.5517, "eval_srl_samples_per_second": 129.332, "eval_srl_steps_per_second": 16.181, "step": 6532 }, { "epoch": 23.0, "eval_ner_loss": 0.0006873765378259122, "eval_ner_runtime": 17.5531, "eval_ner_samples_per_second": 129.322, "eval_ner_steps_per_second": 16.179, "step": 6532 }, { "epoch": 24.0, "grad_norm": 0.21299496293067932, "learning_rate": 1e-05, "loss": 0.0023, "step": 6816 }, { "epoch": 24.0, "eval_srl_loss": 0.0038258766289800406, "eval_srl_runtime": 17.5464, "eval_srl_samples_per_second": 129.371, "eval_srl_steps_per_second": 16.186, "step": 6816 }, { "epoch": 24.0, "eval_ner_loss": 0.0005753676523454487, "eval_ner_runtime": 17.5627, "eval_ner_samples_per_second": 129.251, "eval_ner_steps_per_second": 16.171, "step": 6816 }, { "epoch": 25.0, "grad_norm": 0.4840211272239685, "learning_rate": 8.333333333333334e-06, "loss": 0.0022, "step": 7100 }, { "epoch": 25.0, "eval_srl_loss": 0.0032577498350292444, "eval_srl_runtime": 17.5469, "eval_srl_samples_per_second": 129.368, "eval_srl_steps_per_second": 16.185, "step": 7100 }, { "epoch": 25.0, "eval_ner_loss": 0.0005282628699205816, "eval_ner_runtime": 17.5557, "eval_ner_samples_per_second": 129.303, "eval_ner_steps_per_second": 16.177, "step": 7100 }, { "epoch": 26.0, "grad_norm": 0.1340308040380478, "learning_rate": 6.666666666666667e-06, "loss": 0.002, "step": 7384 }, { "epoch": 26.0, "eval_srl_loss": 0.0033599596936255693, "eval_srl_runtime": 17.5361, "eval_srl_samples_per_second": 129.447, "eval_srl_steps_per_second": 16.195, "step": 7384 }, { "epoch": 26.0, "eval_ner_loss": 0.0004921465297229588, "eval_ner_runtime": 17.5484, "eval_ner_samples_per_second": 129.356, "eval_ner_steps_per_second": 16.184, "step": 7384 }, { "epoch": 27.0, "grad_norm": 0.1986730843782425, "learning_rate": 5e-06, "loss": 0.0019, "step": 7668 }, { "epoch": 27.0, "eval_srl_loss": 0.003047993639484048, "eval_srl_runtime": 17.5511, "eval_srl_samples_per_second": 129.337, "eval_srl_steps_per_second": 16.181, "step": 7668 }, { "epoch": 27.0, "eval_ner_loss": 0.0004909657291136682, "eval_ner_runtime": 17.5556, "eval_ner_samples_per_second": 129.303, "eval_ner_steps_per_second": 16.177, "step": 7668 }, { "epoch": 28.0, "grad_norm": 0.27138885855674744, "learning_rate": 3.3333333333333333e-06, "loss": 0.0018, "step": 7952 }, { "epoch": 28.0, "eval_srl_loss": 0.002978708129376173, "eval_srl_runtime": 17.5413, "eval_srl_samples_per_second": 129.409, "eval_srl_steps_per_second": 16.19, "step": 7952 }, { "epoch": 28.0, "eval_ner_loss": 0.00047884471132420003, "eval_ner_runtime": 17.5561, "eval_ner_samples_per_second": 129.3, "eval_ner_steps_per_second": 16.177, "step": 7952 }, { "epoch": 29.0, "grad_norm": 0.20234395563602448, "learning_rate": 1.6666666666666667e-06, "loss": 0.0017, "step": 8236 }, { "epoch": 29.0, "eval_srl_loss": 0.0029575293883681297, "eval_srl_runtime": 17.5499, "eval_srl_samples_per_second": 129.346, "eval_srl_steps_per_second": 16.182, "step": 8236 }, { "epoch": 29.0, "eval_ner_loss": 0.00046766019659116864, "eval_ner_runtime": 17.5592, "eval_ner_samples_per_second": 129.277, "eval_ner_steps_per_second": 16.174, "step": 8236 }, { "epoch": 30.0, "grad_norm": 0.014138607308268547, "learning_rate": 0.0, "loss": 0.0016, "step": 8520 }, { "epoch": 30.0, "eval_srl_loss": 0.0029636274557560682, "eval_srl_runtime": 17.5359, "eval_srl_samples_per_second": 129.448, "eval_srl_steps_per_second": 16.195, "step": 8520 }, { "epoch": 30.0, "eval_ner_loss": 0.0004607184964697808, "eval_ner_runtime": 17.5477, "eval_ner_samples_per_second": 129.362, "eval_ner_steps_per_second": 16.184, "step": 8520 } ], "logging_steps": 500, "max_steps": 8520, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.58531004841984e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }