| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 30.0, | |
| "eval_steps": 500, | |
| "global_step": 8520, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.5455477237701416, | |
| "learning_rate": 4.8333333333333334e-05, | |
| "loss": 0.4048, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_srl_loss": 0.21862658858299255, | |
| "eval_srl_runtime": 17.5334, | |
| "eval_srl_samples_per_second": 129.467, | |
| "eval_srl_steps_per_second": 16.198, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_ner_loss": 0.12236960977315903, | |
| "eval_ner_runtime": 17.5432, | |
| "eval_ner_samples_per_second": 129.395, | |
| "eval_ner_steps_per_second": 16.189, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 1.0813124179840088, | |
| "learning_rate": 4.666666666666667e-05, | |
| "loss": 0.1086, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_srl_loss": 0.15843605995178223, | |
| "eval_srl_runtime": 17.5383, | |
| "eval_srl_samples_per_second": 129.431, | |
| "eval_srl_steps_per_second": 16.193, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_ner_loss": 0.020286982879042625, | |
| "eval_ner_runtime": 17.5444, | |
| "eval_ner_samples_per_second": 129.386, | |
| "eval_ner_steps_per_second": 16.188, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 1.0206300020217896, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.0721, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_srl_loss": 0.115463025867939, | |
| "eval_srl_runtime": 17.55, | |
| "eval_srl_samples_per_second": 129.345, | |
| "eval_srl_steps_per_second": 16.182, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_ner_loss": 0.012386705726385117, | |
| "eval_ner_runtime": 17.5609, | |
| "eval_ner_samples_per_second": 129.264, | |
| "eval_ner_steps_per_second": 16.172, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.18648108839988708, | |
| "learning_rate": 4.3333333333333334e-05, | |
| "loss": 0.0537, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_srl_loss": 0.08971945941448212, | |
| "eval_srl_runtime": 17.5456, | |
| "eval_srl_samples_per_second": 129.377, | |
| "eval_srl_steps_per_second": 16.186, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_ner_loss": 0.005814700853079557, | |
| "eval_ner_runtime": 17.5524, | |
| "eval_ner_samples_per_second": 129.327, | |
| "eval_ner_steps_per_second": 16.18, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 1.4305367469787598, | |
| "learning_rate": 4.166666666666667e-05, | |
| "loss": 0.0426, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_srl_loss": 0.06870203465223312, | |
| "eval_srl_runtime": 17.536, | |
| "eval_srl_samples_per_second": 129.448, | |
| "eval_srl_steps_per_second": 16.195, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_ner_loss": 0.004500186070799828, | |
| "eval_ner_runtime": 17.5488, | |
| "eval_ner_samples_per_second": 129.353, | |
| "eval_ner_steps_per_second": 16.183, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 0.16283409297466278, | |
| "learning_rate": 4e-05, | |
| "loss": 0.0339, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_srl_loss": 0.052619677037000656, | |
| "eval_srl_runtime": 17.5537, | |
| "eval_srl_samples_per_second": 129.317, | |
| "eval_srl_steps_per_second": 16.179, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_ner_loss": 0.0033325471449643373, | |
| "eval_ner_runtime": 17.574, | |
| "eval_ner_samples_per_second": 129.168, | |
| "eval_ner_steps_per_second": 16.16, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 0.07186176627874374, | |
| "learning_rate": 3.8333333333333334e-05, | |
| "loss": 0.0275, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_srl_loss": 0.0378902293741703, | |
| "eval_srl_runtime": 17.5438, | |
| "eval_srl_samples_per_second": 129.391, | |
| "eval_srl_steps_per_second": 16.188, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_ner_loss": 0.0033688645344227552, | |
| "eval_ner_runtime": 17.5486, | |
| "eval_ner_samples_per_second": 129.355, | |
| "eval_ner_steps_per_second": 16.184, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 0.3217866122722626, | |
| "learning_rate": 3.6666666666666666e-05, | |
| "loss": 0.0227, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_srl_loss": 0.03359847143292427, | |
| "eval_srl_runtime": 17.5486, | |
| "eval_srl_samples_per_second": 129.355, | |
| "eval_srl_steps_per_second": 16.184, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_ner_loss": 0.003445760579779744, | |
| "eval_ner_runtime": 17.5536, | |
| "eval_ner_samples_per_second": 129.318, | |
| "eval_ner_steps_per_second": 16.179, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 0.697796642780304, | |
| "learning_rate": 3.5e-05, | |
| "loss": 0.0192, | |
| "step": 2556 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_srl_loss": 0.02817477658390999, | |
| "eval_srl_runtime": 17.5377, | |
| "eval_srl_samples_per_second": 129.435, | |
| "eval_srl_steps_per_second": 16.194, | |
| "step": 2556 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_ner_loss": 0.0022896770387887955, | |
| "eval_ner_runtime": 17.5429, | |
| "eval_ner_samples_per_second": 129.397, | |
| "eval_ner_steps_per_second": 16.189, | |
| "step": 2556 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 0.3553692698478699, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 0.0158, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_srl_loss": 0.02144758589565754, | |
| "eval_srl_runtime": 17.5443, | |
| "eval_srl_samples_per_second": 129.387, | |
| "eval_srl_steps_per_second": 16.188, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_ner_loss": 0.0018517740536481142, | |
| "eval_ner_runtime": 17.554, | |
| "eval_ner_samples_per_second": 129.316, | |
| "eval_ner_steps_per_second": 16.179, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "grad_norm": 0.35990962386131287, | |
| "learning_rate": 3.1666666666666666e-05, | |
| "loss": 0.013, | |
| "step": 3124 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_srl_loss": 0.018339334055781364, | |
| "eval_srl_runtime": 17.5523, | |
| "eval_srl_samples_per_second": 129.327, | |
| "eval_srl_steps_per_second": 16.18, | |
| "step": 3124 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_ner_loss": 0.001874973881058395, | |
| "eval_ner_runtime": 17.5544, | |
| "eval_ner_samples_per_second": 129.313, | |
| "eval_ner_steps_per_second": 16.178, | |
| "step": 3124 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "grad_norm": 0.5990183353424072, | |
| "learning_rate": 3e-05, | |
| "loss": 0.0105, | |
| "step": 3408 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_srl_loss": 0.013035450130701065, | |
| "eval_srl_runtime": 17.5468, | |
| "eval_srl_samples_per_second": 129.368, | |
| "eval_srl_steps_per_second": 16.185, | |
| "step": 3408 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_ner_loss": 0.0015207786345854402, | |
| "eval_ner_runtime": 17.562, | |
| "eval_ner_samples_per_second": 129.257, | |
| "eval_ner_steps_per_second": 16.171, | |
| "step": 3408 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "grad_norm": 0.9393383264541626, | |
| "learning_rate": 2.8333333333333335e-05, | |
| "loss": 0.0093, | |
| "step": 3692 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_srl_loss": 0.010261264629662037, | |
| "eval_srl_runtime": 17.5461, | |
| "eval_srl_samples_per_second": 129.373, | |
| "eval_srl_steps_per_second": 16.186, | |
| "step": 3692 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_ner_loss": 0.0013053927104920149, | |
| "eval_ner_runtime": 17.5578, | |
| "eval_ner_samples_per_second": 129.287, | |
| "eval_ner_steps_per_second": 16.175, | |
| "step": 3692 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "grad_norm": 0.442676305770874, | |
| "learning_rate": 2.6666666666666667e-05, | |
| "loss": 0.0079, | |
| "step": 3976 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_srl_loss": 0.009250137023627758, | |
| "eval_srl_runtime": 17.548, | |
| "eval_srl_samples_per_second": 129.359, | |
| "eval_srl_steps_per_second": 16.184, | |
| "step": 3976 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_ner_loss": 0.0014766417443752289, | |
| "eval_ner_runtime": 17.5576, | |
| "eval_ner_samples_per_second": 129.288, | |
| "eval_ner_steps_per_second": 16.175, | |
| "step": 3976 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "grad_norm": 0.3012068569660187, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.0066, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_srl_loss": 0.008085786364972591, | |
| "eval_srl_runtime": 17.5552, | |
| "eval_srl_samples_per_second": 129.306, | |
| "eval_srl_steps_per_second": 16.178, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_ner_loss": 0.0010674420045688748, | |
| "eval_ner_runtime": 17.5619, | |
| "eval_ner_samples_per_second": 129.257, | |
| "eval_ner_steps_per_second": 16.171, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "grad_norm": 0.19097836315631866, | |
| "learning_rate": 2.3333333333333336e-05, | |
| "loss": 0.0056, | |
| "step": 4544 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_srl_loss": 0.006198651157319546, | |
| "eval_srl_runtime": 17.5404, | |
| "eval_srl_samples_per_second": 129.416, | |
| "eval_srl_steps_per_second": 16.191, | |
| "step": 4544 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_ner_loss": 0.0009344189311377704, | |
| "eval_ner_runtime": 17.5502, | |
| "eval_ner_samples_per_second": 129.343, | |
| "eval_ner_steps_per_second": 16.182, | |
| "step": 4544 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "grad_norm": 0.15881049633026123, | |
| "learning_rate": 2.1666666666666667e-05, | |
| "loss": 0.0052, | |
| "step": 4828 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_srl_loss": 0.00588291697204113, | |
| "eval_srl_runtime": 17.5504, | |
| "eval_srl_samples_per_second": 129.342, | |
| "eval_srl_steps_per_second": 16.182, | |
| "step": 4828 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_ner_loss": 0.0009248966816812754, | |
| "eval_ner_runtime": 17.555, | |
| "eval_ner_samples_per_second": 129.308, | |
| "eval_ner_steps_per_second": 16.178, | |
| "step": 4828 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "grad_norm": 0.054942790418863297, | |
| "learning_rate": 2e-05, | |
| "loss": 0.0045, | |
| "step": 5112 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_srl_loss": 0.005140448454767466, | |
| "eval_srl_runtime": 17.5307, | |
| "eval_srl_samples_per_second": 129.487, | |
| "eval_srl_steps_per_second": 16.2, | |
| "step": 5112 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_ner_loss": 0.0009396191453561187, | |
| "eval_ner_runtime": 17.532, | |
| "eval_ner_samples_per_second": 129.477, | |
| "eval_ner_steps_per_second": 16.199, | |
| "step": 5112 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "grad_norm": 0.02979622595012188, | |
| "learning_rate": 1.8333333333333333e-05, | |
| "loss": 0.0038, | |
| "step": 5396 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_srl_loss": 0.005267995875328779, | |
| "eval_srl_runtime": 17.5559, | |
| "eval_srl_samples_per_second": 129.301, | |
| "eval_srl_steps_per_second": 16.177, | |
| "step": 5396 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_ner_loss": 0.0010794103145599365, | |
| "eval_ner_runtime": 17.5581, | |
| "eval_ner_samples_per_second": 129.285, | |
| "eval_ner_steps_per_second": 16.175, | |
| "step": 5396 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 0.34328535199165344, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 0.0036, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_srl_loss": 0.005443137139081955, | |
| "eval_srl_runtime": 17.5496, | |
| "eval_srl_samples_per_second": 129.348, | |
| "eval_srl_steps_per_second": 16.183, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_ner_loss": 0.0007795258425176144, | |
| "eval_ner_runtime": 17.557, | |
| "eval_ner_samples_per_second": 129.293, | |
| "eval_ner_steps_per_second": 16.176, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "grad_norm": 0.2540510594844818, | |
| "learning_rate": 1.5e-05, | |
| "loss": 0.0032, | |
| "step": 5964 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_srl_loss": 0.00396768469363451, | |
| "eval_srl_runtime": 17.5412, | |
| "eval_srl_samples_per_second": 129.41, | |
| "eval_srl_steps_per_second": 16.19, | |
| "step": 5964 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_ner_loss": 0.0007386294892057776, | |
| "eval_ner_runtime": 17.5472, | |
| "eval_ner_samples_per_second": 129.365, | |
| "eval_ner_steps_per_second": 16.185, | |
| "step": 5964 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "grad_norm": 0.1889248788356781, | |
| "learning_rate": 1.3333333333333333e-05, | |
| "loss": 0.0029, | |
| "step": 6248 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_srl_loss": 0.004316597245633602, | |
| "eval_srl_runtime": 17.5421, | |
| "eval_srl_samples_per_second": 129.403, | |
| "eval_srl_steps_per_second": 16.19, | |
| "step": 6248 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_ner_loss": 0.0006567554082721472, | |
| "eval_ner_runtime": 17.5545, | |
| "eval_ner_samples_per_second": 129.311, | |
| "eval_ner_steps_per_second": 16.178, | |
| "step": 6248 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "grad_norm": 0.13729548454284668, | |
| "learning_rate": 1.1666666666666668e-05, | |
| "loss": 0.0028, | |
| "step": 6532 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_srl_loss": 0.0036644088104367256, | |
| "eval_srl_runtime": 17.5517, | |
| "eval_srl_samples_per_second": 129.332, | |
| "eval_srl_steps_per_second": 16.181, | |
| "step": 6532 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_ner_loss": 0.0006873765378259122, | |
| "eval_ner_runtime": 17.5531, | |
| "eval_ner_samples_per_second": 129.322, | |
| "eval_ner_steps_per_second": 16.179, | |
| "step": 6532 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "grad_norm": 0.21299496293067932, | |
| "learning_rate": 1e-05, | |
| "loss": 0.0023, | |
| "step": 6816 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_srl_loss": 0.0038258766289800406, | |
| "eval_srl_runtime": 17.5464, | |
| "eval_srl_samples_per_second": 129.371, | |
| "eval_srl_steps_per_second": 16.186, | |
| "step": 6816 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_ner_loss": 0.0005753676523454487, | |
| "eval_ner_runtime": 17.5627, | |
| "eval_ner_samples_per_second": 129.251, | |
| "eval_ner_steps_per_second": 16.171, | |
| "step": 6816 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "grad_norm": 0.4840211272239685, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 0.0022, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_srl_loss": 0.0032577498350292444, | |
| "eval_srl_runtime": 17.5469, | |
| "eval_srl_samples_per_second": 129.368, | |
| "eval_srl_steps_per_second": 16.185, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_ner_loss": 0.0005282628699205816, | |
| "eval_ner_runtime": 17.5557, | |
| "eval_ner_samples_per_second": 129.303, | |
| "eval_ner_steps_per_second": 16.177, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "grad_norm": 0.1340308040380478, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 0.002, | |
| "step": 7384 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_srl_loss": 0.0033599596936255693, | |
| "eval_srl_runtime": 17.5361, | |
| "eval_srl_samples_per_second": 129.447, | |
| "eval_srl_steps_per_second": 16.195, | |
| "step": 7384 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_ner_loss": 0.0004921465297229588, | |
| "eval_ner_runtime": 17.5484, | |
| "eval_ner_samples_per_second": 129.356, | |
| "eval_ner_steps_per_second": 16.184, | |
| "step": 7384 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "grad_norm": 0.1986730843782425, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0019, | |
| "step": 7668 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_srl_loss": 0.003047993639484048, | |
| "eval_srl_runtime": 17.5511, | |
| "eval_srl_samples_per_second": 129.337, | |
| "eval_srl_steps_per_second": 16.181, | |
| "step": 7668 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_ner_loss": 0.0004909657291136682, | |
| "eval_ner_runtime": 17.5556, | |
| "eval_ner_samples_per_second": 129.303, | |
| "eval_ner_steps_per_second": 16.177, | |
| "step": 7668 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "grad_norm": 0.27138885855674744, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 0.0018, | |
| "step": 7952 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_srl_loss": 0.002978708129376173, | |
| "eval_srl_runtime": 17.5413, | |
| "eval_srl_samples_per_second": 129.409, | |
| "eval_srl_steps_per_second": 16.19, | |
| "step": 7952 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_ner_loss": 0.00047884471132420003, | |
| "eval_ner_runtime": 17.5561, | |
| "eval_ner_samples_per_second": 129.3, | |
| "eval_ner_steps_per_second": 16.177, | |
| "step": 7952 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "grad_norm": 0.20234395563602448, | |
| "learning_rate": 1.6666666666666667e-06, | |
| "loss": 0.0017, | |
| "step": 8236 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_srl_loss": 0.0029575293883681297, | |
| "eval_srl_runtime": 17.5499, | |
| "eval_srl_samples_per_second": 129.346, | |
| "eval_srl_steps_per_second": 16.182, | |
| "step": 8236 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_ner_loss": 0.00046766019659116864, | |
| "eval_ner_runtime": 17.5592, | |
| "eval_ner_samples_per_second": 129.277, | |
| "eval_ner_steps_per_second": 16.174, | |
| "step": 8236 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "grad_norm": 0.014138607308268547, | |
| "learning_rate": 0.0, | |
| "loss": 0.0016, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_srl_loss": 0.0029636274557560682, | |
| "eval_srl_runtime": 17.5359, | |
| "eval_srl_samples_per_second": 129.448, | |
| "eval_srl_steps_per_second": 16.195, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_ner_loss": 0.0004607184964697808, | |
| "eval_ner_runtime": 17.5477, | |
| "eval_ner_samples_per_second": 129.362, | |
| "eval_ner_steps_per_second": 16.184, | |
| "step": 8520 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 8520, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 30, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.58531004841984e+16, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |