MFT-mBERT / trainer_state.json
HannahGoossens's picture
Upload 10 files
711f7fa verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 30.0,
"eval_steps": 500,
"global_step": 8520,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 0.5455477237701416,
"learning_rate": 4.8333333333333334e-05,
"loss": 0.4048,
"step": 284
},
{
"epoch": 1.0,
"eval_srl_loss": 0.21862658858299255,
"eval_srl_runtime": 17.5334,
"eval_srl_samples_per_second": 129.467,
"eval_srl_steps_per_second": 16.198,
"step": 284
},
{
"epoch": 1.0,
"eval_ner_loss": 0.12236960977315903,
"eval_ner_runtime": 17.5432,
"eval_ner_samples_per_second": 129.395,
"eval_ner_steps_per_second": 16.189,
"step": 284
},
{
"epoch": 2.0,
"grad_norm": 1.0813124179840088,
"learning_rate": 4.666666666666667e-05,
"loss": 0.1086,
"step": 568
},
{
"epoch": 2.0,
"eval_srl_loss": 0.15843605995178223,
"eval_srl_runtime": 17.5383,
"eval_srl_samples_per_second": 129.431,
"eval_srl_steps_per_second": 16.193,
"step": 568
},
{
"epoch": 2.0,
"eval_ner_loss": 0.020286982879042625,
"eval_ner_runtime": 17.5444,
"eval_ner_samples_per_second": 129.386,
"eval_ner_steps_per_second": 16.188,
"step": 568
},
{
"epoch": 3.0,
"grad_norm": 1.0206300020217896,
"learning_rate": 4.5e-05,
"loss": 0.0721,
"step": 852
},
{
"epoch": 3.0,
"eval_srl_loss": 0.115463025867939,
"eval_srl_runtime": 17.55,
"eval_srl_samples_per_second": 129.345,
"eval_srl_steps_per_second": 16.182,
"step": 852
},
{
"epoch": 3.0,
"eval_ner_loss": 0.012386705726385117,
"eval_ner_runtime": 17.5609,
"eval_ner_samples_per_second": 129.264,
"eval_ner_steps_per_second": 16.172,
"step": 852
},
{
"epoch": 4.0,
"grad_norm": 0.18648108839988708,
"learning_rate": 4.3333333333333334e-05,
"loss": 0.0537,
"step": 1136
},
{
"epoch": 4.0,
"eval_srl_loss": 0.08971945941448212,
"eval_srl_runtime": 17.5456,
"eval_srl_samples_per_second": 129.377,
"eval_srl_steps_per_second": 16.186,
"step": 1136
},
{
"epoch": 4.0,
"eval_ner_loss": 0.005814700853079557,
"eval_ner_runtime": 17.5524,
"eval_ner_samples_per_second": 129.327,
"eval_ner_steps_per_second": 16.18,
"step": 1136
},
{
"epoch": 5.0,
"grad_norm": 1.4305367469787598,
"learning_rate": 4.166666666666667e-05,
"loss": 0.0426,
"step": 1420
},
{
"epoch": 5.0,
"eval_srl_loss": 0.06870203465223312,
"eval_srl_runtime": 17.536,
"eval_srl_samples_per_second": 129.448,
"eval_srl_steps_per_second": 16.195,
"step": 1420
},
{
"epoch": 5.0,
"eval_ner_loss": 0.004500186070799828,
"eval_ner_runtime": 17.5488,
"eval_ner_samples_per_second": 129.353,
"eval_ner_steps_per_second": 16.183,
"step": 1420
},
{
"epoch": 6.0,
"grad_norm": 0.16283409297466278,
"learning_rate": 4e-05,
"loss": 0.0339,
"step": 1704
},
{
"epoch": 6.0,
"eval_srl_loss": 0.052619677037000656,
"eval_srl_runtime": 17.5537,
"eval_srl_samples_per_second": 129.317,
"eval_srl_steps_per_second": 16.179,
"step": 1704
},
{
"epoch": 6.0,
"eval_ner_loss": 0.0033325471449643373,
"eval_ner_runtime": 17.574,
"eval_ner_samples_per_second": 129.168,
"eval_ner_steps_per_second": 16.16,
"step": 1704
},
{
"epoch": 7.0,
"grad_norm": 0.07186176627874374,
"learning_rate": 3.8333333333333334e-05,
"loss": 0.0275,
"step": 1988
},
{
"epoch": 7.0,
"eval_srl_loss": 0.0378902293741703,
"eval_srl_runtime": 17.5438,
"eval_srl_samples_per_second": 129.391,
"eval_srl_steps_per_second": 16.188,
"step": 1988
},
{
"epoch": 7.0,
"eval_ner_loss": 0.0033688645344227552,
"eval_ner_runtime": 17.5486,
"eval_ner_samples_per_second": 129.355,
"eval_ner_steps_per_second": 16.184,
"step": 1988
},
{
"epoch": 8.0,
"grad_norm": 0.3217866122722626,
"learning_rate": 3.6666666666666666e-05,
"loss": 0.0227,
"step": 2272
},
{
"epoch": 8.0,
"eval_srl_loss": 0.03359847143292427,
"eval_srl_runtime": 17.5486,
"eval_srl_samples_per_second": 129.355,
"eval_srl_steps_per_second": 16.184,
"step": 2272
},
{
"epoch": 8.0,
"eval_ner_loss": 0.003445760579779744,
"eval_ner_runtime": 17.5536,
"eval_ner_samples_per_second": 129.318,
"eval_ner_steps_per_second": 16.179,
"step": 2272
},
{
"epoch": 9.0,
"grad_norm": 0.697796642780304,
"learning_rate": 3.5e-05,
"loss": 0.0192,
"step": 2556
},
{
"epoch": 9.0,
"eval_srl_loss": 0.02817477658390999,
"eval_srl_runtime": 17.5377,
"eval_srl_samples_per_second": 129.435,
"eval_srl_steps_per_second": 16.194,
"step": 2556
},
{
"epoch": 9.0,
"eval_ner_loss": 0.0022896770387887955,
"eval_ner_runtime": 17.5429,
"eval_ner_samples_per_second": 129.397,
"eval_ner_steps_per_second": 16.189,
"step": 2556
},
{
"epoch": 10.0,
"grad_norm": 0.3553692698478699,
"learning_rate": 3.3333333333333335e-05,
"loss": 0.0158,
"step": 2840
},
{
"epoch": 10.0,
"eval_srl_loss": 0.02144758589565754,
"eval_srl_runtime": 17.5443,
"eval_srl_samples_per_second": 129.387,
"eval_srl_steps_per_second": 16.188,
"step": 2840
},
{
"epoch": 10.0,
"eval_ner_loss": 0.0018517740536481142,
"eval_ner_runtime": 17.554,
"eval_ner_samples_per_second": 129.316,
"eval_ner_steps_per_second": 16.179,
"step": 2840
},
{
"epoch": 11.0,
"grad_norm": 0.35990962386131287,
"learning_rate": 3.1666666666666666e-05,
"loss": 0.013,
"step": 3124
},
{
"epoch": 11.0,
"eval_srl_loss": 0.018339334055781364,
"eval_srl_runtime": 17.5523,
"eval_srl_samples_per_second": 129.327,
"eval_srl_steps_per_second": 16.18,
"step": 3124
},
{
"epoch": 11.0,
"eval_ner_loss": 0.001874973881058395,
"eval_ner_runtime": 17.5544,
"eval_ner_samples_per_second": 129.313,
"eval_ner_steps_per_second": 16.178,
"step": 3124
},
{
"epoch": 12.0,
"grad_norm": 0.5990183353424072,
"learning_rate": 3e-05,
"loss": 0.0105,
"step": 3408
},
{
"epoch": 12.0,
"eval_srl_loss": 0.013035450130701065,
"eval_srl_runtime": 17.5468,
"eval_srl_samples_per_second": 129.368,
"eval_srl_steps_per_second": 16.185,
"step": 3408
},
{
"epoch": 12.0,
"eval_ner_loss": 0.0015207786345854402,
"eval_ner_runtime": 17.562,
"eval_ner_samples_per_second": 129.257,
"eval_ner_steps_per_second": 16.171,
"step": 3408
},
{
"epoch": 13.0,
"grad_norm": 0.9393383264541626,
"learning_rate": 2.8333333333333335e-05,
"loss": 0.0093,
"step": 3692
},
{
"epoch": 13.0,
"eval_srl_loss": 0.010261264629662037,
"eval_srl_runtime": 17.5461,
"eval_srl_samples_per_second": 129.373,
"eval_srl_steps_per_second": 16.186,
"step": 3692
},
{
"epoch": 13.0,
"eval_ner_loss": 0.0013053927104920149,
"eval_ner_runtime": 17.5578,
"eval_ner_samples_per_second": 129.287,
"eval_ner_steps_per_second": 16.175,
"step": 3692
},
{
"epoch": 14.0,
"grad_norm": 0.442676305770874,
"learning_rate": 2.6666666666666667e-05,
"loss": 0.0079,
"step": 3976
},
{
"epoch": 14.0,
"eval_srl_loss": 0.009250137023627758,
"eval_srl_runtime": 17.548,
"eval_srl_samples_per_second": 129.359,
"eval_srl_steps_per_second": 16.184,
"step": 3976
},
{
"epoch": 14.0,
"eval_ner_loss": 0.0014766417443752289,
"eval_ner_runtime": 17.5576,
"eval_ner_samples_per_second": 129.288,
"eval_ner_steps_per_second": 16.175,
"step": 3976
},
{
"epoch": 15.0,
"grad_norm": 0.3012068569660187,
"learning_rate": 2.5e-05,
"loss": 0.0066,
"step": 4260
},
{
"epoch": 15.0,
"eval_srl_loss": 0.008085786364972591,
"eval_srl_runtime": 17.5552,
"eval_srl_samples_per_second": 129.306,
"eval_srl_steps_per_second": 16.178,
"step": 4260
},
{
"epoch": 15.0,
"eval_ner_loss": 0.0010674420045688748,
"eval_ner_runtime": 17.5619,
"eval_ner_samples_per_second": 129.257,
"eval_ner_steps_per_second": 16.171,
"step": 4260
},
{
"epoch": 16.0,
"grad_norm": 0.19097836315631866,
"learning_rate": 2.3333333333333336e-05,
"loss": 0.0056,
"step": 4544
},
{
"epoch": 16.0,
"eval_srl_loss": 0.006198651157319546,
"eval_srl_runtime": 17.5404,
"eval_srl_samples_per_second": 129.416,
"eval_srl_steps_per_second": 16.191,
"step": 4544
},
{
"epoch": 16.0,
"eval_ner_loss": 0.0009344189311377704,
"eval_ner_runtime": 17.5502,
"eval_ner_samples_per_second": 129.343,
"eval_ner_steps_per_second": 16.182,
"step": 4544
},
{
"epoch": 17.0,
"grad_norm": 0.15881049633026123,
"learning_rate": 2.1666666666666667e-05,
"loss": 0.0052,
"step": 4828
},
{
"epoch": 17.0,
"eval_srl_loss": 0.00588291697204113,
"eval_srl_runtime": 17.5504,
"eval_srl_samples_per_second": 129.342,
"eval_srl_steps_per_second": 16.182,
"step": 4828
},
{
"epoch": 17.0,
"eval_ner_loss": 0.0009248966816812754,
"eval_ner_runtime": 17.555,
"eval_ner_samples_per_second": 129.308,
"eval_ner_steps_per_second": 16.178,
"step": 4828
},
{
"epoch": 18.0,
"grad_norm": 0.054942790418863297,
"learning_rate": 2e-05,
"loss": 0.0045,
"step": 5112
},
{
"epoch": 18.0,
"eval_srl_loss": 0.005140448454767466,
"eval_srl_runtime": 17.5307,
"eval_srl_samples_per_second": 129.487,
"eval_srl_steps_per_second": 16.2,
"step": 5112
},
{
"epoch": 18.0,
"eval_ner_loss": 0.0009396191453561187,
"eval_ner_runtime": 17.532,
"eval_ner_samples_per_second": 129.477,
"eval_ner_steps_per_second": 16.199,
"step": 5112
},
{
"epoch": 19.0,
"grad_norm": 0.02979622595012188,
"learning_rate": 1.8333333333333333e-05,
"loss": 0.0038,
"step": 5396
},
{
"epoch": 19.0,
"eval_srl_loss": 0.005267995875328779,
"eval_srl_runtime": 17.5559,
"eval_srl_samples_per_second": 129.301,
"eval_srl_steps_per_second": 16.177,
"step": 5396
},
{
"epoch": 19.0,
"eval_ner_loss": 0.0010794103145599365,
"eval_ner_runtime": 17.5581,
"eval_ner_samples_per_second": 129.285,
"eval_ner_steps_per_second": 16.175,
"step": 5396
},
{
"epoch": 20.0,
"grad_norm": 0.34328535199165344,
"learning_rate": 1.6666666666666667e-05,
"loss": 0.0036,
"step": 5680
},
{
"epoch": 20.0,
"eval_srl_loss": 0.005443137139081955,
"eval_srl_runtime": 17.5496,
"eval_srl_samples_per_second": 129.348,
"eval_srl_steps_per_second": 16.183,
"step": 5680
},
{
"epoch": 20.0,
"eval_ner_loss": 0.0007795258425176144,
"eval_ner_runtime": 17.557,
"eval_ner_samples_per_second": 129.293,
"eval_ner_steps_per_second": 16.176,
"step": 5680
},
{
"epoch": 21.0,
"grad_norm": 0.2540510594844818,
"learning_rate": 1.5e-05,
"loss": 0.0032,
"step": 5964
},
{
"epoch": 21.0,
"eval_srl_loss": 0.00396768469363451,
"eval_srl_runtime": 17.5412,
"eval_srl_samples_per_second": 129.41,
"eval_srl_steps_per_second": 16.19,
"step": 5964
},
{
"epoch": 21.0,
"eval_ner_loss": 0.0007386294892057776,
"eval_ner_runtime": 17.5472,
"eval_ner_samples_per_second": 129.365,
"eval_ner_steps_per_second": 16.185,
"step": 5964
},
{
"epoch": 22.0,
"grad_norm": 0.1889248788356781,
"learning_rate": 1.3333333333333333e-05,
"loss": 0.0029,
"step": 6248
},
{
"epoch": 22.0,
"eval_srl_loss": 0.004316597245633602,
"eval_srl_runtime": 17.5421,
"eval_srl_samples_per_second": 129.403,
"eval_srl_steps_per_second": 16.19,
"step": 6248
},
{
"epoch": 22.0,
"eval_ner_loss": 0.0006567554082721472,
"eval_ner_runtime": 17.5545,
"eval_ner_samples_per_second": 129.311,
"eval_ner_steps_per_second": 16.178,
"step": 6248
},
{
"epoch": 23.0,
"grad_norm": 0.13729548454284668,
"learning_rate": 1.1666666666666668e-05,
"loss": 0.0028,
"step": 6532
},
{
"epoch": 23.0,
"eval_srl_loss": 0.0036644088104367256,
"eval_srl_runtime": 17.5517,
"eval_srl_samples_per_second": 129.332,
"eval_srl_steps_per_second": 16.181,
"step": 6532
},
{
"epoch": 23.0,
"eval_ner_loss": 0.0006873765378259122,
"eval_ner_runtime": 17.5531,
"eval_ner_samples_per_second": 129.322,
"eval_ner_steps_per_second": 16.179,
"step": 6532
},
{
"epoch": 24.0,
"grad_norm": 0.21299496293067932,
"learning_rate": 1e-05,
"loss": 0.0023,
"step": 6816
},
{
"epoch": 24.0,
"eval_srl_loss": 0.0038258766289800406,
"eval_srl_runtime": 17.5464,
"eval_srl_samples_per_second": 129.371,
"eval_srl_steps_per_second": 16.186,
"step": 6816
},
{
"epoch": 24.0,
"eval_ner_loss": 0.0005753676523454487,
"eval_ner_runtime": 17.5627,
"eval_ner_samples_per_second": 129.251,
"eval_ner_steps_per_second": 16.171,
"step": 6816
},
{
"epoch": 25.0,
"grad_norm": 0.4840211272239685,
"learning_rate": 8.333333333333334e-06,
"loss": 0.0022,
"step": 7100
},
{
"epoch": 25.0,
"eval_srl_loss": 0.0032577498350292444,
"eval_srl_runtime": 17.5469,
"eval_srl_samples_per_second": 129.368,
"eval_srl_steps_per_second": 16.185,
"step": 7100
},
{
"epoch": 25.0,
"eval_ner_loss": 0.0005282628699205816,
"eval_ner_runtime": 17.5557,
"eval_ner_samples_per_second": 129.303,
"eval_ner_steps_per_second": 16.177,
"step": 7100
},
{
"epoch": 26.0,
"grad_norm": 0.1340308040380478,
"learning_rate": 6.666666666666667e-06,
"loss": 0.002,
"step": 7384
},
{
"epoch": 26.0,
"eval_srl_loss": 0.0033599596936255693,
"eval_srl_runtime": 17.5361,
"eval_srl_samples_per_second": 129.447,
"eval_srl_steps_per_second": 16.195,
"step": 7384
},
{
"epoch": 26.0,
"eval_ner_loss": 0.0004921465297229588,
"eval_ner_runtime": 17.5484,
"eval_ner_samples_per_second": 129.356,
"eval_ner_steps_per_second": 16.184,
"step": 7384
},
{
"epoch": 27.0,
"grad_norm": 0.1986730843782425,
"learning_rate": 5e-06,
"loss": 0.0019,
"step": 7668
},
{
"epoch": 27.0,
"eval_srl_loss": 0.003047993639484048,
"eval_srl_runtime": 17.5511,
"eval_srl_samples_per_second": 129.337,
"eval_srl_steps_per_second": 16.181,
"step": 7668
},
{
"epoch": 27.0,
"eval_ner_loss": 0.0004909657291136682,
"eval_ner_runtime": 17.5556,
"eval_ner_samples_per_second": 129.303,
"eval_ner_steps_per_second": 16.177,
"step": 7668
},
{
"epoch": 28.0,
"grad_norm": 0.27138885855674744,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.0018,
"step": 7952
},
{
"epoch": 28.0,
"eval_srl_loss": 0.002978708129376173,
"eval_srl_runtime": 17.5413,
"eval_srl_samples_per_second": 129.409,
"eval_srl_steps_per_second": 16.19,
"step": 7952
},
{
"epoch": 28.0,
"eval_ner_loss": 0.00047884471132420003,
"eval_ner_runtime": 17.5561,
"eval_ner_samples_per_second": 129.3,
"eval_ner_steps_per_second": 16.177,
"step": 7952
},
{
"epoch": 29.0,
"grad_norm": 0.20234395563602448,
"learning_rate": 1.6666666666666667e-06,
"loss": 0.0017,
"step": 8236
},
{
"epoch": 29.0,
"eval_srl_loss": 0.0029575293883681297,
"eval_srl_runtime": 17.5499,
"eval_srl_samples_per_second": 129.346,
"eval_srl_steps_per_second": 16.182,
"step": 8236
},
{
"epoch": 29.0,
"eval_ner_loss": 0.00046766019659116864,
"eval_ner_runtime": 17.5592,
"eval_ner_samples_per_second": 129.277,
"eval_ner_steps_per_second": 16.174,
"step": 8236
},
{
"epoch": 30.0,
"grad_norm": 0.014138607308268547,
"learning_rate": 0.0,
"loss": 0.0016,
"step": 8520
},
{
"epoch": 30.0,
"eval_srl_loss": 0.0029636274557560682,
"eval_srl_runtime": 17.5359,
"eval_srl_samples_per_second": 129.448,
"eval_srl_steps_per_second": 16.195,
"step": 8520
},
{
"epoch": 30.0,
"eval_ner_loss": 0.0004607184964697808,
"eval_ner_runtime": 17.5477,
"eval_ner_samples_per_second": 129.362,
"eval_ner_steps_per_second": 16.184,
"step": 8520
}
],
"logging_steps": 500,
"max_steps": 8520,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.58531004841984e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}