MFT-XLM-R / trainer_state.json
HannahGoossens's picture
Upload 8 files
1ec62af verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 30.0,
"eval_steps": 500,
"global_step": 8520,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 0.5159730315208435,
"learning_rate": 4.8333333333333334e-05,
"loss": 0.5703,
"step": 284
},
{
"epoch": 1.0,
"eval_srl_loss": 0.2537473142147064,
"eval_srl_runtime": 17.5701,
"eval_srl_samples_per_second": 129.197,
"eval_srl_steps_per_second": 16.164,
"step": 284
},
{
"epoch": 1.0,
"eval_ner_loss": 0.31049808859825134,
"eval_ner_runtime": 17.5692,
"eval_ner_samples_per_second": 129.203,
"eval_ner_steps_per_second": 16.165,
"step": 284
},
{
"epoch": 2.0,
"grad_norm": 0.9760362505912781,
"learning_rate": 4.666666666666667e-05,
"loss": 0.1433,
"step": 568
},
{
"epoch": 2.0,
"eval_srl_loss": 0.17236216366291046,
"eval_srl_runtime": 17.5646,
"eval_srl_samples_per_second": 129.237,
"eval_srl_steps_per_second": 16.169,
"step": 568
},
{
"epoch": 2.0,
"eval_ner_loss": 0.037721339613199234,
"eval_ner_runtime": 17.5653,
"eval_ner_samples_per_second": 129.232,
"eval_ner_steps_per_second": 16.168,
"step": 568
},
{
"epoch": 3.0,
"grad_norm": 1.6804832220077515,
"learning_rate": 4.5e-05,
"loss": 0.0805,
"step": 852
},
{
"epoch": 3.0,
"eval_srl_loss": 0.15186269581317902,
"eval_srl_runtime": 17.7859,
"eval_srl_samples_per_second": 127.629,
"eval_srl_steps_per_second": 15.968,
"step": 852
},
{
"epoch": 3.0,
"eval_ner_loss": 0.020406559109687805,
"eval_ner_runtime": 17.7913,
"eval_ner_samples_per_second": 127.591,
"eval_ner_steps_per_second": 15.963,
"step": 852
},
{
"epoch": 4.0,
"grad_norm": 0.3662571907043457,
"learning_rate": 4.3333333333333334e-05,
"loss": 0.0581,
"step": 1136
},
{
"epoch": 4.0,
"eval_srl_loss": 0.0956922098994255,
"eval_srl_runtime": 17.7671,
"eval_srl_samples_per_second": 127.764,
"eval_srl_steps_per_second": 15.985,
"step": 1136
},
{
"epoch": 4.0,
"eval_ner_loss": 0.008525359444320202,
"eval_ner_runtime": 17.7782,
"eval_ner_samples_per_second": 127.684,
"eval_ner_steps_per_second": 15.975,
"step": 1136
},
{
"epoch": 5.0,
"grad_norm": 0.767581582069397,
"learning_rate": 4.166666666666667e-05,
"loss": 0.0467,
"step": 1420
},
{
"epoch": 5.0,
"eval_srl_loss": 0.08026313781738281,
"eval_srl_runtime": 17.7961,
"eval_srl_samples_per_second": 127.556,
"eval_srl_steps_per_second": 15.959,
"step": 1420
},
{
"epoch": 5.0,
"eval_ner_loss": 0.005362341180443764,
"eval_ner_runtime": 17.7935,
"eval_ner_samples_per_second": 127.574,
"eval_ner_steps_per_second": 15.961,
"step": 1420
},
{
"epoch": 6.0,
"grad_norm": 0.8563648462295532,
"learning_rate": 4e-05,
"loss": 0.0377,
"step": 1704
},
{
"epoch": 6.0,
"eval_srl_loss": 0.062152598053216934,
"eval_srl_runtime": 17.7659,
"eval_srl_samples_per_second": 127.773,
"eval_srl_steps_per_second": 15.986,
"step": 1704
},
{
"epoch": 6.0,
"eval_ner_loss": 0.0037422562018036842,
"eval_ner_runtime": 17.7731,
"eval_ner_samples_per_second": 127.721,
"eval_ner_steps_per_second": 15.979,
"step": 1704
},
{
"epoch": 7.0,
"grad_norm": 0.06660082936286926,
"learning_rate": 3.8333333333333334e-05,
"loss": 0.0306,
"step": 1988
},
{
"epoch": 7.0,
"eval_srl_loss": 0.04806334525346756,
"eval_srl_runtime": 17.7866,
"eval_srl_samples_per_second": 127.624,
"eval_srl_steps_per_second": 15.967,
"step": 1988
},
{
"epoch": 7.0,
"eval_ner_loss": 0.0034043670166283846,
"eval_ner_runtime": 17.7804,
"eval_ner_samples_per_second": 127.669,
"eval_ner_steps_per_second": 15.973,
"step": 1988
},
{
"epoch": 8.0,
"grad_norm": 0.520635187625885,
"learning_rate": 3.6666666666666666e-05,
"loss": 0.0253,
"step": 2272
},
{
"epoch": 8.0,
"eval_srl_loss": 0.0377148799598217,
"eval_srl_runtime": 17.7743,
"eval_srl_samples_per_second": 127.713,
"eval_srl_steps_per_second": 15.978,
"step": 2272
},
{
"epoch": 8.0,
"eval_ner_loss": 0.0033542541787028313,
"eval_ner_runtime": 17.7809,
"eval_ner_samples_per_second": 127.665,
"eval_ner_steps_per_second": 15.972,
"step": 2272
},
{
"epoch": 9.0,
"grad_norm": 1.5942113399505615,
"learning_rate": 3.5e-05,
"loss": 0.0207,
"step": 2556
},
{
"epoch": 9.0,
"eval_srl_loss": 0.036016304045915604,
"eval_srl_runtime": 17.7903,
"eval_srl_samples_per_second": 127.598,
"eval_srl_steps_per_second": 15.964,
"step": 2556
},
{
"epoch": 9.0,
"eval_ner_loss": 0.0024884804151952267,
"eval_ner_runtime": 17.7971,
"eval_ner_samples_per_second": 127.549,
"eval_ner_steps_per_second": 15.958,
"step": 2556
},
{
"epoch": 10.0,
"grad_norm": 0.18361079692840576,
"learning_rate": 3.3333333333333335e-05,
"loss": 0.0173,
"step": 2840
},
{
"epoch": 10.0,
"eval_srl_loss": 0.024318644776940346,
"eval_srl_runtime": 17.7929,
"eval_srl_samples_per_second": 127.579,
"eval_srl_steps_per_second": 15.961,
"step": 2840
},
{
"epoch": 10.0,
"eval_ner_loss": 0.0017394711030647159,
"eval_ner_runtime": 17.8065,
"eval_ner_samples_per_second": 127.482,
"eval_ner_steps_per_second": 15.949,
"step": 2840
},
{
"epoch": 11.0,
"grad_norm": 0.1815880984067917,
"learning_rate": 3.1666666666666666e-05,
"loss": 0.0143,
"step": 3124
},
{
"epoch": 11.0,
"eval_srl_loss": 0.023377003148198128,
"eval_srl_runtime": 17.7486,
"eval_srl_samples_per_second": 127.897,
"eval_srl_steps_per_second": 16.001,
"step": 3124
},
{
"epoch": 11.0,
"eval_ner_loss": 0.0016144798137247562,
"eval_ner_runtime": 17.7863,
"eval_ner_samples_per_second": 127.626,
"eval_ner_steps_per_second": 15.967,
"step": 3124
},
{
"epoch": 12.0,
"grad_norm": 0.9344964027404785,
"learning_rate": 3e-05,
"loss": 0.0117,
"step": 3408
},
{
"epoch": 12.0,
"eval_srl_loss": 0.016138222068548203,
"eval_srl_runtime": 17.5666,
"eval_srl_samples_per_second": 129.222,
"eval_srl_steps_per_second": 16.167,
"step": 3408
},
{
"epoch": 12.0,
"eval_ner_loss": 0.0013956058537587523,
"eval_ner_runtime": 17.5632,
"eval_ner_samples_per_second": 129.248,
"eval_ner_steps_per_second": 16.17,
"step": 3408
},
{
"epoch": 13.0,
"grad_norm": 0.962304949760437,
"learning_rate": 2.8333333333333335e-05,
"loss": 0.0105,
"step": 3692
},
{
"epoch": 13.0,
"eval_srl_loss": 0.01380192395299673,
"eval_srl_runtime": 17.569,
"eval_srl_samples_per_second": 129.205,
"eval_srl_steps_per_second": 16.165,
"step": 3692
},
{
"epoch": 13.0,
"eval_ner_loss": 0.0013204001588746905,
"eval_ner_runtime": 17.5704,
"eval_ner_samples_per_second": 129.195,
"eval_ner_steps_per_second": 16.164,
"step": 3692
},
{
"epoch": 14.0,
"grad_norm": 0.45638516545295715,
"learning_rate": 2.6666666666666667e-05,
"loss": 0.0089,
"step": 3976
},
{
"epoch": 14.0,
"eval_srl_loss": 0.01093687117099762,
"eval_srl_runtime": 17.57,
"eval_srl_samples_per_second": 129.197,
"eval_srl_steps_per_second": 16.164,
"step": 3976
},
{
"epoch": 14.0,
"eval_ner_loss": 0.0010757059790194035,
"eval_ner_runtime": 17.5648,
"eval_ner_samples_per_second": 129.236,
"eval_ner_steps_per_second": 16.169,
"step": 3976
},
{
"epoch": 15.0,
"grad_norm": 0.3635445833206177,
"learning_rate": 2.5e-05,
"loss": 0.0079,
"step": 4260
},
{
"epoch": 15.0,
"eval_srl_loss": 0.011952933855354786,
"eval_srl_runtime": 17.5622,
"eval_srl_samples_per_second": 129.255,
"eval_srl_steps_per_second": 16.171,
"step": 4260
},
{
"epoch": 15.0,
"eval_ner_loss": 0.001067746547050774,
"eval_ner_runtime": 17.5636,
"eval_ner_samples_per_second": 129.245,
"eval_ner_steps_per_second": 16.17,
"step": 4260
},
{
"epoch": 16.0,
"grad_norm": 0.11766979098320007,
"learning_rate": 2.3333333333333336e-05,
"loss": 0.0068,
"step": 4544
},
{
"epoch": 16.0,
"eval_srl_loss": 0.008437857031822205,
"eval_srl_runtime": 17.5643,
"eval_srl_samples_per_second": 129.239,
"eval_srl_steps_per_second": 16.169,
"step": 4544
},
{
"epoch": 16.0,
"eval_ner_loss": 0.0009184295195154846,
"eval_ner_runtime": 17.5679,
"eval_ner_samples_per_second": 129.213,
"eval_ner_steps_per_second": 16.166,
"step": 4544
},
{
"epoch": 17.0,
"grad_norm": 0.20853333175182343,
"learning_rate": 2.1666666666666667e-05,
"loss": 0.0062,
"step": 4828
},
{
"epoch": 17.0,
"eval_srl_loss": 0.007432564627379179,
"eval_srl_runtime": 17.5647,
"eval_srl_samples_per_second": 129.236,
"eval_srl_steps_per_second": 16.169,
"step": 4828
},
{
"epoch": 17.0,
"eval_ner_loss": 0.0009312801994383335,
"eval_ner_runtime": 17.5704,
"eval_ner_samples_per_second": 129.194,
"eval_ner_steps_per_second": 16.164,
"step": 4828
},
{
"epoch": 18.0,
"grad_norm": 0.10051178932189941,
"learning_rate": 2e-05,
"loss": 0.005,
"step": 5112
},
{
"epoch": 18.0,
"eval_srl_loss": 0.006476872134953737,
"eval_srl_runtime": 17.564,
"eval_srl_samples_per_second": 129.241,
"eval_srl_steps_per_second": 16.169,
"step": 5112
},
{
"epoch": 18.0,
"eval_ner_loss": 0.0008163392194546759,
"eval_ner_runtime": 17.5706,
"eval_ner_samples_per_second": 129.193,
"eval_ner_steps_per_second": 16.163,
"step": 5112
},
{
"epoch": 19.0,
"grad_norm": 0.3805777132511139,
"learning_rate": 1.8333333333333333e-05,
"loss": 0.0046,
"step": 5396
},
{
"epoch": 19.0,
"eval_srl_loss": 0.005975746084004641,
"eval_srl_runtime": 17.5674,
"eval_srl_samples_per_second": 129.216,
"eval_srl_steps_per_second": 16.166,
"step": 5396
},
{
"epoch": 19.0,
"eval_ner_loss": 0.001090305158868432,
"eval_ner_runtime": 17.5791,
"eval_ner_samples_per_second": 129.13,
"eval_ner_steps_per_second": 16.156,
"step": 5396
},
{
"epoch": 20.0,
"grad_norm": 0.2945084273815155,
"learning_rate": 1.6666666666666667e-05,
"loss": 0.0043,
"step": 5680
},
{
"epoch": 20.0,
"eval_srl_loss": 0.005685478448867798,
"eval_srl_runtime": 17.5743,
"eval_srl_samples_per_second": 129.166,
"eval_srl_steps_per_second": 16.16,
"step": 5680
},
{
"epoch": 20.0,
"eval_ner_loss": 0.0006339964456856251,
"eval_ner_runtime": 17.5756,
"eval_ner_samples_per_second": 129.156,
"eval_ner_steps_per_second": 16.159,
"step": 5680
},
{
"epoch": 21.0,
"grad_norm": 0.39416804909706116,
"learning_rate": 1.5e-05,
"loss": 0.0038,
"step": 5964
},
{
"epoch": 21.0,
"eval_srl_loss": 0.005070784129202366,
"eval_srl_runtime": 17.5904,
"eval_srl_samples_per_second": 129.048,
"eval_srl_steps_per_second": 16.145,
"step": 5964
},
{
"epoch": 21.0,
"eval_ner_loss": 0.0006808372563682497,
"eval_ner_runtime": 17.5977,
"eval_ner_samples_per_second": 128.994,
"eval_ner_steps_per_second": 16.138,
"step": 5964
},
{
"epoch": 22.0,
"grad_norm": 0.39179959893226624,
"learning_rate": 1.3333333333333333e-05,
"loss": 0.0035,
"step": 6248
},
{
"epoch": 22.0,
"eval_srl_loss": 0.00477498397231102,
"eval_srl_runtime": 17.5631,
"eval_srl_samples_per_second": 129.248,
"eval_srl_steps_per_second": 16.17,
"step": 6248
},
{
"epoch": 22.0,
"eval_ner_loss": 0.0005992591031827033,
"eval_ner_runtime": 17.5704,
"eval_ner_samples_per_second": 129.195,
"eval_ner_steps_per_second": 16.164,
"step": 6248
},
{
"epoch": 23.0,
"grad_norm": 0.1726011037826538,
"learning_rate": 1.1666666666666668e-05,
"loss": 0.0031,
"step": 6532
},
{
"epoch": 23.0,
"eval_srl_loss": 0.004426795057952404,
"eval_srl_runtime": 17.5671,
"eval_srl_samples_per_second": 129.219,
"eval_srl_steps_per_second": 16.167,
"step": 6532
},
{
"epoch": 23.0,
"eval_ner_loss": 0.0005771311116404831,
"eval_ner_runtime": 17.5705,
"eval_ner_samples_per_second": 129.194,
"eval_ner_steps_per_second": 16.163,
"step": 6532
},
{
"epoch": 24.0,
"grad_norm": 0.21839284896850586,
"learning_rate": 1e-05,
"loss": 0.0029,
"step": 6816
},
{
"epoch": 24.0,
"eval_srl_loss": 0.00477210059762001,
"eval_srl_runtime": 17.5725,
"eval_srl_samples_per_second": 129.179,
"eval_srl_steps_per_second": 16.162,
"step": 6816
},
{
"epoch": 24.0,
"eval_ner_loss": 0.0006085642380639911,
"eval_ner_runtime": 17.5728,
"eval_ner_samples_per_second": 129.177,
"eval_ner_steps_per_second": 16.161,
"step": 6816
},
{
"epoch": 25.0,
"grad_norm": 0.31567102670669556,
"learning_rate": 8.333333333333334e-06,
"loss": 0.0027,
"step": 7100
},
{
"epoch": 25.0,
"eval_srl_loss": 0.003935088403522968,
"eval_srl_runtime": 17.5754,
"eval_srl_samples_per_second": 129.158,
"eval_srl_steps_per_second": 16.159,
"step": 7100
},
{
"epoch": 25.0,
"eval_ner_loss": 0.000611834810115397,
"eval_ner_runtime": 17.5758,
"eval_ner_samples_per_second": 129.155,
"eval_ner_steps_per_second": 16.159,
"step": 7100
},
{
"epoch": 26.0,
"grad_norm": 0.14785556495189667,
"learning_rate": 6.666666666666667e-06,
"loss": 0.0026,
"step": 7384
},
{
"epoch": 26.0,
"eval_srl_loss": 0.00404845317825675,
"eval_srl_runtime": 17.5681,
"eval_srl_samples_per_second": 129.211,
"eval_srl_steps_per_second": 16.166,
"step": 7384
},
{
"epoch": 26.0,
"eval_ner_loss": 0.0005031172186136246,
"eval_ner_runtime": 17.5675,
"eval_ner_samples_per_second": 129.216,
"eval_ner_steps_per_second": 16.166,
"step": 7384
},
{
"epoch": 27.0,
"grad_norm": 0.22755762934684753,
"learning_rate": 5e-06,
"loss": 0.0025,
"step": 7668
},
{
"epoch": 27.0,
"eval_srl_loss": 0.0036419888492673635,
"eval_srl_runtime": 17.5746,
"eval_srl_samples_per_second": 129.164,
"eval_srl_steps_per_second": 16.16,
"step": 7668
},
{
"epoch": 27.0,
"eval_ner_loss": 0.0004956649499945343,
"eval_ner_runtime": 17.5715,
"eval_ner_samples_per_second": 129.186,
"eval_ner_steps_per_second": 16.163,
"step": 7668
},
{
"epoch": 28.0,
"grad_norm": 0.03562552109360695,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.0023,
"step": 7952
},
{
"epoch": 28.0,
"eval_srl_loss": 0.0035714716650545597,
"eval_srl_runtime": 17.5746,
"eval_srl_samples_per_second": 129.164,
"eval_srl_steps_per_second": 16.16,
"step": 7952
},
{
"epoch": 28.0,
"eval_ner_loss": 0.0004828003002330661,
"eval_ner_runtime": 17.5755,
"eval_ner_samples_per_second": 129.157,
"eval_ner_steps_per_second": 16.159,
"step": 7952
},
{
"epoch": 29.0,
"grad_norm": 0.1542978137731552,
"learning_rate": 1.6666666666666667e-06,
"loss": 0.0021,
"step": 8236
},
{
"epoch": 29.0,
"eval_srl_loss": 0.003562468569725752,
"eval_srl_runtime": 17.6836,
"eval_srl_samples_per_second": 128.368,
"eval_srl_steps_per_second": 16.06,
"step": 8236
},
{
"epoch": 29.0,
"eval_ner_loss": 0.0004974309122189879,
"eval_ner_runtime": 17.5765,
"eval_ner_samples_per_second": 129.149,
"eval_ner_steps_per_second": 16.158,
"step": 8236
},
{
"epoch": 30.0,
"grad_norm": 0.015293457545340061,
"learning_rate": 0.0,
"loss": 0.002,
"step": 8520
},
{
"epoch": 30.0,
"eval_srl_loss": 0.003541701938956976,
"eval_srl_runtime": 17.5723,
"eval_srl_samples_per_second": 129.181,
"eval_srl_steps_per_second": 16.162,
"step": 8520
},
{
"epoch": 30.0,
"eval_ner_loss": 0.0004944342072121799,
"eval_ner_runtime": 17.5784,
"eval_ner_samples_per_second": 129.136,
"eval_ner_steps_per_second": 16.156,
"step": 8520
}
],
"logging_steps": 500,
"max_steps": 8520,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.58531004841984e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}