sour-sarma / trainer_state.json
ivlcic's picture
Upload 7 files
0b355b4 verified
raw
history blame
16.9 kB
{
"best_global_step": 169584,
"best_metric": 0.9357306547788711,
"best_model_checkpoint": "/workspace/cannopy/result/train/ner/ner.mm-bert.b32.lr2e-05/checkpoint-169584",
"epoch": 30.0,
"eval_steps": 500,
"global_step": 211980,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 0.8071305155754089,
"learning_rate": 1.933342768185678e-05,
"loss": 0.0833,
"step": 7066
},
{
"epoch": 1.0,
"eval_accuracy": 0.9816472726923888,
"eval_f1": 0.9143601801750758,
"eval_loss": 0.06737913936376572,
"eval_precision": 0.9067209043611152,
"eval_recall": 0.9221292740903273,
"eval_runtime": 63.6285,
"eval_samples_per_second": 444.094,
"eval_steps_per_second": 13.893,
"step": 7066
},
{
"epoch": 2.0,
"grad_norm": 1.6214423179626465,
"learning_rate": 1.8666761015190115e-05,
"loss": 0.041,
"step": 14132
},
{
"epoch": 2.0,
"eval_accuracy": 0.9832133401124307,
"eval_f1": 0.9247473576465666,
"eval_loss": 0.06235222890973091,
"eval_precision": 0.9153111601195607,
"eval_recall": 0.9343801426220516,
"eval_runtime": 26.8725,
"eval_samples_per_second": 1051.52,
"eval_steps_per_second": 32.896,
"step": 14132
},
{
"epoch": 3.0,
"grad_norm": 0.07880270481109619,
"learning_rate": 1.8000094348523447e-05,
"loss": 0.0238,
"step": 21198
},
{
"epoch": 3.0,
"eval_accuracy": 0.9830166823353351,
"eval_f1": 0.9234717685487634,
"eval_loss": 0.07555678486824036,
"eval_precision": 0.91971390356147,
"eval_recall": 0.9272604680928872,
"eval_runtime": 25.0859,
"eval_samples_per_second": 1126.41,
"eval_steps_per_second": 35.239,
"step": 21198
},
{
"epoch": 4.0,
"grad_norm": 0.9151579141616821,
"learning_rate": 1.733342768185678e-05,
"loss": 0.0146,
"step": 28264
},
{
"epoch": 4.0,
"eval_accuracy": 0.9836390322518754,
"eval_f1": 0.9253655132503873,
"eval_loss": 0.08953021466732025,
"eval_precision": 0.9188440087882372,
"eval_recall": 0.931980252331322,
"eval_runtime": 33.5139,
"eval_samples_per_second": 843.142,
"eval_steps_per_second": 26.377,
"step": 28264
},
{
"epoch": 5.0,
"grad_norm": 0.006677389610558748,
"learning_rate": 1.6666761015190114e-05,
"loss": 0.0096,
"step": 35330
},
{
"epoch": 5.0,
"eval_accuracy": 0.983770936858464,
"eval_f1": 0.9272781274631878,
"eval_loss": 0.08903905749320984,
"eval_precision": 0.9269075296320826,
"eval_recall": 0.9276490217590053,
"eval_runtime": 24.8287,
"eval_samples_per_second": 1138.077,
"eval_steps_per_second": 35.604,
"step": 35330
},
{
"epoch": 6.0,
"grad_norm": 0.060630589723587036,
"learning_rate": 1.6000094348523446e-05,
"loss": 0.0069,
"step": 42396
},
{
"epoch": 6.0,
"eval_accuracy": 0.9842493908405441,
"eval_f1": 0.9304571065067249,
"eval_loss": 0.10069137066602707,
"eval_precision": 0.9249096657633243,
"eval_recall": 0.9360714938745658,
"eval_runtime": 24.4078,
"eval_samples_per_second": 1157.705,
"eval_steps_per_second": 36.218,
"step": 42396
},
{
"epoch": 7.0,
"grad_norm": 0.6270243525505066,
"learning_rate": 1.533342768185678e-05,
"loss": 0.0055,
"step": 49462
},
{
"epoch": 7.0,
"eval_accuracy": 0.9841822394044627,
"eval_f1": 0.9297173805559187,
"eval_loss": 0.1080186516046524,
"eval_precision": 0.924709180731881,
"eval_recall": 0.9347801243371732,
"eval_runtime": 25.0447,
"eval_samples_per_second": 1128.262,
"eval_steps_per_second": 35.297,
"step": 49462
},
{
"epoch": 8.0,
"grad_norm": 0.0023466802667826414,
"learning_rate": 1.4666761015190114e-05,
"loss": 0.0044,
"step": 56528
},
{
"epoch": 8.0,
"eval_accuracy": 0.9840215556109821,
"eval_f1": 0.9293982272654773,
"eval_loss": 0.13059474527835846,
"eval_precision": 0.9265480895915679,
"eval_recall": 0.9322659535564088,
"eval_runtime": 24.3355,
"eval_samples_per_second": 1161.141,
"eval_steps_per_second": 36.325,
"step": 56528
},
{
"epoch": 9.0,
"grad_norm": 0.28039243817329407,
"learning_rate": 1.4000094348523448e-05,
"loss": 0.0038,
"step": 63594
},
{
"epoch": 9.0,
"eval_accuracy": 0.983985581627367,
"eval_f1": 0.9288880061288766,
"eval_loss": 0.11633551865816116,
"eval_precision": 0.9225687907925736,
"eval_recall": 0.9352943865423295,
"eval_runtime": 25.2015,
"eval_samples_per_second": 1121.242,
"eval_steps_per_second": 35.077,
"step": 63594
},
{
"epoch": 10.0,
"grad_norm": 0.08014234900474548,
"learning_rate": 1.3333427681856781e-05,
"loss": 0.0035,
"step": 70660
},
{
"epoch": 10.0,
"eval_accuracy": 0.9841786420061012,
"eval_f1": 0.9307753433987968,
"eval_loss": 0.12983541190624237,
"eval_precision": 0.9246244756191078,
"eval_recall": 0.9370085938928506,
"eval_runtime": 24.5312,
"eval_samples_per_second": 1151.881,
"eval_steps_per_second": 36.036,
"step": 70660
},
{
"epoch": 11.0,
"grad_norm": 0.00041744596092030406,
"learning_rate": 1.2666761015190115e-05,
"loss": 0.0028,
"step": 77726
},
{
"epoch": 11.0,
"eval_accuracy": 0.9843285336044972,
"eval_f1": 0.9306707629288274,
"eval_loss": 0.14403043687343597,
"eval_precision": 0.9266535245734484,
"eval_recall": 0.9347229840921558,
"eval_runtime": 25.1796,
"eval_samples_per_second": 1122.216,
"eval_steps_per_second": 35.108,
"step": 77726
},
{
"epoch": 12.0,
"grad_norm": 0.00028503904468379915,
"learning_rate": 1.2000094348523445e-05,
"loss": 0.0025,
"step": 84792
},
{
"epoch": 12.0,
"eval_accuracy": 0.9844340572897681,
"eval_f1": 0.9317236158307861,
"eval_loss": 0.14104171097278595,
"eval_precision": 0.9281574978169406,
"eval_recall": 0.9353172426403364,
"eval_runtime": 24.4922,
"eval_samples_per_second": 1153.715,
"eval_steps_per_second": 36.093,
"step": 84792
},
{
"epoch": 13.0,
"grad_norm": 0.0035878911148756742,
"learning_rate": 1.1333427681856779e-05,
"loss": 0.0022,
"step": 91858
},
{
"epoch": 13.0,
"eval_accuracy": 0.9844544425471499,
"eval_f1": 0.9315798786508901,
"eval_loss": 0.13236555457115173,
"eval_precision": 0.9305759866808069,
"eval_recall": 0.9325859389285062,
"eval_runtime": 25.03,
"eval_samples_per_second": 1128.927,
"eval_steps_per_second": 35.318,
"step": 91858
},
{
"epoch": 14.0,
"grad_norm": 0.00016986434638965875,
"learning_rate": 1.0666761015190112e-05,
"loss": 0.0021,
"step": 98924
},
{
"epoch": 14.0,
"eval_accuracy": 0.9844880182651906,
"eval_f1": 0.9330711130112583,
"eval_loss": 0.13451294600963593,
"eval_precision": 0.9308583842312985,
"eval_recall": 0.9352943865423295,
"eval_runtime": 24.4183,
"eval_samples_per_second": 1157.205,
"eval_steps_per_second": 36.202,
"step": 98924
},
{
"epoch": 15.0,
"grad_norm": 0.0010187036823481321,
"learning_rate": 1.0000094348523446e-05,
"loss": 0.002,
"step": 105990
},
{
"epoch": 15.0,
"eval_accuracy": 0.9843788971815582,
"eval_f1": 0.9319383071528353,
"eval_loss": 0.13619445264339447,
"eval_precision": 0.929939349802574,
"eval_recall": 0.9339458767599196,
"eval_runtime": 25.0941,
"eval_samples_per_second": 1126.042,
"eval_steps_per_second": 35.227,
"step": 105990
},
{
"epoch": 16.0,
"grad_norm": 0.00028575636679306626,
"learning_rate": 9.333427681856779e-06,
"loss": 0.0017,
"step": 113056
},
{
"epoch": 16.0,
"eval_accuracy": 0.9843669058536866,
"eval_f1": 0.9323486068748073,
"eval_loss": 0.15387865900993347,
"eval_precision": 0.9319972593353888,
"eval_recall": 0.9327002194185409,
"eval_runtime": 24.3506,
"eval_samples_per_second": 1160.422,
"eval_steps_per_second": 36.303,
"step": 113056
},
{
"epoch": 17.0,
"grad_norm": 0.00035141929402016103,
"learning_rate": 8.666761015190113e-06,
"loss": 0.0016,
"step": 120122
},
{
"epoch": 17.0,
"eval_accuracy": 0.9846175246062048,
"eval_f1": 0.9333576463482042,
"eval_loss": 0.14764520525932312,
"eval_precision": 0.9308063874524066,
"eval_recall": 0.9359229292375205,
"eval_runtime": 25.0122,
"eval_samples_per_second": 1129.728,
"eval_steps_per_second": 35.343,
"step": 120122
},
{
"epoch": 18.0,
"grad_norm": 0.00019602595421019942,
"learning_rate": 8.000094348523446e-06,
"loss": 0.0014,
"step": 127188
},
{
"epoch": 18.0,
"eval_accuracy": 0.9846570959881813,
"eval_f1": 0.9343024019274798,
"eval_loss": 0.16399884223937988,
"eval_precision": 0.9335402238524992,
"eval_recall": 0.93506582556226,
"eval_runtime": 24.5918,
"eval_samples_per_second": 1149.04,
"eval_steps_per_second": 35.947,
"step": 127188
},
{
"epoch": 19.0,
"grad_norm": 0.00016485151718370616,
"learning_rate": 7.33342768185678e-06,
"loss": 0.0013,
"step": 134254
},
{
"epoch": 19.0,
"eval_accuracy": 0.984291360488095,
"eval_f1": 0.9317829016575214,
"eval_loss": 0.16133514046669006,
"eval_precision": 0.9272642908070302,
"eval_recall": 0.9363457670506491,
"eval_runtime": 36.0736,
"eval_samples_per_second": 783.315,
"eval_steps_per_second": 24.505,
"step": 134254
},
{
"epoch": 20.0,
"grad_norm": 3.631131039583124e-05,
"learning_rate": 6.666761015190113e-06,
"loss": 0.0011,
"step": 141320
},
{
"epoch": 20.0,
"eval_accuracy": 0.9844688321405959,
"eval_f1": 0.933409899819197,
"eval_loss": 0.15595203638076782,
"eval_precision": 0.9287944963678744,
"eval_recall": 0.9380714024501737,
"eval_runtime": 24.4307,
"eval_samples_per_second": 1156.619,
"eval_steps_per_second": 36.184,
"step": 141320
},
{
"epoch": 21.0,
"grad_norm": 0.00046227360144257545,
"learning_rate": 6.000094348523447e-06,
"loss": 0.001,
"step": 148386
},
{
"epoch": 21.0,
"eval_accuracy": 0.9848237754455977,
"eval_f1": 0.9352061312288099,
"eval_loss": 0.15601831674575806,
"eval_precision": 0.9326181086702049,
"eval_recall": 0.9378085573230938,
"eval_runtime": 25.2528,
"eval_samples_per_second": 1118.963,
"eval_steps_per_second": 35.006,
"step": 148386
},
{
"epoch": 22.0,
"grad_norm": 1.7225727333425311e-06,
"learning_rate": 5.33342768185678e-06,
"loss": 0.0009,
"step": 155452
},
{
"epoch": 22.0,
"eval_accuracy": 0.9847434335488575,
"eval_f1": 0.9346046246074793,
"eval_loss": 0.17021338641643524,
"eval_precision": 0.9338475229902106,
"eval_recall": 0.9353629548363503,
"eval_runtime": 27.4692,
"eval_samples_per_second": 1028.681,
"eval_steps_per_second": 32.182,
"step": 155452
},
{
"epoch": 23.0,
"grad_norm": 0.00017540222324896604,
"learning_rate": 4.666761015190113e-06,
"loss": 0.0009,
"step": 162518
},
{
"epoch": 23.0,
"eval_accuracy": 0.9847638188062393,
"eval_f1": 0.9339199036024054,
"eval_loss": 0.16753220558166504,
"eval_precision": 0.9290092046045638,
"eval_recall": 0.9388827939294204,
"eval_runtime": 25.2471,
"eval_samples_per_second": 1119.219,
"eval_steps_per_second": 35.014,
"step": 162518
},
{
"epoch": 24.0,
"grad_norm": 1.526959204056766e-05,
"learning_rate": 4.000094348523446e-06,
"loss": 0.0008,
"step": 169584
},
{
"epoch": 24.0,
"eval_accuracy": 0.9849400913259531,
"eval_f1": 0.9357306547788711,
"eval_loss": 0.17812186479568481,
"eval_precision": 0.9335486953158769,
"eval_recall": 0.9379228378131286,
"eval_runtime": 24.4734,
"eval_samples_per_second": 1154.601,
"eval_steps_per_second": 36.121,
"step": 169584
},
{
"epoch": 25.0,
"grad_norm": 8.847219760355074e-06,
"learning_rate": 3.333427681856779e-06,
"loss": 0.0007,
"step": 176650
},
{
"epoch": 25.0,
"eval_accuracy": 0.9848741390226589,
"eval_f1": 0.935050123741204,
"eval_loss": 0.17935001850128174,
"eval_precision": 0.9331451594545993,
"eval_recall": 0.9369628816968367,
"eval_runtime": 25.1905,
"eval_samples_per_second": 1121.735,
"eval_steps_per_second": 35.093,
"step": 176650
},
{
"epoch": 26.0,
"grad_norm": 0.00021123145415913314,
"learning_rate": 2.6667610151901125e-06,
"loss": 0.0007,
"step": 183716
},
{
"epoch": 26.0,
"eval_accuracy": 0.9848621476947871,
"eval_f1": 0.9346344803971922,
"eval_loss": 0.17747123539447784,
"eval_precision": 0.9334518842760413,
"eval_recall": 0.9358200767964893,
"eval_runtime": 24.5312,
"eval_samples_per_second": 1151.881,
"eval_steps_per_second": 36.036,
"step": 183716
},
{
"epoch": 27.0,
"grad_norm": 2.859743472072296e-05,
"learning_rate": 2.000094348523446e-06,
"loss": 0.0007,
"step": 190782
},
{
"epoch": 27.0,
"eval_accuracy": 0.9849700696456323,
"eval_f1": 0.9355334631546582,
"eval_loss": 0.1866326779127121,
"eval_precision": 0.9322530128685231,
"eval_recall": 0.9388370817334065,
"eval_runtime": 25.0727,
"eval_samples_per_second": 1127.002,
"eval_steps_per_second": 35.257,
"step": 190782
},
{
"epoch": 28.0,
"grad_norm": 0.0020877772476524115,
"learning_rate": 1.333427681856779e-06,
"loss": 0.0007,
"step": 197848
},
{
"epoch": 28.0,
"eval_accuracy": 0.984804589321003,
"eval_f1": 0.9346563738215604,
"eval_loss": 0.1925719529390335,
"eval_precision": 0.9312210726926219,
"eval_recall": 0.9381171146461876,
"eval_runtime": 24.3867,
"eval_samples_per_second": 1158.706,
"eval_steps_per_second": 36.249,
"step": 197848
},
{
"epoch": 29.0,
"grad_norm": 1.088813405658584e-06,
"learning_rate": 6.667610151901123e-07,
"loss": 0.0006,
"step": 204914
},
{
"epoch": 29.0,
"eval_accuracy": 0.9849496843882504,
"eval_f1": 0.9355303716618523,
"eval_loss": 0.198269322514534,
"eval_precision": 0.9328674507130277,
"eval_recall": 0.9382085390382154,
"eval_runtime": 25.149,
"eval_samples_per_second": 1123.581,
"eval_steps_per_second": 35.15,
"step": 204914
},
{
"epoch": 30.0,
"grad_norm": 5.2584637160180137e-05,
"learning_rate": 9.434852344560809e-11,
"loss": 0.0006,
"step": 211980
},
{
"epoch": 30.0,
"eval_accuracy": 0.9849424895915274,
"eval_f1": 0.9356125356125355,
"eval_loss": 0.20442676544189453,
"eval_precision": 0.9329969544070185,
"eval_recall": 0.9382428231852258,
"eval_runtime": 24.5229,
"eval_samples_per_second": 1152.27,
"eval_steps_per_second": 36.048,
"step": 211980
}
],
"logging_steps": 500,
"max_steps": 211980,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.3987517048244486e+17,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}