arabert-v2-ner-lora / trainer_state.json
Diaa-Essam's picture
Upload 13 files
9246c3a verified
{
"best_global_step": 13430,
"best_metric": 0.8506006776876477,
"best_model_checkpoint": "./aner_lora_model/checkpoint-13430",
"epoch": 34.0,
"eval_steps": 500,
"global_step": 13430,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 1.1466563940048218,
"learning_rate": 9.450337512054003e-06,
"loss": 1.6728,
"step": 395
},
{
"epoch": 1.0,
"eval_f1": 0.07284681130834977,
"eval_loss": 0.8734206557273865,
"eval_precision": 0.09899928520371694,
"eval_recall": 0.05762429789889744,
"eval_runtime": 6.1863,
"eval_samples_per_second": 415.757,
"eval_steps_per_second": 6.628,
"step": 395
},
{
"epoch": 2.0,
"grad_norm": 1.8038758039474487,
"learning_rate": 1.8948891031822567e-05,
"loss": 0.5426,
"step": 790
},
{
"epoch": 2.0,
"eval_f1": 0.6452263332997278,
"eval_loss": 0.3722752630710602,
"eval_precision": 0.6259780907668232,
"eval_recall": 0.6656958602038694,
"eval_runtime": 6.2125,
"eval_samples_per_second": 414.003,
"eval_steps_per_second": 6.6,
"step": 790
},
{
"epoch": 3.0,
"grad_norm": 2.234807014465332,
"learning_rate": 2.844744455159113e-05,
"loss": 0.3384,
"step": 1185
},
{
"epoch": 3.0,
"eval_f1": 0.7009996970614966,
"eval_loss": 0.2873896658420563,
"eval_precision": 0.6811224489795918,
"eval_recall": 0.7220719783648846,
"eval_runtime": 6.2219,
"eval_samples_per_second": 413.378,
"eval_steps_per_second": 6.59,
"step": 1185
},
{
"epoch": 4.0,
"grad_norm": 2.2888472080230713,
"learning_rate": 3.7970106075216974e-05,
"loss": 0.2783,
"step": 1580
},
{
"epoch": 4.0,
"eval_f1": 0.7592421915002561,
"eval_loss": 0.27101996541023254,
"eval_precision": 0.7476805163372328,
"eval_recall": 0.7711670480549199,
"eval_runtime": 6.2368,
"eval_samples_per_second": 412.391,
"eval_steps_per_second": 6.574,
"step": 1580
},
{
"epoch": 5.0,
"grad_norm": 2.091982841491699,
"learning_rate": 4.749276759884282e-05,
"loss": 0.2408,
"step": 1975
},
{
"epoch": 5.0,
"eval_f1": 0.7708728179551123,
"eval_loss": 0.2352629005908966,
"eval_precision": 0.7405136067458796,
"eval_recall": 0.8038277511961722,
"eval_runtime": 6.2249,
"eval_samples_per_second": 413.181,
"eval_steps_per_second": 6.586,
"step": 1975
},
{
"epoch": 6.0,
"grad_norm": 3.6555726528167725,
"learning_rate": 5.699132111861139e-05,
"loss": 0.2138,
"step": 2370
},
{
"epoch": 6.0,
"eval_f1": 0.7946803069053708,
"eval_loss": 0.23413550853729248,
"eval_precision": 0.7818035426731079,
"eval_recall": 0.8079883503224464,
"eval_runtime": 6.2199,
"eval_samples_per_second": 413.509,
"eval_steps_per_second": 6.592,
"step": 2370
},
{
"epoch": 7.0,
"grad_norm": 1.3194808959960938,
"learning_rate": 6.648987463837994e-05,
"loss": 0.1997,
"step": 2765
},
{
"epoch": 7.0,
"eval_f1": 0.7968973259848949,
"eval_loss": 0.217984139919281,
"eval_precision": 0.7822079743538369,
"eval_recall": 0.8121489494487206,
"eval_runtime": 6.2321,
"eval_samples_per_second": 412.701,
"eval_steps_per_second": 6.579,
"step": 2765
},
{
"epoch": 8.0,
"grad_norm": 3.00563907623291,
"learning_rate": 7.601253616200579e-05,
"loss": 0.1796,
"step": 3160
},
{
"epoch": 8.0,
"eval_f1": 0.8006453564586065,
"eval_loss": 0.21363000571727753,
"eval_precision": 0.776908023483366,
"eval_recall": 0.8258789265654254,
"eval_runtime": 6.2148,
"eval_samples_per_second": 413.849,
"eval_steps_per_second": 6.597,
"step": 3160
},
{
"epoch": 9.0,
"grad_norm": 1.7021512985229492,
"learning_rate": 8.553519768563163e-05,
"loss": 0.1628,
"step": 3555
},
{
"epoch": 9.0,
"eval_f1": 0.8144906889182865,
"eval_loss": 0.21552523970603943,
"eval_precision": 0.7972111553784861,
"eval_recall": 0.8325358851674641,
"eval_runtime": 6.2332,
"eval_samples_per_second": 412.63,
"eval_steps_per_second": 6.578,
"step": 3555
},
{
"epoch": 10.0,
"grad_norm": 1.311308741569519,
"learning_rate": 9.505785920925749e-05,
"loss": 0.1487,
"step": 3950
},
{
"epoch": 10.0,
"eval_f1": 0.8213381921247089,
"eval_loss": 0.2061518430709839,
"eval_precision": 0.7998817034700315,
"eval_recall": 0.8439775327647181,
"eval_runtime": 6.2201,
"eval_samples_per_second": 413.497,
"eval_steps_per_second": 6.592,
"step": 3950
},
{
"epoch": 11.0,
"grad_norm": 0.8549454808235168,
"learning_rate": 9.998387447114843e-05,
"loss": 0.1373,
"step": 4345
},
{
"epoch": 11.0,
"eval_f1": 0.8243118796684744,
"eval_loss": 0.22040106356143951,
"eval_precision": 0.8111155859846959,
"eval_recall": 0.8379446640316206,
"eval_runtime": 6.2308,
"eval_samples_per_second": 412.791,
"eval_steps_per_second": 6.58,
"step": 4345
},
{
"epoch": 12.0,
"grad_norm": 1.147991418838501,
"learning_rate": 9.984720099749273e-05,
"loss": 0.1214,
"step": 4740
},
{
"epoch": 12.0,
"eval_f1": 0.833384789544098,
"eval_loss": 0.21504363417625427,
"eval_precision": 0.824643584521385,
"eval_recall": 0.8423132931142084,
"eval_runtime": 6.2273,
"eval_samples_per_second": 413.023,
"eval_steps_per_second": 6.584,
"step": 4740
},
{
"epoch": 13.0,
"grad_norm": 0.7487550973892212,
"learning_rate": 9.957158852398329e-05,
"loss": 0.1091,
"step": 5135
},
{
"epoch": 13.0,
"eval_f1": 0.83111337061515,
"eval_loss": 0.21468985080718994,
"eval_precision": 0.8128480509148767,
"eval_recall": 0.8502184314541293,
"eval_runtime": 6.2234,
"eval_samples_per_second": 413.28,
"eval_steps_per_second": 6.588,
"step": 5135
},
{
"epoch": 14.0,
"grad_norm": 1.407198429107666,
"learning_rate": 9.915780526469535e-05,
"loss": 0.0979,
"step": 5530
},
{
"epoch": 14.0,
"eval_f1": 0.838515129458251,
"eval_loss": 0.21988336741924286,
"eval_precision": 0.8382536382536383,
"eval_recall": 0.8387767838568754,
"eval_runtime": 6.2301,
"eval_samples_per_second": 412.832,
"eval_steps_per_second": 6.581,
"step": 5530
},
{
"epoch": 15.0,
"grad_norm": 0.48871779441833496,
"learning_rate": 9.86070045568485e-05,
"loss": 0.0873,
"step": 5925
},
{
"epoch": 15.0,
"eval_f1": 0.8290302907094939,
"eval_loss": 0.22614166140556335,
"eval_precision": 0.8105744384814152,
"eval_recall": 0.848346161847306,
"eval_runtime": 6.2224,
"eval_samples_per_second": 413.346,
"eval_steps_per_second": 6.589,
"step": 5925
},
{
"epoch": 16.0,
"grad_norm": 2.422437906265259,
"learning_rate": 9.792072164611214e-05,
"loss": 0.0784,
"step": 6320
},
{
"epoch": 16.0,
"eval_f1": 0.8394858611825193,
"eval_loss": 0.2310497760772705,
"eval_precision": 0.8300122000813339,
"eval_recall": 0.8491782816725608,
"eval_runtime": 6.2296,
"eval_samples_per_second": 412.868,
"eval_steps_per_second": 6.581,
"step": 6320
},
{
"epoch": 17.0,
"grad_norm": 1.0132837295532227,
"learning_rate": 9.710086940741867e-05,
"loss": 0.0699,
"step": 6715
},
{
"epoch": 17.0,
"eval_f1": 0.8350071736011477,
"eval_loss": 0.24940118193626404,
"eval_precision": 0.8228640678650777,
"eval_recall": 0.8475140420220512,
"eval_runtime": 6.2441,
"eval_samples_per_second": 411.911,
"eval_steps_per_second": 6.566,
"step": 6715
},
{
"epoch": 18.0,
"grad_norm": 1.5865906476974487,
"learning_rate": 9.61523045626935e-05,
"loss": 0.0615,
"step": 7110
},
{
"epoch": 18.0,
"eval_f1": 0.8329538397847237,
"eval_loss": 0.2699420750141144,
"eval_precision": 0.8288362512873326,
"eval_recall": 0.8371125442063657,
"eval_runtime": 6.2246,
"eval_samples_per_second": 413.199,
"eval_steps_per_second": 6.587,
"step": 7110
},
{
"epoch": 19.0,
"grad_norm": 0.9923080205917358,
"learning_rate": 9.507285711634415e-05,
"loss": 0.0576,
"step": 7505
},
{
"epoch": 19.0,
"eval_f1": 0.82642089093702,
"eval_loss": 0.2682414948940277,
"eval_precision": 0.8138362242839855,
"eval_recall": 0.8394008737258165,
"eval_runtime": 6.2361,
"eval_samples_per_second": 412.439,
"eval_steps_per_second": 6.575,
"step": 7505
},
{
"epoch": 20.0,
"grad_norm": 1.0131040811538696,
"learning_rate": 9.38677781889291e-05,
"loss": 0.0497,
"step": 7900
},
{
"epoch": 20.0,
"eval_f1": 0.8408021280949458,
"eval_loss": 0.2817462980747223,
"eval_precision": 0.8272599154419167,
"eval_recall": 0.854795090493031,
"eval_runtime": 6.2397,
"eval_samples_per_second": 412.202,
"eval_steps_per_second": 6.571,
"step": 7900
},
{
"epoch": 21.0,
"grad_norm": 1.129745364189148,
"learning_rate": 9.254042669444088e-05,
"loss": 0.0463,
"step": 8295
},
{
"epoch": 21.0,
"eval_f1": 0.8426954625687884,
"eval_loss": 0.2874164581298828,
"eval_precision": 0.8412106135986733,
"eval_recall": 0.8441855627210318,
"eval_runtime": 6.2285,
"eval_samples_per_second": 412.943,
"eval_steps_per_second": 6.583,
"step": 8295
},
{
"epoch": 22.0,
"grad_norm": 1.3233388662338257,
"learning_rate": 9.10945023569444e-05,
"loss": 0.0412,
"step": 8690
},
{
"epoch": 22.0,
"eval_f1": 0.8439258178787248,
"eval_loss": 0.29583561420440674,
"eval_precision": 0.8453350031308704,
"eval_recall": 0.8425213230705222,
"eval_runtime": 6.2232,
"eval_samples_per_second": 413.292,
"eval_steps_per_second": 6.588,
"step": 8690
},
{
"epoch": 23.0,
"grad_norm": 0.6825762987136841,
"learning_rate": 8.953403539834375e-05,
"loss": 0.0388,
"step": 9085
},
{
"epoch": 23.0,
"eval_f1": 0.8467850836189883,
"eval_loss": 0.29167571663856506,
"eval_precision": 0.845643153526971,
"eval_recall": 0.8479301019346785,
"eval_runtime": 6.2302,
"eval_samples_per_second": 412.826,
"eval_steps_per_second": 6.581,
"step": 9085
},
{
"epoch": 24.0,
"grad_norm": 0.829084038734436,
"learning_rate": 8.786337530495295e-05,
"loss": 0.0343,
"step": 9480
},
{
"epoch": 24.0,
"eval_f1": 0.8326785534401128,
"eval_loss": 0.29245325922966003,
"eval_precision": 0.8072265625,
"eval_recall": 0.85978780944456,
"eval_runtime": 6.2492,
"eval_samples_per_second": 411.57,
"eval_steps_per_second": 6.561,
"step": 9480
},
{
"epoch": 25.0,
"grad_norm": 1.2089000940322876,
"learning_rate": 8.609180455499081e-05,
"loss": 0.0319,
"step": 9875
},
{
"epoch": 25.0,
"eval_f1": 0.8404788703571062,
"eval_loss": 0.3039838969707489,
"eval_precision": 0.8270237615787354,
"eval_recall": 0.8543790305804035,
"eval_runtime": 6.2496,
"eval_samples_per_second": 411.548,
"eval_steps_per_second": 6.56,
"step": 9875
},
{
"epoch": 26.0,
"grad_norm": 0.7031717300415039,
"learning_rate": 8.421527039366616e-05,
"loss": 0.0297,
"step": 10270
},
{
"epoch": 26.0,
"eval_f1": 0.8409658617818484,
"eval_loss": 0.3323589265346527,
"eval_precision": 0.8414913559675068,
"eval_recall": 0.840441023507385,
"eval_runtime": 6.2285,
"eval_samples_per_second": 412.939,
"eval_steps_per_second": 6.583,
"step": 10270
},
{
"epoch": 27.0,
"grad_norm": 1.0431761741638184,
"learning_rate": 8.224336808020677e-05,
"loss": 0.0254,
"step": 10665
},
{
"epoch": 27.0,
"eval_f1": 0.842159067675322,
"eval_loss": 0.31897974014282227,
"eval_precision": 0.8279396984924623,
"eval_recall": 0.8568753900561681,
"eval_runtime": 6.2339,
"eval_samples_per_second": 412.583,
"eval_steps_per_second": 6.577,
"step": 10665
},
{
"epoch": 28.0,
"grad_norm": 0.3485606014728546,
"learning_rate": 8.018159389383709e-05,
"loss": 0.0237,
"step": 11060
},
{
"epoch": 28.0,
"eval_f1": 0.8385502471169687,
"eval_loss": 0.3295208811759949,
"eval_precision": 0.8301732925586136,
"eval_recall": 0.8470979821094238,
"eval_runtime": 6.2249,
"eval_samples_per_second": 413.179,
"eval_steps_per_second": 6.586,
"step": 11060
},
{
"epoch": 29.0,
"grad_norm": 2.1144888401031494,
"learning_rate": 7.80356946134664e-05,
"loss": 0.0217,
"step": 11455
},
{
"epoch": 29.0,
"eval_f1": 0.8437499999999999,
"eval_loss": 0.32363420724868774,
"eval_precision": 0.833976833976834,
"eval_recall": 0.8537549407114624,
"eval_runtime": 6.2372,
"eval_samples_per_second": 412.362,
"eval_steps_per_second": 6.573,
"step": 11455
},
{
"epoch": 30.0,
"grad_norm": 0.29809829592704773,
"learning_rate": 7.581165149970385e-05,
"loss": 0.0204,
"step": 11850
},
{
"epoch": 30.0,
"eval_f1": 0.8443138900545532,
"eval_loss": 0.35288870334625244,
"eval_precision": 0.8516402116402116,
"eval_recall": 0.8371125442063657,
"eval_runtime": 6.2428,
"eval_samples_per_second": 411.996,
"eval_steps_per_second": 6.568,
"step": 11850
},
{
"epoch": 31.0,
"grad_norm": 0.03988971561193466,
"learning_rate": 7.351566362330311e-05,
"loss": 0.0187,
"step": 12245
},
{
"epoch": 31.0,
"eval_f1": 0.8424198701432547,
"eval_loss": 0.3403635323047638,
"eval_precision": 0.8347630718954249,
"eval_recall": 0.8502184314541293,
"eval_runtime": 6.1938,
"eval_samples_per_second": 415.254,
"eval_steps_per_second": 6.62,
"step": 12245
},
{
"epoch": 32.0,
"grad_norm": 0.40415504574775696,
"learning_rate": 7.115413058650544e-05,
"loss": 0.0168,
"step": 12640
},
{
"epoch": 32.0,
"eval_f1": 0.8455690929917734,
"eval_loss": 0.34554678201675415,
"eval_precision": 0.8465387823185988,
"eval_recall": 0.8446016226336592,
"eval_runtime": 6.1803,
"eval_samples_per_second": 416.159,
"eval_steps_per_second": 6.634,
"step": 12640
},
{
"epoch": 33.0,
"grad_norm": 1.3843847513198853,
"learning_rate": 6.873363468544161e-05,
"loss": 0.0154,
"step": 13035
},
{
"epoch": 33.0,
"eval_f1": 0.8446492575524833,
"eval_loss": 0.34318989515304565,
"eval_precision": 0.8317870108914885,
"eval_recall": 0.8579155398377366,
"eval_runtime": 6.1485,
"eval_samples_per_second": 418.312,
"eval_steps_per_second": 6.668,
"step": 13035
},
{
"epoch": 34.0,
"grad_norm": 0.4158535897731781,
"learning_rate": 6.626092256331148e-05,
"loss": 0.0141,
"step": 13430
},
{
"epoch": 34.0,
"eval_f1": 0.8506006776876477,
"eval_loss": 0.353132963180542,
"eval_precision": 0.8398215733982157,
"eval_recall": 0.8616600790513834,
"eval_runtime": 6.1312,
"eval_samples_per_second": 419.497,
"eval_steps_per_second": 6.687,
"step": 13430
}
],
"logging_steps": 50,
"max_steps": 27650,
"num_input_tokens_seen": 0,
"num_train_epochs": 70,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.5841542093969132e+16,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}