phobert-absa-smartphone / trainer_state.json
AnnyNguyen's picture
Upload trainer_state.json with huggingface_hub
7eb58ef verified
{
"best_global_step": 26785,
"best_metric": 0.5293099134739596,
"best_model_checkpoint": "outputs/smartphone/vinai_phobert-base/checkpoint-26785",
"epoch": 55.0,
"eval_steps": 500,
"global_step": 26785,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 1.214017629623413,
"learning_rate": 2.9992628768482664e-05,
"loss": 0.3598,
"step": 487
},
{
"epoch": 1.0,
"eval_f1_aspect": 0.6824618736383442,
"eval_f1_macro_aspect": 0.23965991762577465,
"eval_f1_sentiment": 0.23965991762577465,
"eval_loss": 0.2352765053510666,
"eval_runtime": 5.2822,
"eval_samples_per_second": 421.033,
"eval_steps_per_second": 13.252,
"step": 487
},
{
"epoch": 2.0,
"grad_norm": 1.3990741968154907,
"learning_rate": 2.9970461653663515e-05,
"loss": 0.2082,
"step": 974
},
{
"epoch": 2.0,
"eval_f1_aspect": 0.771007371007371,
"eval_f1_macro_aspect": 0.36928730826344125,
"eval_f1_sentiment": 0.36928730826344125,
"eval_loss": 0.18111465871334076,
"eval_runtime": 5.2643,
"eval_samples_per_second": 422.471,
"eval_steps_per_second": 13.297,
"step": 974
},
{
"epoch": 3.0,
"grad_norm": 1.6626859903335571,
"learning_rate": 2.9933520500597934e-05,
"loss": 0.1604,
"step": 1461
},
{
"epoch": 3.0,
"eval_f1_aspect": 0.8001937515136837,
"eval_f1_macro_aspect": 0.39273139407455954,
"eval_f1_sentiment": 0.39273139407455954,
"eval_loss": 0.16435211896896362,
"eval_runtime": 5.2672,
"eval_samples_per_second": 422.237,
"eval_steps_per_second": 13.29,
"step": 1461
},
{
"epoch": 4.0,
"grad_norm": 2.1720831394195557,
"learning_rate": 2.9881841765744044e-05,
"loss": 0.1325,
"step": 1948
},
{
"epoch": 4.0,
"eval_f1_aspect": 0.791747572815534,
"eval_f1_macro_aspect": 0.39735406210910534,
"eval_f1_sentiment": 0.39735406210910534,
"eval_loss": 0.16188032925128937,
"eval_runtime": 5.3277,
"eval_samples_per_second": 417.439,
"eval_steps_per_second": 13.139,
"step": 1948
},
{
"epoch": 5.0,
"grad_norm": 3.8878087997436523,
"learning_rate": 2.9815476449773896e-05,
"loss": 0.1102,
"step": 2435
},
{
"epoch": 5.0,
"eval_f1_aspect": 0.8246024321796072,
"eval_f1_macro_aspect": 0.46887437550765554,
"eval_f1_sentiment": 0.46887437550765554,
"eval_loss": 0.15876558423042297,
"eval_runtime": 5.3286,
"eval_samples_per_second": 417.369,
"eval_steps_per_second": 13.137,
"step": 2435
},
{
"epoch": 6.0,
"grad_norm": 1.500092625617981,
"learning_rate": 2.973449004724197e-05,
"loss": 0.0919,
"step": 2922
},
{
"epoch": 6.0,
"eval_f1_aspect": 0.8168293816829382,
"eval_f1_macro_aspect": 0.47478350769030153,
"eval_f1_sentiment": 0.47478350769030153,
"eval_loss": 0.16465845704078674,
"eval_runtime": 5.3861,
"eval_samples_per_second": 412.914,
"eval_steps_per_second": 12.996,
"step": 2922
},
{
"epoch": 7.0,
"grad_norm": 1.246404767036438,
"learning_rate": 2.9638962481949954e-05,
"loss": 0.078,
"step": 3409
},
{
"epoch": 7.0,
"eval_f1_aspect": 0.8095238095238095,
"eval_f1_macro_aspect": 0.4796789474797879,
"eval_f1_sentiment": 0.4796789474797879,
"eval_loss": 0.17780332267284393,
"eval_runtime": 5.2676,
"eval_samples_per_second": 422.206,
"eval_steps_per_second": 13.289,
"step": 3409
},
{
"epoch": 8.0,
"grad_norm": 0.4573521018028259,
"learning_rate": 2.9528988028071616e-05,
"loss": 0.0677,
"step": 3896
},
{
"epoch": 8.0,
"eval_f1_aspect": 0.8132741294695022,
"eval_f1_macro_aspect": 0.48194296253877855,
"eval_f1_sentiment": 0.48194296253877855,
"eval_loss": 0.17726708948612213,
"eval_runtime": 5.2769,
"eval_samples_per_second": 421.462,
"eval_steps_per_second": 13.265,
"step": 3896
},
{
"epoch": 9.0,
"grad_norm": 1.0054028034210205,
"learning_rate": 2.9404675217115552e-05,
"loss": 0.0582,
"step": 4383
},
{
"epoch": 9.0,
"eval_f1_aspect": 0.8148489739451233,
"eval_f1_macro_aspect": 0.49901186603018977,
"eval_f1_sentiment": 0.49901186603018977,
"eval_loss": 0.18432779610157013,
"eval_runtime": 5.2788,
"eval_samples_per_second": 421.307,
"eval_steps_per_second": 13.261,
"step": 4383
},
{
"epoch": 10.0,
"grad_norm": 0.597675621509552,
"learning_rate": 2.926614673081771e-05,
"loss": 0.05,
"step": 4870
},
{
"epoch": 10.0,
"eval_f1_aspect": 0.8054654932839277,
"eval_f1_macro_aspect": 0.48681724911620844,
"eval_f1_sentiment": 0.48681724911620844,
"eval_loss": 0.19903235137462616,
"eval_runtime": 5.3605,
"eval_samples_per_second": 414.887,
"eval_steps_per_second": 13.059,
"step": 4870
},
{
"epoch": 11.0,
"grad_norm": 1.099621057510376,
"learning_rate": 2.9113539280069318e-05,
"loss": 0.0443,
"step": 5357
},
{
"epoch": 11.0,
"eval_f1_aspect": 0.803812180381218,
"eval_f1_macro_aspect": 0.49064332461219184,
"eval_f1_sentiment": 0.49064332461219184,
"eval_loss": 0.20838035643100739,
"eval_runtime": 5.3056,
"eval_samples_per_second": 419.18,
"eval_steps_per_second": 13.194,
"step": 5357
},
{
"epoch": 12.0,
"grad_norm": 8.490036010742188,
"learning_rate": 2.8947003469999752e-05,
"loss": 0.0385,
"step": 5844
},
{
"epoch": 12.0,
"eval_f1_aspect": 0.8149005090236002,
"eval_f1_macro_aspect": 0.5031596962140534,
"eval_f1_sentiment": 0.5031596962140534,
"eval_loss": 0.2110637128353119,
"eval_runtime": 5.2792,
"eval_samples_per_second": 421.277,
"eval_steps_per_second": 13.26,
"step": 5844
},
{
"epoch": 13.0,
"grad_norm": 1.8095080852508545,
"learning_rate": 2.8766703651347423e-05,
"loss": 0.0327,
"step": 6331
},
{
"epoch": 13.0,
"eval_f1_aspect": 0.8119677790563866,
"eval_f1_macro_aspect": 0.49863733694553214,
"eval_f1_sentiment": 0.49863733694553214,
"eval_loss": 0.2265346497297287,
"eval_runtime": 5.381,
"eval_samples_per_second": 413.307,
"eval_steps_per_second": 13.009,
"step": 6331
},
{
"epoch": 14.0,
"grad_norm": 1.6875431537628174,
"learning_rate": 2.857281775826549e-05,
"loss": 0.0283,
"step": 6818
},
{
"epoch": 14.0,
"eval_f1_aspect": 0.8086857142857143,
"eval_f1_macro_aspect": 0.5133505127639592,
"eval_f1_sentiment": 0.5133505127639592,
"eval_loss": 0.23116181790828705,
"eval_runtime": 5.2712,
"eval_samples_per_second": 421.915,
"eval_steps_per_second": 13.28,
"step": 6818
},
{
"epoch": 15.0,
"grad_norm": 1.34958016872406,
"learning_rate": 2.8365537132722293e-05,
"loss": 0.0251,
"step": 7305
},
{
"epoch": 15.0,
"eval_f1_aspect": 0.8102658111824015,
"eval_f1_macro_aspect": 0.5025552457560816,
"eval_f1_sentiment": 0.5025552457560816,
"eval_loss": 0.23273742198944092,
"eval_runtime": 5.2784,
"eval_samples_per_second": 421.337,
"eval_steps_per_second": 13.262,
"step": 7305
},
{
"epoch": 16.0,
"grad_norm": 4.683711051940918,
"learning_rate": 2.8145066335669954e-05,
"loss": 0.0207,
"step": 7792
},
{
"epoch": 16.0,
"eval_f1_aspect": 0.8045182111572153,
"eval_f1_macro_aspect": 0.4980081283232479,
"eval_f1_sentiment": 0.4980081283232479,
"eval_loss": 0.2506747841835022,
"eval_runtime": 5.2898,
"eval_samples_per_second": 420.428,
"eval_steps_per_second": 13.233,
"step": 7792
},
{
"epoch": 17.0,
"grad_norm": 0.895351231098175,
"learning_rate": 2.7911622945167402e-05,
"loss": 0.0187,
"step": 8279
},
{
"epoch": 17.0,
"eval_f1_aspect": 0.8070496681162738,
"eval_f1_macro_aspect": 0.5174731474301049,
"eval_f1_sentiment": 0.5174731474301049,
"eval_loss": 0.24516579508781433,
"eval_runtime": 5.2809,
"eval_samples_per_second": 421.137,
"eval_steps_per_second": 13.255,
"step": 8279
},
{
"epoch": 18.0,
"grad_norm": 0.40506213903427124,
"learning_rate": 2.76654373416571e-05,
"loss": 0.0164,
"step": 8766
},
{
"epoch": 18.0,
"eval_f1_aspect": 0.80653324131585,
"eval_f1_macro_aspect": 0.4992792703618323,
"eval_f1_sentiment": 0.4992792703618323,
"eval_loss": 0.2601372003555298,
"eval_runtime": 5.2829,
"eval_samples_per_second": 420.982,
"eval_steps_per_second": 13.25,
"step": 8766
},
{
"epoch": 19.0,
"grad_norm": 0.15703117847442627,
"learning_rate": 2.740675248060736e-05,
"loss": 0.0141,
"step": 9253
},
{
"epoch": 19.0,
"eval_f1_aspect": 0.8097674418604651,
"eval_f1_macro_aspect": 0.49406251042693405,
"eval_f1_sentiment": 0.49406251042693405,
"eval_loss": 0.2688779830932617,
"eval_runtime": 5.2799,
"eval_samples_per_second": 421.222,
"eval_steps_per_second": 13.258,
"step": 9253
},
{
"epoch": 20.0,
"grad_norm": 0.614011287689209,
"learning_rate": 2.7135823652744645e-05,
"loss": 0.0135,
"step": 9740
},
{
"epoch": 20.0,
"eval_f1_aspect": 0.8078146297137665,
"eval_f1_macro_aspect": 0.5061512331950289,
"eval_f1_sentiment": 0.5061512331950289,
"eval_loss": 0.2663342356681824,
"eval_runtime": 5.2404,
"eval_samples_per_second": 424.392,
"eval_steps_per_second": 13.358,
"step": 9740
},
{
"epoch": 21.0,
"grad_norm": 0.04745159670710564,
"learning_rate": 2.6852918232112412e-05,
"loss": 0.0118,
"step": 10227
},
{
"epoch": 21.0,
"eval_f1_aspect": 0.8046875,
"eval_f1_macro_aspect": 0.4998370067335585,
"eval_f1_sentiment": 0.4998370067335585,
"eval_loss": 0.2750583589076996,
"eval_runtime": 5.2661,
"eval_samples_per_second": 422.324,
"eval_steps_per_second": 13.293,
"step": 10227
},
{
"epoch": 22.0,
"grad_norm": 0.6339995265007019,
"learning_rate": 2.6558315412205247e-05,
"loss": 0.0104,
"step": 10714
},
{
"epoch": 22.0,
"eval_f1_aspect": 0.8146297137664698,
"eval_f1_macro_aspect": 0.5046009870856859,
"eval_f1_sentiment": 0.5046009870856859,
"eval_loss": 0.2895522117614746,
"eval_runtime": 5.2588,
"eval_samples_per_second": 422.906,
"eval_steps_per_second": 13.311,
"step": 10714
},
{
"epoch": 23.0,
"grad_norm": 0.04146909713745117,
"learning_rate": 2.6252305930438563e-05,
"loss": 0.0092,
"step": 11201
},
{
"epoch": 23.0,
"eval_f1_aspect": 0.8131566925536775,
"eval_f1_macro_aspect": 0.5065319656270583,
"eval_f1_sentiment": 0.5065319656270583,
"eval_loss": 0.294137567281723,
"eval_runtime": 5.3335,
"eval_samples_per_second": 416.987,
"eval_steps_per_second": 13.125,
"step": 11201
},
{
"epoch": 24.0,
"grad_norm": 11.924123764038086,
"learning_rate": 2.5935191781225898e-05,
"loss": 0.0081,
"step": 11688
},
{
"epoch": 24.0,
"eval_f1_aspect": 0.8160550458715596,
"eval_f1_macro_aspect": 0.5096299311354855,
"eval_f1_sentiment": 0.5096299311354855,
"eval_loss": 0.2907048463821411,
"eval_runtime": 5.3212,
"eval_samples_per_second": 417.948,
"eval_steps_per_second": 13.155,
"step": 11688
},
{
"epoch": 25.0,
"grad_norm": 0.7775519490242004,
"learning_rate": 2.5607285917946864e-05,
"loss": 0.0083,
"step": 12175
},
{
"epoch": 25.0,
"eval_f1_aspect": 0.818926296633303,
"eval_f1_macro_aspect": 0.5084465129276063,
"eval_f1_sentiment": 0.5084465129276063,
"eval_loss": 0.30101829767227173,
"eval_runtime": 5.3775,
"eval_samples_per_second": 413.572,
"eval_steps_per_second": 13.017,
"step": 12175
},
{
"epoch": 26.0,
"grad_norm": 2.315323829650879,
"learning_rate": 2.5268911944099967e-05,
"loss": 0.008,
"step": 12662
},
{
"epoch": 26.0,
"eval_f1_aspect": 0.8159158472444547,
"eval_f1_macro_aspect": 0.5100191821282204,
"eval_f1_sentiment": 0.5100191821282204,
"eval_loss": 0.3063946068286896,
"eval_runtime": 5.3501,
"eval_samples_per_second": 415.691,
"eval_steps_per_second": 13.084,
"step": 12662
},
{
"epoch": 27.0,
"grad_norm": 1.4911683797836304,
"learning_rate": 2.492040379394501e-05,
"loss": 0.0066,
"step": 13149
},
{
"epoch": 27.0,
"eval_f1_aspect": 0.8137142857142857,
"eval_f1_macro_aspect": 0.5107237241401983,
"eval_f1_sentiment": 0.5107237241401983,
"eval_loss": 0.31042352318763733,
"eval_runtime": 5.2518,
"eval_samples_per_second": 423.477,
"eval_steps_per_second": 13.329,
"step": 13149
},
{
"epoch": 28.0,
"grad_norm": 0.9785826802253723,
"learning_rate": 2.4562105402950287e-05,
"loss": 0.0054,
"step": 13636
},
{
"epoch": 28.0,
"eval_f1_aspect": 0.8164295548416705,
"eval_f1_macro_aspect": 0.5113764177223036,
"eval_f1_sentiment": 0.5113764177223036,
"eval_loss": 0.30704399943351746,
"eval_runtime": 5.3102,
"eval_samples_per_second": 418.818,
"eval_steps_per_second": 13.182,
"step": 13636
},
{
"epoch": 29.0,
"grad_norm": 0.01176587212830782,
"learning_rate": 2.419437036836983e-05,
"loss": 0.0059,
"step": 14123
},
{
"epoch": 29.0,
"eval_f1_aspect": 0.810004589261129,
"eval_f1_macro_aspect": 0.51595438584814,
"eval_f1_sentiment": 0.51595438584814,
"eval_loss": 0.3131285309791565,
"eval_runtime": 5.3519,
"eval_samples_per_second": 415.555,
"eval_steps_per_second": 13.08,
"step": 14123
},
{
"epoch": 30.0,
"grad_norm": 0.22203879058361053,
"learning_rate": 2.3817561600285582e-05,
"loss": 0.0051,
"step": 14610
},
{
"epoch": 30.0,
"eval_f1_aspect": 0.8104515241806096,
"eval_f1_macro_aspect": 0.5174387744262563,
"eval_f1_sentiment": 0.5174387744262563,
"eval_loss": 0.32132473587989807,
"eval_runtime": 5.3173,
"eval_samples_per_second": 418.256,
"eval_steps_per_second": 13.165,
"step": 14610
},
{
"epoch": 31.0,
"grad_norm": 0.06852120161056519,
"learning_rate": 2.343205096345896e-05,
"loss": 0.0054,
"step": 15097
},
{
"epoch": 31.0,
"eval_f1_aspect": 0.8126286890871655,
"eval_f1_macro_aspect": 0.505160075382636,
"eval_f1_sentiment": 0.505160075382636,
"eval_loss": 0.31839850544929504,
"eval_runtime": 5.2676,
"eval_samples_per_second": 422.201,
"eval_steps_per_second": 13.289,
"step": 15097
},
{
"epoch": 32.0,
"grad_norm": 2.4210550785064697,
"learning_rate": 2.303821891034526e-05,
"loss": 0.0055,
"step": 15584
},
{
"epoch": 32.0,
"eval_f1_aspect": 0.8065116279069767,
"eval_f1_macro_aspect": 0.5054766945577382,
"eval_f1_sentiment": 0.5054766945577382,
"eval_loss": 0.32863593101501465,
"eval_runtime": 5.2714,
"eval_samples_per_second": 421.901,
"eval_steps_per_second": 13.279,
"step": 15584
},
{
"epoch": 33.0,
"grad_norm": 0.003676912747323513,
"learning_rate": 2.2636454105632974e-05,
"loss": 0.0047,
"step": 16071
},
{
"epoch": 33.0,
"eval_f1_aspect": 0.8185381600184459,
"eval_f1_macro_aspect": 0.5082259114157306,
"eval_f1_sentiment": 0.5082259114157306,
"eval_loss": 0.3176405429840088,
"eval_runtime": 5.2759,
"eval_samples_per_second": 421.542,
"eval_steps_per_second": 13.268,
"step": 16071
},
{
"epoch": 34.0,
"grad_norm": 0.3813173472881317,
"learning_rate": 2.2227153042678708e-05,
"loss": 0.0043,
"step": 16558
},
{
"epoch": 34.0,
"eval_f1_aspect": 0.822883295194508,
"eval_f1_macro_aspect": 0.5175972532342072,
"eval_f1_sentiment": 0.5175972532342072,
"eval_loss": 0.31699639558792114,
"eval_runtime": 5.2786,
"eval_samples_per_second": 421.322,
"eval_steps_per_second": 13.261,
"step": 16558
},
{
"epoch": 35.0,
"grad_norm": 0.017288848757743835,
"learning_rate": 2.1810719652216077e-05,
"loss": 0.0039,
"step": 17045
},
{
"epoch": 35.0,
"eval_f1_aspect": 0.821256038647343,
"eval_f1_macro_aspect": 0.5079532801084986,
"eval_f1_sentiment": 0.5079532801084986,
"eval_loss": 0.339198499917984,
"eval_runtime": 5.3292,
"eval_samples_per_second": 417.322,
"eval_steps_per_second": 13.135,
"step": 17045
},
{
"epoch": 36.0,
"grad_norm": 0.43333691358566284,
"learning_rate": 2.138756490372486e-05,
"loss": 0.0036,
"step": 17532
},
{
"epoch": 36.0,
"eval_f1_aspect": 0.8142365097588978,
"eval_f1_macro_aspect": 0.5104098398330861,
"eval_f1_sentiment": 0.5104098398330861,
"eval_loss": 0.33852654695510864,
"eval_runtime": 5.2774,
"eval_samples_per_second": 421.424,
"eval_steps_per_second": 13.264,
"step": 17532
},
{
"epoch": 37.0,
"grad_norm": 0.13463492691516876,
"learning_rate": 2.0958106399853726e-05,
"loss": 0.0035,
"step": 18019
},
{
"epoch": 37.0,
"eval_f1_aspect": 0.8179949760219228,
"eval_f1_macro_aspect": 0.5061536037175701,
"eval_f1_sentiment": 0.5061536037175701,
"eval_loss": 0.3427109718322754,
"eval_runtime": 5.2483,
"eval_samples_per_second": 423.758,
"eval_steps_per_second": 13.338,
"step": 18019
},
{
"epoch": 38.0,
"grad_norm": 0.14240214228630066,
"learning_rate": 2.0522767964296835e-05,
"loss": 0.0044,
"step": 18506
},
{
"epoch": 38.0,
"eval_f1_aspect": 0.813799405985835,
"eval_f1_macro_aspect": 0.4926077943615257,
"eval_f1_sentiment": 0.4926077943615257,
"eval_loss": 0.34948092699050903,
"eval_runtime": 5.2669,
"eval_samples_per_second": 422.261,
"eval_steps_per_second": 13.291,
"step": 18506
},
{
"epoch": 39.0,
"grad_norm": 0.3045540153980255,
"learning_rate": 2.0081979223531034e-05,
"loss": 0.005,
"step": 18993
},
{
"epoch": 39.0,
"eval_f1_aspect": 0.8197553657973691,
"eval_f1_macro_aspect": 0.5165866468068806,
"eval_f1_sentiment": 0.5165866468068806,
"eval_loss": 0.33417609333992004,
"eval_runtime": 5.334,
"eval_samples_per_second": 416.95,
"eval_steps_per_second": 13.123,
"step": 18993
},
{
"epoch": 40.0,
"grad_norm": 0.10395223647356033,
"learning_rate": 1.9636175182826402e-05,
"loss": 0.0026,
"step": 19480
},
{
"epoch": 40.0,
"eval_f1_aspect": 0.8171650308148825,
"eval_f1_macro_aspect": 0.5192047014309698,
"eval_f1_sentiment": 0.5192047014309698,
"eval_loss": 0.36051589250564575,
"eval_runtime": 5.3449,
"eval_samples_per_second": 416.099,
"eval_steps_per_second": 13.097,
"step": 19480
},
{
"epoch": 41.0,
"grad_norm": 0.47768187522888184,
"learning_rate": 1.918579579694853e-05,
"loss": 0.0031,
"step": 19967
},
{
"epoch": 41.0,
"eval_f1_aspect": 0.8147638697845025,
"eval_f1_macro_aspect": 0.5071432459726105,
"eval_f1_sentiment": 0.5071432459726105,
"eval_loss": 0.3499327600002289,
"eval_runtime": 5.2702,
"eval_samples_per_second": 421.995,
"eval_steps_per_second": 13.282,
"step": 19967
},
{
"epoch": 42.0,
"grad_norm": 3.665292978286743,
"learning_rate": 1.8731285535976327e-05,
"loss": 0.0024,
"step": 20454
},
{
"epoch": 42.0,
"eval_f1_aspect": 0.8167467398764585,
"eval_f1_macro_aspect": 0.5079256124525199,
"eval_f1_sentiment": 0.5079256124525199,
"eval_loss": 0.3565986454486847,
"eval_runtime": 5.2524,
"eval_samples_per_second": 423.429,
"eval_steps_per_second": 13.327,
"step": 20454
},
{
"epoch": 43.0,
"grad_norm": 0.041507333517074585,
"learning_rate": 1.8273092946663674e-05,
"loss": 0.0024,
"step": 20941
},
{
"epoch": 43.0,
"eval_f1_aspect": 0.8205010342450012,
"eval_f1_macro_aspect": 0.5057632659931312,
"eval_f1_sentiment": 0.5057632659931312,
"eval_loss": 0.353305459022522,
"eval_runtime": 5.268,
"eval_samples_per_second": 422.171,
"eval_steps_per_second": 13.288,
"step": 20941
},
{
"epoch": 44.0,
"grad_norm": 11.411001205444336,
"learning_rate": 1.7811670209777964e-05,
"loss": 0.0019,
"step": 21428
},
{
"epoch": 44.0,
"eval_f1_aspect": 0.8167010782289515,
"eval_f1_macro_aspect": 0.5160780166340071,
"eval_f1_sentiment": 0.5160780166340071,
"eval_loss": 0.3536563515663147,
"eval_runtime": 5.2634,
"eval_samples_per_second": 422.542,
"eval_steps_per_second": 13.299,
"step": 21428
},
{
"epoch": 45.0,
"grad_norm": 0.008572953753173351,
"learning_rate": 1.734747269385226e-05,
"loss": 0.0026,
"step": 21915
},
{
"epoch": 45.0,
"eval_f1_aspect": 0.8217799130633723,
"eval_f1_macro_aspect": 0.5217043640896729,
"eval_f1_sentiment": 0.5217043640896729,
"eval_loss": 0.35641050338745117,
"eval_runtime": 5.2869,
"eval_samples_per_second": 420.661,
"eval_steps_per_second": 13.24,
"step": 21915
},
{
"epoch": 46.0,
"grad_norm": 4.907774448394775,
"learning_rate": 1.6880958505791537e-05,
"loss": 0.0023,
"step": 22402
},
{
"epoch": 46.0,
"eval_f1_aspect": 0.8204426192105864,
"eval_f1_macro_aspect": 0.5082924274745652,
"eval_f1_sentiment": 0.5082924274745652,
"eval_loss": 0.36888423562049866,
"eval_runtime": 5.2716,
"eval_samples_per_second": 421.884,
"eval_steps_per_second": 13.279,
"step": 22402
},
{
"epoch": 47.0,
"grad_norm": 0.04665122181177139,
"learning_rate": 1.641258803877647e-05,
"loss": 0.0021,
"step": 22889
},
{
"epoch": 47.0,
"eval_f1_aspect": 0.8187053674268602,
"eval_f1_macro_aspect": 0.509624571236536,
"eval_f1_sentiment": 0.509624571236536,
"eval_loss": 0.36498305201530457,
"eval_runtime": 5.301,
"eval_samples_per_second": 419.544,
"eval_steps_per_second": 13.205,
"step": 22889
},
{
"epoch": 48.0,
"grad_norm": 4.29740571975708,
"learning_rate": 1.5942823517910956e-05,
"loss": 0.0026,
"step": 23376
},
{
"epoch": 48.0,
"eval_f1_aspect": 0.8177655677655677,
"eval_f1_macro_aspect": 0.5109795293385091,
"eval_f1_sentiment": 0.5109795293385091,
"eval_loss": 0.368932843208313,
"eval_runtime": 5.2831,
"eval_samples_per_second": 420.965,
"eval_steps_per_second": 13.25,
"step": 23376
},
{
"epoch": 49.0,
"grad_norm": 0.8426123857498169,
"learning_rate": 1.5472128544061724e-05,
"loss": 0.0019,
"step": 23863
},
{
"epoch": 49.0,
"eval_f1_aspect": 0.8196042337781868,
"eval_f1_macro_aspect": 0.5148117758549687,
"eval_f1_sentiment": 0.5148117758549687,
"eval_loss": 0.3668632209300995,
"eval_runtime": 5.331,
"eval_samples_per_second": 417.179,
"eval_steps_per_second": 13.131,
"step": 23863
},
{
"epoch": 50.0,
"grad_norm": 0.0009361854754388332,
"learning_rate": 1.5000967636340272e-05,
"loss": 0.0015,
"step": 24350
},
{
"epoch": 50.0,
"eval_f1_aspect": 0.8164571428571429,
"eval_f1_macro_aspect": 0.516046635549457,
"eval_f1_sentiment": 0.516046635549457,
"eval_loss": 0.3719753623008728,
"eval_runtime": 5.3067,
"eval_samples_per_second": 419.092,
"eval_steps_per_second": 13.191,
"step": 24350
},
{
"epoch": 51.0,
"grad_norm": 0.024893753230571747,
"learning_rate": 1.4529805773678577e-05,
"loss": 0.0013,
"step": 24837
},
{
"epoch": 51.0,
"eval_f1_aspect": 0.8113293741434445,
"eval_f1_macro_aspect": 0.5109673118759412,
"eval_f1_sentiment": 0.5109673118759412,
"eval_loss": 0.38190165162086487,
"eval_runtime": 5.2708,
"eval_samples_per_second": 421.949,
"eval_steps_per_second": 13.281,
"step": 24837
},
{
"epoch": 52.0,
"grad_norm": 0.0006926740752533078,
"learning_rate": 1.4059107935951025e-05,
"loss": 0.0016,
"step": 25324
},
{
"epoch": 52.0,
"eval_f1_aspect": 0.8156450137236962,
"eval_f1_macro_aspect": 0.517296046782514,
"eval_f1_sentiment": 0.517296046782514,
"eval_loss": 0.3835242986679077,
"eval_runtime": 5.3451,
"eval_samples_per_second": 416.079,
"eval_steps_per_second": 13.096,
"step": 25324
},
{
"epoch": 53.0,
"grad_norm": 0.08918596804141998,
"learning_rate": 1.3589338645095411e-05,
"loss": 0.0022,
"step": 25811
},
{
"epoch": 53.0,
"eval_f1_aspect": 0.8180987202925045,
"eval_f1_macro_aspect": 0.5149898328099092,
"eval_f1_sentiment": 0.5149898328099092,
"eval_loss": 0.3808538615703583,
"eval_runtime": 5.2576,
"eval_samples_per_second": 423.008,
"eval_steps_per_second": 13.314,
"step": 25811
},
{
"epoch": 54.0,
"grad_norm": 0.01354946754872799,
"learning_rate": 1.312096150668588e-05,
"loss": 0.0014,
"step": 26298
},
{
"epoch": 54.0,
"eval_f1_aspect": 0.8204540243063517,
"eval_f1_macro_aspect": 0.5117108970702628,
"eval_f1_sentiment": 0.5117108970702628,
"eval_loss": 0.3776213824748993,
"eval_runtime": 5.2933,
"eval_samples_per_second": 420.157,
"eval_steps_per_second": 13.224,
"step": 26298
},
{
"epoch": 55.0,
"grad_norm": 0.4617435038089752,
"learning_rate": 1.2654438752410183e-05,
"loss": 0.001,
"step": 26785
},
{
"epoch": 55.0,
"eval_f1_aspect": 0.8272706474490963,
"eval_f1_macro_aspect": 0.5293099134739596,
"eval_f1_sentiment": 0.5293099134739596,
"eval_loss": 0.37219125032424927,
"eval_runtime": 5.2642,
"eval_samples_per_second": 422.478,
"eval_steps_per_second": 13.297,
"step": 26785
}
],
"logging_steps": 487,
"max_steps": 48700,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 5.508401968847866e+16,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}