LibMTL_term2 / checkpoint-24012 /trainer_state.json
Mandour-101's picture
Upload folder using huggingface_hub
f7f8e29 verified
{
"best_metric": 0.9661077074670175,
"best_model_checkpoint": "/kaggle/working/mmoe_vit_results/checkpoint-24012",
"epoch": 6.0,
"eval_steps": 500,
"global_step": 24012,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.12493753123438281,
"grad_norm": 2.205951452255249,
"learning_rate": 5e-05,
"loss": 0.7943,
"step": 500
},
{
"epoch": 0.24987506246876562,
"grad_norm": 2.1307880878448486,
"learning_rate": 4.9091371665334015e-05,
"loss": 0.474,
"step": 1000
},
{
"epoch": 0.3748125937031484,
"grad_norm": 1.7951563596725464,
"learning_rate": 4.818274333066802e-05,
"loss": 0.4167,
"step": 1500
},
{
"epoch": 0.49975012493753124,
"grad_norm": 1.9876887798309326,
"learning_rate": 4.727411499600204e-05,
"loss": 0.3916,
"step": 2000
},
{
"epoch": 0.624687656171914,
"grad_norm": 3.0330495834350586,
"learning_rate": 4.636548666133605e-05,
"loss": 0.3704,
"step": 2500
},
{
"epoch": 0.7496251874062968,
"grad_norm": 2.0157809257507324,
"learning_rate": 4.545685832667006e-05,
"loss": 0.3577,
"step": 3000
},
{
"epoch": 0.8745627186406797,
"grad_norm": 1.7754454612731934,
"learning_rate": 4.4548229992004074e-05,
"loss": 0.3447,
"step": 3500
},
{
"epoch": 0.9995002498750625,
"grad_norm": 1.7371801137924194,
"learning_rate": 4.3639601657338087e-05,
"loss": 0.3348,
"step": 4000
},
{
"epoch": 1.0,
"eval_category_macro_f1": 0.6952318859438807,
"eval_category_precision": 0.956608825785308,
"eval_category_recall": 0.9564824339910845,
"eval_category_weighted_f1": 0.9548015657704888,
"eval_color_macro_f1": 0.26886041835616176,
"eval_color_precision": 0.742503302340083,
"eval_color_recall": 0.7711898749961034,
"eval_color_weighted_f1": 0.7494002787544956,
"eval_gender_macro_f1": 0.5551422720580631,
"eval_gender_precision": 0.9135315056899179,
"eval_gender_recall": 0.9142429626858692,
"eval_gender_weighted_f1": 0.9133261341390547,
"eval_loss": 0.3139565885066986,
"eval_material_macro_f1": 0.18175933111933457,
"eval_material_precision": 0.5510795574472717,
"eval_material_recall": 0.5989276473705539,
"eval_material_weighted_f1": 0.5328149786359672,
"eval_neck_macro_f1": 0.17476784999699163,
"eval_neck_precision": 0.770231966081002,
"eval_neck_recall": 0.767043860469466,
"eval_neck_weighted_f1": 0.7547329696141571,
"eval_pattern_macro_f1": 0.07495456820600468,
"eval_pattern_precision": 0.500470399822932,
"eval_pattern_recall": 0.5943763833037189,
"eval_pattern_weighted_f1": 0.5043529662664364,
"eval_product_type_macro_f1": 0.5286677539064947,
"eval_product_type_precision": 0.6574438114334396,
"eval_product_type_recall": 0.6384238910190467,
"eval_product_type_weighted_f1": 0.6249330379628022,
"eval_runtime": 307.6972,
"eval_samples_per_second": 104.255,
"eval_sleeve_macro_f1": 0.3129635925848871,
"eval_sleeve_precision": 0.8408246980460057,
"eval_sleeve_recall": 0.8423579288631192,
"eval_sleeve_weighted_f1": 0.830038616651521,
"eval_steps_per_second": 1.631,
"eval_style_macro_f1": 0.14293232343466503,
"eval_style_precision": 0.5728463487218707,
"eval_style_recall": 0.6262975778546713,
"eval_style_weighted_f1": 0.5804908017903951,
"step": 4002
},
{
"epoch": 1.1244377811094453,
"grad_norm": 1.4916481971740723,
"learning_rate": 4.27309733226721e-05,
"loss": 0.3134,
"step": 4500
},
{
"epoch": 1.249375312343828,
"grad_norm": 1.7547612190246582,
"learning_rate": 4.1822344988006106e-05,
"loss": 0.3112,
"step": 5000
},
{
"epoch": 1.3743128435782108,
"grad_norm": 1.4588732719421387,
"learning_rate": 4.091371665334012e-05,
"loss": 0.3051,
"step": 5500
},
{
"epoch": 1.4992503748125938,
"grad_norm": 1.2639210224151611,
"learning_rate": 4.000508831867413e-05,
"loss": 0.3029,
"step": 6000
},
{
"epoch": 1.6241879060469766,
"grad_norm": 1.4789787530899048,
"learning_rate": 3.9096459984008145e-05,
"loss": 0.2961,
"step": 6500
},
{
"epoch": 1.7491254372813594,
"grad_norm": 1.6470601558685303,
"learning_rate": 3.818783164934215e-05,
"loss": 0.2873,
"step": 7000
},
{
"epoch": 1.8740629685157422,
"grad_norm": 2.157299280166626,
"learning_rate": 3.7279203314676164e-05,
"loss": 0.2847,
"step": 7500
},
{
"epoch": 1.999000499750125,
"grad_norm": 1.4557589292526245,
"learning_rate": 3.637057498001018e-05,
"loss": 0.284,
"step": 8000
},
{
"epoch": 2.0,
"eval_category_macro_f1": 0.7731459035999274,
"eval_category_precision": 0.9621854954403526,
"eval_category_recall": 0.9620935814707441,
"eval_category_weighted_f1": 0.9611122266129147,
"eval_color_macro_f1": 0.33282064622069346,
"eval_color_precision": 0.7628507936505963,
"eval_color_recall": 0.7826927273294055,
"eval_color_weighted_f1": 0.7666531131477731,
"eval_gender_macro_f1": 0.6529045193901017,
"eval_gender_precision": 0.9310067709590051,
"eval_gender_recall": 0.9307023286262041,
"eval_gender_weighted_f1": 0.9300238040883521,
"eval_loss": 0.27808678150177,
"eval_material_macro_f1": 0.2672650569326831,
"eval_material_precision": 0.5829820463502581,
"eval_material_recall": 0.6170703575547867,
"eval_material_weighted_f1": 0.5634078038324131,
"eval_neck_macro_f1": 0.20473481727652498,
"eval_neck_precision": 0.7723201257575838,
"eval_neck_recall": 0.7876180678948845,
"eval_neck_weighted_f1": 0.768304127766941,
"eval_pattern_macro_f1": 0.11918967355596528,
"eval_pattern_precision": 0.5371433805351696,
"eval_pattern_recall": 0.6123320552386297,
"eval_pattern_weighted_f1": 0.5328905424758658,
"eval_product_type_macro_f1": 0.6181561032086245,
"eval_product_type_precision": 0.6896032942264115,
"eval_product_type_recall": 0.6778577885844321,
"eval_product_type_weighted_f1": 0.67171279677715,
"eval_runtime": 302.2512,
"eval_samples_per_second": 106.134,
"eval_sleeve_macro_f1": 0.34627852813600124,
"eval_sleeve_precision": 0.8389224468125812,
"eval_sleeve_recall": 0.8489042675893888,
"eval_sleeve_weighted_f1": 0.8375757784906263,
"eval_steps_per_second": 1.661,
"eval_style_macro_f1": 0.18292409377977412,
"eval_style_precision": 0.5948748921293634,
"eval_style_recall": 0.6488356868979707,
"eval_style_weighted_f1": 0.6017236316787142,
"step": 8004
},
{
"epoch": 2.1239380309845077,
"grad_norm": 1.367946743965149,
"learning_rate": 3.546194664534419e-05,
"loss": 0.2605,
"step": 8500
},
{
"epoch": 2.2488755622188905,
"grad_norm": 1.5504860877990723,
"learning_rate": 3.45533183106782e-05,
"loss": 0.2633,
"step": 9000
},
{
"epoch": 2.3738130934532733,
"grad_norm": 1.5913927555084229,
"learning_rate": 3.364468997601221e-05,
"loss": 0.2561,
"step": 9500
},
{
"epoch": 2.498750624687656,
"grad_norm": 1.7354265451431274,
"learning_rate": 3.273606164134623e-05,
"loss": 0.2571,
"step": 10000
},
{
"epoch": 2.623688155922039,
"grad_norm": 1.5740700960159302,
"learning_rate": 3.1827433306680235e-05,
"loss": 0.2533,
"step": 10500
},
{
"epoch": 2.7486256871564216,
"grad_norm": 1.3308007717132568,
"learning_rate": 3.091880497201425e-05,
"loss": 0.2539,
"step": 11000
},
{
"epoch": 2.873563218390805,
"grad_norm": 1.4897780418395996,
"learning_rate": 3.0010176637348258e-05,
"loss": 0.2471,
"step": 11500
},
{
"epoch": 2.9985007496251876,
"grad_norm": 1.9843637943267822,
"learning_rate": 2.9101548302682274e-05,
"loss": 0.2483,
"step": 12000
},
{
"epoch": 3.0,
"eval_category_macro_f1": 0.7903733270490069,
"eval_category_precision": 0.9645144186683499,
"eval_category_recall": 0.9642756943795006,
"eval_category_weighted_f1": 0.963310209667099,
"eval_color_macro_f1": 0.3621249155624305,
"eval_color_precision": 0.7802552212263318,
"eval_color_recall": 0.7986533246048817,
"eval_color_weighted_f1": 0.7810869424554918,
"eval_gender_macro_f1": 0.682238598199015,
"eval_gender_precision": 0.9267564337966526,
"eval_gender_recall": 0.9252782193958664,
"eval_gender_weighted_f1": 0.9249918474296774,
"eval_loss": 0.25919318199157715,
"eval_material_macro_f1": 0.3398667069927169,
"eval_material_precision": 0.5978318966832341,
"eval_material_recall": 0.6357741824869853,
"eval_material_weighted_f1": 0.5944051235584699,
"eval_neck_macro_f1": 0.23924719225254232,
"eval_neck_precision": 0.7806692719164586,
"eval_neck_recall": 0.7925122354188098,
"eval_neck_weighted_f1": 0.7790870348414296,
"eval_pattern_macro_f1": 0.1641511847711344,
"eval_pattern_precision": 0.5819393078338299,
"eval_pattern_recall": 0.6225879859097853,
"eval_pattern_weighted_f1": 0.5502139317041554,
"eval_product_type_macro_f1": 0.6583201290239673,
"eval_product_type_precision": 0.7128711001410035,
"eval_product_type_recall": 0.6994918794226753,
"eval_product_type_weighted_f1": 0.6929448155349042,
"eval_runtime": 301.2799,
"eval_samples_per_second": 106.476,
"eval_sleeve_macro_f1": 0.3563735801589765,
"eval_sleeve_precision": 0.8406013881476055,
"eval_sleeve_recall": 0.8499018049191059,
"eval_sleeve_weighted_f1": 0.840146351199092,
"eval_steps_per_second": 1.666,
"eval_style_macro_f1": 0.22006801367696224,
"eval_style_precision": 0.6224502543435393,
"eval_style_recall": 0.6560678325384208,
"eval_style_weighted_f1": 0.622977679977427,
"step": 12006
},
{
"epoch": 3.1234382808595704,
"grad_norm": 2.0150625705718994,
"learning_rate": 2.8192919968016284e-05,
"loss": 0.2251,
"step": 12500
},
{
"epoch": 3.248375812093953,
"grad_norm": 1.2645131349563599,
"learning_rate": 2.7284291633350297e-05,
"loss": 0.2251,
"step": 13000
},
{
"epoch": 3.373313343328336,
"grad_norm": 1.7234032154083252,
"learning_rate": 2.6375663298684306e-05,
"loss": 0.2239,
"step": 13500
},
{
"epoch": 3.4982508745627188,
"grad_norm": 1.676547646522522,
"learning_rate": 2.5467034964018323e-05,
"loss": 0.2254,
"step": 14000
},
{
"epoch": 3.6231884057971016,
"grad_norm": 1.3010063171386719,
"learning_rate": 2.4558406629352332e-05,
"loss": 0.2241,
"step": 14500
},
{
"epoch": 3.7481259370314843,
"grad_norm": 1.72947359085083,
"learning_rate": 2.3649778294686342e-05,
"loss": 0.2222,
"step": 15000
},
{
"epoch": 3.873063468265867,
"grad_norm": 1.47541081905365,
"learning_rate": 2.2741149960020355e-05,
"loss": 0.2177,
"step": 15500
},
{
"epoch": 3.99800099950025,
"grad_norm": 1.4984639883041382,
"learning_rate": 2.1832521625354364e-05,
"loss": 0.2146,
"step": 16000
},
{
"epoch": 4.0,
"eval_category_macro_f1": 0.8070428225617673,
"eval_category_precision": 0.9639301959345535,
"eval_category_recall": 0.9639951370055176,
"eval_category_weighted_f1": 0.9633956909548311,
"eval_color_macro_f1": 0.4160977179764728,
"eval_color_precision": 0.79551435648077,
"eval_color_recall": 0.80382804950279,
"eval_color_weighted_f1": 0.7932115174135331,
"eval_gender_macro_f1": 0.7138550749176814,
"eval_gender_precision": 0.9418271715431166,
"eval_gender_recall": 0.9409894323389133,
"eval_gender_weighted_f1": 0.9408785911526341,
"eval_loss": 0.24342668056488037,
"eval_material_macro_f1": 0.3788597805248959,
"eval_material_precision": 0.6203377475947315,
"eval_material_recall": 0.6495214938121513,
"eval_material_weighted_f1": 0.6137247593163535,
"eval_neck_macro_f1": 0.26417595340931804,
"eval_neck_precision": 0.7888860680739177,
"eval_neck_recall": 0.8017394557186944,
"eval_neck_weighted_f1": 0.7912357278702308,
"eval_pattern_macro_f1": 0.21137706069820827,
"eval_pattern_precision": 0.5896656959683353,
"eval_pattern_recall": 0.63402849215998,
"eval_pattern_weighted_f1": 0.5805939336450403,
"eval_product_type_macro_f1": 0.7027375784160153,
"eval_product_type_precision": 0.7257145790424636,
"eval_product_type_recall": 0.7205025094298451,
"eval_product_type_weighted_f1": 0.7160893066264747,
"eval_runtime": 301.2031,
"eval_samples_per_second": 106.503,
"eval_sleeve_macro_f1": 0.3797203601033849,
"eval_sleeve_precision": 0.8450184667180418,
"eval_sleeve_recall": 0.8539543003210823,
"eval_sleeve_weighted_f1": 0.8447728073486299,
"eval_steps_per_second": 1.667,
"eval_style_macro_f1": 0.25776208112470533,
"eval_style_precision": 0.636768112985809,
"eval_style_recall": 0.6687552604507622,
"eval_style_weighted_f1": 0.6370942948799759,
"step": 16008
},
{
"epoch": 4.122938530734633,
"grad_norm": 1.4649860858917236,
"learning_rate": 2.0923893290688377e-05,
"loss": 0.1968,
"step": 16500
},
{
"epoch": 4.2478760619690155,
"grad_norm": 1.7522532939910889,
"learning_rate": 2.001526495602239e-05,
"loss": 0.1952,
"step": 17000
},
{
"epoch": 4.372813593203398,
"grad_norm": 1.5114365816116333,
"learning_rate": 1.9106636621356403e-05,
"loss": 0.198,
"step": 17500
},
{
"epoch": 4.497751124437781,
"grad_norm": 1.7840216159820557,
"learning_rate": 1.8198008286690413e-05,
"loss": 0.1954,
"step": 18000
},
{
"epoch": 4.622688655672164,
"grad_norm": 1.5770485401153564,
"learning_rate": 1.7289379952024426e-05,
"loss": 0.1927,
"step": 18500
},
{
"epoch": 4.747626186906547,
"grad_norm": 1.5942214727401733,
"learning_rate": 1.6380751617358436e-05,
"loss": 0.1936,
"step": 19000
},
{
"epoch": 4.872563718140929,
"grad_norm": 1.1230299472808838,
"learning_rate": 1.547212328269245e-05,
"loss": 0.1907,
"step": 19500
},
{
"epoch": 4.997501249375312,
"grad_norm": 1.336006999015808,
"learning_rate": 1.456349494802646e-05,
"loss": 0.1921,
"step": 20000
},
{
"epoch": 5.0,
"eval_category_macro_f1": 0.814575381252187,
"eval_category_precision": 0.9663094811768377,
"eval_category_recall": 0.9665824994544717,
"eval_category_weighted_f1": 0.965923844387987,
"eval_color_macro_f1": 0.4369174781614063,
"eval_color_precision": 0.8028535608404194,
"eval_color_recall": 0.8148321331712335,
"eval_color_weighted_f1": 0.8028476456898314,
"eval_gender_macro_f1": 0.7053936478562223,
"eval_gender_precision": 0.9416777760492416,
"eval_gender_recall": 0.9407712210480377,
"eval_gender_weighted_f1": 0.9403681152060941,
"eval_loss": 0.23490393161773682,
"eval_material_macro_f1": 0.40133288032222664,
"eval_material_precision": 0.6313613296699556,
"eval_material_recall": 0.6625206521400293,
"eval_material_weighted_f1": 0.6270470446515838,
"eval_neck_macro_f1": 0.2814408126866265,
"eval_neck_precision": 0.8008124167021559,
"eval_neck_recall": 0.8062907197855295,
"eval_neck_weighted_f1": 0.7975722055581634,
"eval_pattern_macro_f1": 0.22699854595816307,
"eval_pattern_precision": 0.6042854492310323,
"eval_pattern_recall": 0.6455001714517286,
"eval_pattern_weighted_f1": 0.5909107309557631,
"eval_product_type_macro_f1": 0.7113036346548861,
"eval_product_type_precision": 0.7388496345969355,
"eval_product_type_recall": 0.7295426914804077,
"eval_product_type_weighted_f1": 0.726666644027167,
"eval_runtime": 301.3794,
"eval_samples_per_second": 106.441,
"eval_sleeve_macro_f1": 0.38500573083937306,
"eval_sleeve_precision": 0.8532874153559135,
"eval_sleeve_recall": 0.8566040088531438,
"eval_sleeve_weighted_f1": 0.8483550688145379,
"eval_steps_per_second": 1.666,
"eval_style_macro_f1": 0.2759097194358481,
"eval_style_precision": 0.636595640254917,
"eval_style_recall": 0.6738676392655631,
"eval_style_weighted_f1": 0.6414452136845251,
"step": 20010
},
{
"epoch": 5.122438780609695,
"grad_norm": 1.5047273635864258,
"learning_rate": 1.3654866613360473e-05,
"loss": 0.1717,
"step": 20500
},
{
"epoch": 5.247376311844078,
"grad_norm": 1.4382556676864624,
"learning_rate": 1.2746238278694484e-05,
"loss": 0.1693,
"step": 21000
},
{
"epoch": 5.3723138430784605,
"grad_norm": 1.5730925798416138,
"learning_rate": 1.1837609944028495e-05,
"loss": 0.1724,
"step": 21500
},
{
"epoch": 5.497251374312843,
"grad_norm": 1.8472751379013062,
"learning_rate": 1.0928981609362507e-05,
"loss": 0.1721,
"step": 22000
},
{
"epoch": 5.622188905547226,
"grad_norm": 1.3081119060516357,
"learning_rate": 1.0020353274696518e-05,
"loss": 0.1694,
"step": 22500
},
{
"epoch": 5.747126436781609,
"grad_norm": 1.9150363206863403,
"learning_rate": 9.111724940030531e-06,
"loss": 0.1701,
"step": 23000
},
{
"epoch": 5.872063968015992,
"grad_norm": 1.6012868881225586,
"learning_rate": 8.203096605364542e-06,
"loss": 0.1692,
"step": 23500
},
{
"epoch": 5.997001499250375,
"grad_norm": 2.1426620483398438,
"learning_rate": 7.294468270698554e-06,
"loss": 0.1663,
"step": 24000
},
{
"epoch": 6.0,
"eval_category_macro_f1": 0.8135808160841039,
"eval_category_precision": 0.9662816427280644,
"eval_category_recall": 0.9666136724960255,
"eval_category_weighted_f1": 0.9661077074670175,
"eval_color_macro_f1": 0.4600635003318152,
"eval_color_precision": 0.8112357172847174,
"eval_color_recall": 0.8206926649833224,
"eval_color_weighted_f1": 0.809040612878886,
"eval_gender_macro_f1": 0.7077574180619189,
"eval_gender_precision": 0.9483675936419935,
"eval_gender_recall": 0.9483150971040244,
"eval_gender_weighted_f1": 0.9479706364362375,
"eval_loss": 0.22788158059120178,
"eval_material_macro_f1": 0.41327594900590064,
"eval_material_precision": 0.6451838446086702,
"eval_material_recall": 0.6689422987000841,
"eval_material_weighted_f1": 0.6426236021542818,
"eval_neck_macro_f1": 0.29755762158064153,
"eval_neck_precision": 0.8025765460608351,
"eval_neck_recall": 0.8098444465226472,
"eval_neck_weighted_f1": 0.8028234665625918,
"eval_pattern_macro_f1": 0.25250915316096467,
"eval_pattern_precision": 0.6176590799884946,
"eval_pattern_recall": 0.6519218180117834,
"eval_pattern_weighted_f1": 0.601571471109787,
"eval_product_type_macro_f1": 0.7308082280687357,
"eval_product_type_precision": 0.7450374918864295,
"eval_product_type_recall": 0.7416690046447831,
"eval_product_type_weighted_f1": 0.7391305618836747,
"eval_runtime": 301.8571,
"eval_samples_per_second": 106.272,
"eval_sleeve_macro_f1": 0.40274936834626424,
"eval_sleeve_precision": 0.855686478933816,
"eval_sleeve_recall": 0.8593472365098662,
"eval_sleeve_weighted_f1": 0.851732539861904,
"eval_steps_per_second": 1.663,
"eval_style_macro_f1": 0.2996676235011164,
"eval_style_precision": 0.6484348800461253,
"eval_style_recall": 0.6805386701580474,
"eval_style_weighted_f1": 0.6505079281930336,
"step": 24012
}
],
"logging_steps": 500,
"max_steps": 28014,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 2,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}