{ "best_metric": 0.9661077074670175, "best_model_checkpoint": "/kaggle/working/mmoe_vit_results/checkpoint-24012", "epoch": 6.0, "eval_steps": 500, "global_step": 24012, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12493753123438281, "grad_norm": 2.205951452255249, "learning_rate": 5e-05, "loss": 0.7943, "step": 500 }, { "epoch": 0.24987506246876562, "grad_norm": 2.1307880878448486, "learning_rate": 4.9091371665334015e-05, "loss": 0.474, "step": 1000 }, { "epoch": 0.3748125937031484, "grad_norm": 1.7951563596725464, "learning_rate": 4.818274333066802e-05, "loss": 0.4167, "step": 1500 }, { "epoch": 0.49975012493753124, "grad_norm": 1.9876887798309326, "learning_rate": 4.727411499600204e-05, "loss": 0.3916, "step": 2000 }, { "epoch": 0.624687656171914, "grad_norm": 3.0330495834350586, "learning_rate": 4.636548666133605e-05, "loss": 0.3704, "step": 2500 }, { "epoch": 0.7496251874062968, "grad_norm": 2.0157809257507324, "learning_rate": 4.545685832667006e-05, "loss": 0.3577, "step": 3000 }, { "epoch": 0.8745627186406797, "grad_norm": 1.7754454612731934, "learning_rate": 4.4548229992004074e-05, "loss": 0.3447, "step": 3500 }, { "epoch": 0.9995002498750625, "grad_norm": 1.7371801137924194, "learning_rate": 4.3639601657338087e-05, "loss": 0.3348, "step": 4000 }, { "epoch": 1.0, "eval_category_macro_f1": 0.6952318859438807, "eval_category_precision": 0.956608825785308, "eval_category_recall": 0.9564824339910845, "eval_category_weighted_f1": 0.9548015657704888, "eval_color_macro_f1": 0.26886041835616176, "eval_color_precision": 0.742503302340083, "eval_color_recall": 0.7711898749961034, "eval_color_weighted_f1": 0.7494002787544956, "eval_gender_macro_f1": 0.5551422720580631, "eval_gender_precision": 0.9135315056899179, "eval_gender_recall": 0.9142429626858692, "eval_gender_weighted_f1": 0.9133261341390547, "eval_loss": 0.3139565885066986, "eval_material_macro_f1": 0.18175933111933457, "eval_material_precision": 0.5510795574472717, "eval_material_recall": 0.5989276473705539, "eval_material_weighted_f1": 0.5328149786359672, "eval_neck_macro_f1": 0.17476784999699163, "eval_neck_precision": 0.770231966081002, "eval_neck_recall": 0.767043860469466, "eval_neck_weighted_f1": 0.7547329696141571, "eval_pattern_macro_f1": 0.07495456820600468, "eval_pattern_precision": 0.500470399822932, "eval_pattern_recall": 0.5943763833037189, "eval_pattern_weighted_f1": 0.5043529662664364, "eval_product_type_macro_f1": 0.5286677539064947, "eval_product_type_precision": 0.6574438114334396, "eval_product_type_recall": 0.6384238910190467, "eval_product_type_weighted_f1": 0.6249330379628022, "eval_runtime": 307.6972, "eval_samples_per_second": 104.255, "eval_sleeve_macro_f1": 0.3129635925848871, "eval_sleeve_precision": 0.8408246980460057, "eval_sleeve_recall": 0.8423579288631192, "eval_sleeve_weighted_f1": 0.830038616651521, "eval_steps_per_second": 1.631, "eval_style_macro_f1": 0.14293232343466503, "eval_style_precision": 0.5728463487218707, "eval_style_recall": 0.6262975778546713, "eval_style_weighted_f1": 0.5804908017903951, "step": 4002 }, { "epoch": 1.1244377811094453, "grad_norm": 1.4916481971740723, "learning_rate": 4.27309733226721e-05, "loss": 0.3134, "step": 4500 }, { "epoch": 1.249375312343828, "grad_norm": 1.7547612190246582, "learning_rate": 4.1822344988006106e-05, "loss": 0.3112, "step": 5000 }, { "epoch": 1.3743128435782108, "grad_norm": 1.4588732719421387, "learning_rate": 4.091371665334012e-05, "loss": 0.3051, "step": 5500 }, { "epoch": 1.4992503748125938, "grad_norm": 1.2639210224151611, "learning_rate": 4.000508831867413e-05, "loss": 0.3029, "step": 6000 }, { "epoch": 1.6241879060469766, "grad_norm": 1.4789787530899048, "learning_rate": 3.9096459984008145e-05, "loss": 0.2961, "step": 6500 }, { "epoch": 1.7491254372813594, "grad_norm": 1.6470601558685303, "learning_rate": 3.818783164934215e-05, "loss": 0.2873, "step": 7000 }, { "epoch": 1.8740629685157422, "grad_norm": 2.157299280166626, "learning_rate": 3.7279203314676164e-05, "loss": 0.2847, "step": 7500 }, { "epoch": 1.999000499750125, "grad_norm": 1.4557589292526245, "learning_rate": 3.637057498001018e-05, "loss": 0.284, "step": 8000 }, { "epoch": 2.0, "eval_category_macro_f1": 0.7731459035999274, "eval_category_precision": 0.9621854954403526, "eval_category_recall": 0.9620935814707441, "eval_category_weighted_f1": 0.9611122266129147, "eval_color_macro_f1": 0.33282064622069346, "eval_color_precision": 0.7628507936505963, "eval_color_recall": 0.7826927273294055, "eval_color_weighted_f1": 0.7666531131477731, "eval_gender_macro_f1": 0.6529045193901017, "eval_gender_precision": 0.9310067709590051, "eval_gender_recall": 0.9307023286262041, "eval_gender_weighted_f1": 0.9300238040883521, "eval_loss": 0.27808678150177, "eval_material_macro_f1": 0.2672650569326831, "eval_material_precision": 0.5829820463502581, "eval_material_recall": 0.6170703575547867, "eval_material_weighted_f1": 0.5634078038324131, "eval_neck_macro_f1": 0.20473481727652498, "eval_neck_precision": 0.7723201257575838, "eval_neck_recall": 0.7876180678948845, "eval_neck_weighted_f1": 0.768304127766941, "eval_pattern_macro_f1": 0.11918967355596528, "eval_pattern_precision": 0.5371433805351696, "eval_pattern_recall": 0.6123320552386297, "eval_pattern_weighted_f1": 0.5328905424758658, "eval_product_type_macro_f1": 0.6181561032086245, "eval_product_type_precision": 0.6896032942264115, "eval_product_type_recall": 0.6778577885844321, "eval_product_type_weighted_f1": 0.67171279677715, "eval_runtime": 302.2512, "eval_samples_per_second": 106.134, "eval_sleeve_macro_f1": 0.34627852813600124, "eval_sleeve_precision": 0.8389224468125812, "eval_sleeve_recall": 0.8489042675893888, "eval_sleeve_weighted_f1": 0.8375757784906263, "eval_steps_per_second": 1.661, "eval_style_macro_f1": 0.18292409377977412, "eval_style_precision": 0.5948748921293634, "eval_style_recall": 0.6488356868979707, "eval_style_weighted_f1": 0.6017236316787142, "step": 8004 }, { "epoch": 2.1239380309845077, "grad_norm": 1.367946743965149, "learning_rate": 3.546194664534419e-05, "loss": 0.2605, "step": 8500 }, { "epoch": 2.2488755622188905, "grad_norm": 1.5504860877990723, "learning_rate": 3.45533183106782e-05, "loss": 0.2633, "step": 9000 }, { "epoch": 2.3738130934532733, "grad_norm": 1.5913927555084229, "learning_rate": 3.364468997601221e-05, "loss": 0.2561, "step": 9500 }, { "epoch": 2.498750624687656, "grad_norm": 1.7354265451431274, "learning_rate": 3.273606164134623e-05, "loss": 0.2571, "step": 10000 }, { "epoch": 2.623688155922039, "grad_norm": 1.5740700960159302, "learning_rate": 3.1827433306680235e-05, "loss": 0.2533, "step": 10500 }, { "epoch": 2.7486256871564216, "grad_norm": 1.3308007717132568, "learning_rate": 3.091880497201425e-05, "loss": 0.2539, "step": 11000 }, { "epoch": 2.873563218390805, "grad_norm": 1.4897780418395996, "learning_rate": 3.0010176637348258e-05, "loss": 0.2471, "step": 11500 }, { "epoch": 2.9985007496251876, "grad_norm": 1.9843637943267822, "learning_rate": 2.9101548302682274e-05, "loss": 0.2483, "step": 12000 }, { "epoch": 3.0, "eval_category_macro_f1": 0.7903733270490069, "eval_category_precision": 0.9645144186683499, "eval_category_recall": 0.9642756943795006, "eval_category_weighted_f1": 0.963310209667099, "eval_color_macro_f1": 0.3621249155624305, "eval_color_precision": 0.7802552212263318, "eval_color_recall": 0.7986533246048817, "eval_color_weighted_f1": 0.7810869424554918, "eval_gender_macro_f1": 0.682238598199015, "eval_gender_precision": 0.9267564337966526, "eval_gender_recall": 0.9252782193958664, "eval_gender_weighted_f1": 0.9249918474296774, "eval_loss": 0.25919318199157715, "eval_material_macro_f1": 0.3398667069927169, "eval_material_precision": 0.5978318966832341, "eval_material_recall": 0.6357741824869853, "eval_material_weighted_f1": 0.5944051235584699, "eval_neck_macro_f1": 0.23924719225254232, "eval_neck_precision": 0.7806692719164586, "eval_neck_recall": 0.7925122354188098, "eval_neck_weighted_f1": 0.7790870348414296, "eval_pattern_macro_f1": 0.1641511847711344, "eval_pattern_precision": 0.5819393078338299, "eval_pattern_recall": 0.6225879859097853, "eval_pattern_weighted_f1": 0.5502139317041554, "eval_product_type_macro_f1": 0.6583201290239673, "eval_product_type_precision": 0.7128711001410035, "eval_product_type_recall": 0.6994918794226753, "eval_product_type_weighted_f1": 0.6929448155349042, "eval_runtime": 301.2799, "eval_samples_per_second": 106.476, "eval_sleeve_macro_f1": 0.3563735801589765, "eval_sleeve_precision": 0.8406013881476055, "eval_sleeve_recall": 0.8499018049191059, "eval_sleeve_weighted_f1": 0.840146351199092, "eval_steps_per_second": 1.666, "eval_style_macro_f1": 0.22006801367696224, "eval_style_precision": 0.6224502543435393, "eval_style_recall": 0.6560678325384208, "eval_style_weighted_f1": 0.622977679977427, "step": 12006 }, { "epoch": 3.1234382808595704, "grad_norm": 2.0150625705718994, "learning_rate": 2.8192919968016284e-05, "loss": 0.2251, "step": 12500 }, { "epoch": 3.248375812093953, "grad_norm": 1.2645131349563599, "learning_rate": 2.7284291633350297e-05, "loss": 0.2251, "step": 13000 }, { "epoch": 3.373313343328336, "grad_norm": 1.7234032154083252, "learning_rate": 2.6375663298684306e-05, "loss": 0.2239, "step": 13500 }, { "epoch": 3.4982508745627188, "grad_norm": 1.676547646522522, "learning_rate": 2.5467034964018323e-05, "loss": 0.2254, "step": 14000 }, { "epoch": 3.6231884057971016, "grad_norm": 1.3010063171386719, "learning_rate": 2.4558406629352332e-05, "loss": 0.2241, "step": 14500 }, { "epoch": 3.7481259370314843, "grad_norm": 1.72947359085083, "learning_rate": 2.3649778294686342e-05, "loss": 0.2222, "step": 15000 }, { "epoch": 3.873063468265867, "grad_norm": 1.47541081905365, "learning_rate": 2.2741149960020355e-05, "loss": 0.2177, "step": 15500 }, { "epoch": 3.99800099950025, "grad_norm": 1.4984639883041382, "learning_rate": 2.1832521625354364e-05, "loss": 0.2146, "step": 16000 }, { "epoch": 4.0, "eval_category_macro_f1": 0.8070428225617673, "eval_category_precision": 0.9639301959345535, "eval_category_recall": 0.9639951370055176, "eval_category_weighted_f1": 0.9633956909548311, "eval_color_macro_f1": 0.4160977179764728, "eval_color_precision": 0.79551435648077, "eval_color_recall": 0.80382804950279, "eval_color_weighted_f1": 0.7932115174135331, "eval_gender_macro_f1": 0.7138550749176814, "eval_gender_precision": 0.9418271715431166, "eval_gender_recall": 0.9409894323389133, "eval_gender_weighted_f1": 0.9408785911526341, "eval_loss": 0.24342668056488037, "eval_material_macro_f1": 0.3788597805248959, "eval_material_precision": 0.6203377475947315, "eval_material_recall": 0.6495214938121513, "eval_material_weighted_f1": 0.6137247593163535, "eval_neck_macro_f1": 0.26417595340931804, "eval_neck_precision": 0.7888860680739177, "eval_neck_recall": 0.8017394557186944, "eval_neck_weighted_f1": 0.7912357278702308, "eval_pattern_macro_f1": 0.21137706069820827, "eval_pattern_precision": 0.5896656959683353, "eval_pattern_recall": 0.63402849215998, "eval_pattern_weighted_f1": 0.5805939336450403, "eval_product_type_macro_f1": 0.7027375784160153, "eval_product_type_precision": 0.7257145790424636, "eval_product_type_recall": 0.7205025094298451, "eval_product_type_weighted_f1": 0.7160893066264747, "eval_runtime": 301.2031, "eval_samples_per_second": 106.503, "eval_sleeve_macro_f1": 0.3797203601033849, "eval_sleeve_precision": 0.8450184667180418, "eval_sleeve_recall": 0.8539543003210823, "eval_sleeve_weighted_f1": 0.8447728073486299, "eval_steps_per_second": 1.667, "eval_style_macro_f1": 0.25776208112470533, "eval_style_precision": 0.636768112985809, "eval_style_recall": 0.6687552604507622, "eval_style_weighted_f1": 0.6370942948799759, "step": 16008 }, { "epoch": 4.122938530734633, "grad_norm": 1.4649860858917236, "learning_rate": 2.0923893290688377e-05, "loss": 0.1968, "step": 16500 }, { "epoch": 4.2478760619690155, "grad_norm": 1.7522532939910889, "learning_rate": 2.001526495602239e-05, "loss": 0.1952, "step": 17000 }, { "epoch": 4.372813593203398, "grad_norm": 1.5114365816116333, "learning_rate": 1.9106636621356403e-05, "loss": 0.198, "step": 17500 }, { "epoch": 4.497751124437781, "grad_norm": 1.7840216159820557, "learning_rate": 1.8198008286690413e-05, "loss": 0.1954, "step": 18000 }, { "epoch": 4.622688655672164, "grad_norm": 1.5770485401153564, "learning_rate": 1.7289379952024426e-05, "loss": 0.1927, "step": 18500 }, { "epoch": 4.747626186906547, "grad_norm": 1.5942214727401733, "learning_rate": 1.6380751617358436e-05, "loss": 0.1936, "step": 19000 }, { "epoch": 4.872563718140929, "grad_norm": 1.1230299472808838, "learning_rate": 1.547212328269245e-05, "loss": 0.1907, "step": 19500 }, { "epoch": 4.997501249375312, "grad_norm": 1.336006999015808, "learning_rate": 1.456349494802646e-05, "loss": 0.1921, "step": 20000 }, { "epoch": 5.0, "eval_category_macro_f1": 0.814575381252187, "eval_category_precision": 0.9663094811768377, "eval_category_recall": 0.9665824994544717, "eval_category_weighted_f1": 0.965923844387987, "eval_color_macro_f1": 0.4369174781614063, "eval_color_precision": 0.8028535608404194, "eval_color_recall": 0.8148321331712335, "eval_color_weighted_f1": 0.8028476456898314, "eval_gender_macro_f1": 0.7053936478562223, "eval_gender_precision": 0.9416777760492416, "eval_gender_recall": 0.9407712210480377, "eval_gender_weighted_f1": 0.9403681152060941, "eval_loss": 0.23490393161773682, "eval_material_macro_f1": 0.40133288032222664, "eval_material_precision": 0.6313613296699556, "eval_material_recall": 0.6625206521400293, "eval_material_weighted_f1": 0.6270470446515838, "eval_neck_macro_f1": 0.2814408126866265, "eval_neck_precision": 0.8008124167021559, "eval_neck_recall": 0.8062907197855295, "eval_neck_weighted_f1": 0.7975722055581634, "eval_pattern_macro_f1": 0.22699854595816307, "eval_pattern_precision": 0.6042854492310323, "eval_pattern_recall": 0.6455001714517286, "eval_pattern_weighted_f1": 0.5909107309557631, "eval_product_type_macro_f1": 0.7113036346548861, "eval_product_type_precision": 0.7388496345969355, "eval_product_type_recall": 0.7295426914804077, "eval_product_type_weighted_f1": 0.726666644027167, "eval_runtime": 301.3794, "eval_samples_per_second": 106.441, "eval_sleeve_macro_f1": 0.38500573083937306, "eval_sleeve_precision": 0.8532874153559135, "eval_sleeve_recall": 0.8566040088531438, "eval_sleeve_weighted_f1": 0.8483550688145379, "eval_steps_per_second": 1.666, "eval_style_macro_f1": 0.2759097194358481, "eval_style_precision": 0.636595640254917, "eval_style_recall": 0.6738676392655631, "eval_style_weighted_f1": 0.6414452136845251, "step": 20010 }, { "epoch": 5.122438780609695, "grad_norm": 1.5047273635864258, "learning_rate": 1.3654866613360473e-05, "loss": 0.1717, "step": 20500 }, { "epoch": 5.247376311844078, "grad_norm": 1.4382556676864624, "learning_rate": 1.2746238278694484e-05, "loss": 0.1693, "step": 21000 }, { "epoch": 5.3723138430784605, "grad_norm": 1.5730925798416138, "learning_rate": 1.1837609944028495e-05, "loss": 0.1724, "step": 21500 }, { "epoch": 5.497251374312843, "grad_norm": 1.8472751379013062, "learning_rate": 1.0928981609362507e-05, "loss": 0.1721, "step": 22000 }, { "epoch": 5.622188905547226, "grad_norm": 1.3081119060516357, "learning_rate": 1.0020353274696518e-05, "loss": 0.1694, "step": 22500 }, { "epoch": 5.747126436781609, "grad_norm": 1.9150363206863403, "learning_rate": 9.111724940030531e-06, "loss": 0.1701, "step": 23000 }, { "epoch": 5.872063968015992, "grad_norm": 1.6012868881225586, "learning_rate": 8.203096605364542e-06, "loss": 0.1692, "step": 23500 }, { "epoch": 5.997001499250375, "grad_norm": 2.1426620483398438, "learning_rate": 7.294468270698554e-06, "loss": 0.1663, "step": 24000 }, { "epoch": 6.0, "eval_category_macro_f1": 0.8135808160841039, "eval_category_precision": 0.9662816427280644, "eval_category_recall": 0.9666136724960255, "eval_category_weighted_f1": 0.9661077074670175, "eval_color_macro_f1": 0.4600635003318152, "eval_color_precision": 0.8112357172847174, "eval_color_recall": 0.8206926649833224, "eval_color_weighted_f1": 0.809040612878886, "eval_gender_macro_f1": 0.7077574180619189, "eval_gender_precision": 0.9483675936419935, "eval_gender_recall": 0.9483150971040244, "eval_gender_weighted_f1": 0.9479706364362375, "eval_loss": 0.22788158059120178, "eval_material_macro_f1": 0.41327594900590064, "eval_material_precision": 0.6451838446086702, "eval_material_recall": 0.6689422987000841, "eval_material_weighted_f1": 0.6426236021542818, "eval_neck_macro_f1": 0.29755762158064153, "eval_neck_precision": 0.8025765460608351, "eval_neck_recall": 0.8098444465226472, "eval_neck_weighted_f1": 0.8028234665625918, "eval_pattern_macro_f1": 0.25250915316096467, "eval_pattern_precision": 0.6176590799884946, "eval_pattern_recall": 0.6519218180117834, "eval_pattern_weighted_f1": 0.601571471109787, "eval_product_type_macro_f1": 0.7308082280687357, "eval_product_type_precision": 0.7450374918864295, "eval_product_type_recall": 0.7416690046447831, "eval_product_type_weighted_f1": 0.7391305618836747, "eval_runtime": 301.8571, "eval_samples_per_second": 106.272, "eval_sleeve_macro_f1": 0.40274936834626424, "eval_sleeve_precision": 0.855686478933816, "eval_sleeve_recall": 0.8593472365098662, "eval_sleeve_weighted_f1": 0.851732539861904, "eval_steps_per_second": 1.663, "eval_style_macro_f1": 0.2996676235011164, "eval_style_precision": 0.6484348800461253, "eval_style_recall": 0.6805386701580474, "eval_style_weighted_f1": 0.6505079281930336, "step": 24012 } ], "logging_steps": 500, "max_steps": 28014, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }