Upload ablation summaries
Browse files
mbert_rtx6000_metrics/docs/ablation_results.csv
CHANGED
|
@@ -67,6 +67,8 @@ result_ablation_mbert,sst2,mBERT,mha_attention,43,eval_accuracy,0.88188073394495
|
|
| 67 |
result_ablation_mbert,sst2,mBERT,mha_attention,44,eval_accuracy,0.8853211009174312,0.4112522304058075,0.2333670342296843,3.0,872,/workspace/result_ablation_mbert/sst2/mBERT/mha_attention/seed_44/all_results.json
|
| 68 |
result_ablation_mbert,sst2,mBERT,multi_branch_average,42,eval_accuracy,0.8853211009174312,0.4263913333415985,0.2324217217996476,3.0,872,/workspace/result_ablation_mbert/sst2/mBERT/multi_branch_average/seed_42/all_results.json
|
| 69 |
result_ablation_mbert,sst2,mBERT,multi_branch_average,43,eval_accuracy,0.8784403669724771,0.41880160570144653,0.23121720937355744,3.0,872,/workspace/result_ablation_mbert/sst2/mBERT/multi_branch_average/seed_43/all_results.json
|
|
|
|
|
|
|
| 70 |
result_ablation_mbert,sst2,mBERT,gated_multi_branch,43,eval_accuracy,0.8784403669724771,0.43847355246543884,0.23293366598041423,3.0,872,/workspace/result_ablation_mbert/sst2/mBERT/gated_multi_branch/seed_43/all_results.json
|
| 71 |
result_ablation_mbert,sst2,mBERT,gated_multi_branch,44,eval_accuracy,0.8876146788990825,0.41072866320610046,0.2325162212280353,3.0,872,/workspace/result_ablation_mbert/sst2/mBERT/gated_multi_branch/seed_44/all_results.json
|
| 72 |
result_ablation_mbert,vsfc,mBERT,hf_sequence_classifier,42,eval_accuracy,0.9393556538218573,0.22358696162700653,0.20489713470639875,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/hf_sequence_classifier/seed_42/all_results.json
|
|
@@ -75,11 +77,18 @@ result_ablation_mbert,vsfc,mBERT,hf_sequence_classifier,44,eval_accuracy,0.93240
|
|
| 75 |
result_ablation_mbert,vsfc,mBERT,cls,42,eval_accuracy,0.9412507896399241,0.21872149407863617,0.2038555185166363,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/cls/seed_42/all_results.json
|
| 76 |
result_ablation_mbert,vsfc,mBERT,cls,43,eval_accuracy,0.9336702463676564,0.24253493547439575,0.2038297117837137,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/cls/seed_43/all_results.json
|
| 77 |
result_ablation_mbert,vsfc,mBERT,cls,44,eval_accuracy,0.9368288060644346,0.22460007667541504,0.20489413286020233,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/cls/seed_44/all_results.json
|
|
|
|
|
|
|
| 78 |
result_ablation_mbert,vsfc,mBERT,mean,44,eval_accuracy,0.936197094125079,0.22665680944919586,0.20191558268612234,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/mean/seed_44/all_results.json
|
|
|
|
| 79 |
result_ablation_mbert,vsfc,mBERT,max,43,eval_accuracy,0.9317751105495894,0.23970364034175873,0.20938005839764157,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/max/seed_43/all_results.json
|
| 80 |
result_ablation_mbert,vsfc,mBERT,max,44,eval_accuracy,0.9330385344283006,0.2280229926109314,0.20747514532533484,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/max/seed_44/all_results.json
|
|
|
|
|
|
|
| 81 |
result_ablation_mbert,vsfc,mBERT,attention,44,eval_accuracy,0.9330385344283006,0.2232467234134674,0.2006188094034022,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/attention/seed_44/all_results.json
|
|
|
|
| 82 |
result_ablation_mbert,vsfc,mBERT,mha_attention,43,eval_accuracy,0.9387239418825016,0.2242736667394638,0.20716787132757977,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/mha_attention/seed_43/all_results.json
|
|
|
|
| 83 |
result_ablation_mbert,vsfc,mBERT,multi_branch_average,42,eval_accuracy,0.9412507896399241,0.21253333985805511,0.20452363604115975,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/multi_branch_average/seed_42/all_results.json
|
| 84 |
result_ablation_mbert,vsfc,mBERT,multi_branch_average,43,eval_accuracy,0.9330385344283006,0.22735266387462616,0.20476110576752007,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/multi_branch_average/seed_43/all_results.json
|
| 85 |
result_ablation_mbert,vsfc,mBERT,gated_multi_branch,42,eval_accuracy,0.9380922299431459,0.2133793830871582,0.2020011867605659,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/gated_multi_branch/seed_42/all_results.json
|
|
|
|
| 67 |
result_ablation_mbert,sst2,mBERT,mha_attention,44,eval_accuracy,0.8853211009174312,0.4112522304058075,0.2333670342296843,3.0,872,/workspace/result_ablation_mbert/sst2/mBERT/mha_attention/seed_44/all_results.json
|
| 68 |
result_ablation_mbert,sst2,mBERT,multi_branch_average,42,eval_accuracy,0.8853211009174312,0.4263913333415985,0.2324217217996476,3.0,872,/workspace/result_ablation_mbert/sst2/mBERT/multi_branch_average/seed_42/all_results.json
|
| 69 |
result_ablation_mbert,sst2,mBERT,multi_branch_average,43,eval_accuracy,0.8784403669724771,0.41880160570144653,0.23121720937355744,3.0,872,/workspace/result_ablation_mbert/sst2/mBERT/multi_branch_average/seed_43/all_results.json
|
| 70 |
+
result_ablation_mbert,sst2,mBERT,multi_branch_average,44,eval_accuracy,0.8646788990825688,0.4119800925254822,0.23205033346571927,3.0,872,/workspace/result_ablation_mbert/sst2/mBERT/multi_branch_average/seed_44/all_results.json
|
| 71 |
+
result_ablation_mbert,sst2,mBERT,gated_multi_branch,42,eval_accuracy,0.8669724770642202,0.42526304721832275,0.23464352019831666,3.0,872,/workspace/result_ablation_mbert/sst2/mBERT/gated_multi_branch/seed_42/all_results.json
|
| 72 |
result_ablation_mbert,sst2,mBERT,gated_multi_branch,43,eval_accuracy,0.8784403669724771,0.43847355246543884,0.23293366598041423,3.0,872,/workspace/result_ablation_mbert/sst2/mBERT/gated_multi_branch/seed_43/all_results.json
|
| 73 |
result_ablation_mbert,sst2,mBERT,gated_multi_branch,44,eval_accuracy,0.8876146788990825,0.41072866320610046,0.2325162212280353,3.0,872,/workspace/result_ablation_mbert/sst2/mBERT/gated_multi_branch/seed_44/all_results.json
|
| 74 |
result_ablation_mbert,vsfc,mBERT,hf_sequence_classifier,42,eval_accuracy,0.9393556538218573,0.22358696162700653,0.20489713470639875,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/hf_sequence_classifier/seed_42/all_results.json
|
|
|
|
| 77 |
result_ablation_mbert,vsfc,mBERT,cls,42,eval_accuracy,0.9412507896399241,0.21872149407863617,0.2038555185166363,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/cls/seed_42/all_results.json
|
| 78 |
result_ablation_mbert,vsfc,mBERT,cls,43,eval_accuracy,0.9336702463676564,0.24253493547439575,0.2038297117837137,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/cls/seed_43/all_results.json
|
| 79 |
result_ablation_mbert,vsfc,mBERT,cls,44,eval_accuracy,0.9368288060644346,0.22460007667541504,0.20489413286020233,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/cls/seed_44/all_results.json
|
| 80 |
+
result_ablation_mbert,vsfc,mBERT,mean,42,eval_accuracy,0.9374605180037903,0.21570518612861633,0.2002154952811397,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/mean/seed_42/all_results.json
|
| 81 |
+
result_ablation_mbert,vsfc,mBERT,mean,43,eval_accuracy,0.9349336702463676,0.23304706811904907,0.20186518625235458,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/mean/seed_43/all_results.json
|
| 82 |
result_ablation_mbert,vsfc,mBERT,mean,44,eval_accuracy,0.936197094125079,0.22665680944919586,0.20191558268612234,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/mean/seed_44/all_results.json
|
| 83 |
+
result_ablation_mbert,vsfc,mBERT,max,42,eval_accuracy,0.9425142135186355,0.2218012511730194,0.20883273712594472,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/max/seed_42/all_results.json
|
| 84 |
result_ablation_mbert,vsfc,mBERT,max,43,eval_accuracy,0.9317751105495894,0.23970364034175873,0.20938005839764157,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/max/seed_43/all_results.json
|
| 85 |
result_ablation_mbert,vsfc,mBERT,max,44,eval_accuracy,0.9330385344283006,0.2280229926109314,0.20747514532533484,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/max/seed_44/all_results.json
|
| 86 |
+
result_ablation_mbert,vsfc,mBERT,attention,42,eval_accuracy,0.9431459254579911,0.22031328082084656,0.2008532669753709,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/attention/seed_42/all_results.json
|
| 87 |
+
result_ablation_mbert,vsfc,mBERT,attention,43,eval_accuracy,0.9368288060644346,0.2278706282377243,0.20126758842122439,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/attention/seed_43/all_results.json
|
| 88 |
result_ablation_mbert,vsfc,mBERT,attention,44,eval_accuracy,0.9330385344283006,0.2232467234134674,0.2006188094034022,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/attention/seed_44/all_results.json
|
| 89 |
+
result_ablation_mbert,vsfc,mBERT,mha_attention,42,eval_accuracy,0.9368288060644346,0.21603453159332275,0.2058336245963763,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/mha_attention/seed_42/all_results.json
|
| 90 |
result_ablation_mbert,vsfc,mBERT,mha_attention,43,eval_accuracy,0.9387239418825016,0.2242736667394638,0.20716787132757977,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/mha_attention/seed_43/all_results.json
|
| 91 |
+
result_ablation_mbert,vsfc,mBERT,mha_attention,44,eval_accuracy,0.9336702463676564,0.2265038937330246,0.21097227918553052,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/mha_attention/seed_44/all_results.json
|
| 92 |
result_ablation_mbert,vsfc,mBERT,multi_branch_average,42,eval_accuracy,0.9412507896399241,0.21253333985805511,0.20452363604115975,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/multi_branch_average/seed_42/all_results.json
|
| 93 |
result_ablation_mbert,vsfc,mBERT,multi_branch_average,43,eval_accuracy,0.9330385344283006,0.22735266387462616,0.20476110576752007,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/multi_branch_average/seed_43/all_results.json
|
| 94 |
result_ablation_mbert,vsfc,mBERT,gated_multi_branch,42,eval_accuracy,0.9380922299431459,0.2133793830871582,0.2020011867605659,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/gated_multi_branch/seed_42/all_results.json
|
mbert_rtx6000_metrics/docs/ablation_results_aggregate.csv
CHANGED
|
@@ -17,17 +17,17 @@ result_ablation_mbert,mrpc,mBERT,mha_attention,eval_combined_score,3,0.869748361
|
|
| 17 |
result_ablation_mbert,mrpc,mBERT,multi_branch_average,eval_combined_score,3,0.8723060123547228,0.005738234218203849,0.8670419052576783,0.878422920892495
|
| 18 |
result_ablation_mbert,sst2,mBERT,attention,eval_accuracy,3,0.8765290519877675,0.010019374940428994,0.8681192660550459,0.8876146788990825
|
| 19 |
result_ablation_mbert,sst2,mBERT,cls,eval_accuracy,3,0.8772935779816513,0.005255247356600745,0.8727064220183486,0.8830275229357798
|
| 20 |
-
result_ablation_mbert,sst2,mBERT,gated_multi_branch,eval_accuracy,
|
| 21 |
result_ablation_mbert,sst2,mBERT,hf_sequence_classifier,eval_accuracy,3,0.8795871559633027,0.009102355427974529,0.8692660550458715,0.8864678899082569
|
| 22 |
result_ablation_mbert,sst2,mBERT,max,eval_accuracy,3,0.8761467889908257,0.00397259359534147,0.8715596330275229,0.8784403669724771
|
| 23 |
result_ablation_mbert,sst2,mBERT,mean,eval_accuracy,3,0.8730886850152905,0.004027390578307669,0.8692660550458715,0.8772935779816514
|
| 24 |
result_ablation_mbert,sst2,mBERT,mha_attention,eval_accuracy,3,0.8799694189602446,0.006520918237474033,0.8727064220183486,0.8853211009174312
|
| 25 |
-
result_ablation_mbert,sst2,mBERT,multi_branch_average,eval_accuracy,
|
| 26 |
-
result_ablation_mbert,vsfc,mBERT,attention,eval_accuracy,
|
| 27 |
result_ablation_mbert,vsfc,mBERT,cls,eval_accuracy,3,0.9372499473573384,0.0038077787576384315,0.9336702463676564,0.9412507896399241
|
| 28 |
result_ablation_mbert,vsfc,mBERT,gated_multi_branch,eval_accuracy,2,0.9384080859128238,0.0004466877960749482,0.9380922299431459,0.9387239418825016
|
| 29 |
result_ablation_mbert,vsfc,mBERT,hf_sequence_classifier,eval_accuracy,3,0.9364076647715308,0.0035920661421840576,0.932406822488945,0.9393556538218573
|
| 30 |
-
result_ablation_mbert,vsfc,mBERT,max,eval_accuracy,
|
| 31 |
-
result_ablation_mbert,vsfc,mBERT,mean,eval_accuracy,
|
| 32 |
-
result_ablation_mbert,vsfc,mBERT,mha_attention,eval_accuracy,
|
| 33 |
result_ablation_mbert,vsfc,mBERT,multi_branch_average,eval_accuracy,2,0.9371446620341124,0.005806941348973541,0.9330385344283006,0.9412507896399241
|
|
|
|
| 17 |
result_ablation_mbert,mrpc,mBERT,multi_branch_average,eval_combined_score,3,0.8723060123547228,0.005738234218203849,0.8670419052576783,0.878422920892495
|
| 18 |
result_ablation_mbert,sst2,mBERT,attention,eval_accuracy,3,0.8765290519877675,0.010019374940428994,0.8681192660550459,0.8876146788990825
|
| 19 |
result_ablation_mbert,sst2,mBERT,cls,eval_accuracy,3,0.8772935779816513,0.005255247356600745,0.8727064220183486,0.8830275229357798
|
| 20 |
+
result_ablation_mbert,sst2,mBERT,gated_multi_branch,eval_accuracy,3,0.8776758409785933,0.010342315946841468,0.8669724770642202,0.8876146788990825
|
| 21 |
result_ablation_mbert,sst2,mBERT,hf_sequence_classifier,eval_accuracy,3,0.8795871559633027,0.009102355427974529,0.8692660550458715,0.8864678899082569
|
| 22 |
result_ablation_mbert,sst2,mBERT,max,eval_accuracy,3,0.8761467889908257,0.00397259359534147,0.8715596330275229,0.8784403669724771
|
| 23 |
result_ablation_mbert,sst2,mBERT,mean,eval_accuracy,3,0.8730886850152905,0.004027390578307669,0.8692660550458715,0.8772935779816514
|
| 24 |
result_ablation_mbert,sst2,mBERT,mha_attention,eval_accuracy,3,0.8799694189602446,0.006520918237474033,0.8727064220183486,0.8853211009174312
|
| 25 |
+
result_ablation_mbert,sst2,mBERT,multi_branch_average,eval_accuracy,3,0.8761467889908257,0.01051049471320149,0.8646788990825688,0.8853211009174312
|
| 26 |
+
result_ablation_mbert,vsfc,mBERT,attention,eval_accuracy,3,0.9376710886502422,0.005106066815322034,0.9330385344283006,0.9431459254579911
|
| 27 |
result_ablation_mbert,vsfc,mBERT,cls,eval_accuracy,3,0.9372499473573384,0.0038077787576384315,0.9336702463676564,0.9412507896399241
|
| 28 |
result_ablation_mbert,vsfc,mBERT,gated_multi_branch,eval_accuracy,2,0.9384080859128238,0.0004466877960749482,0.9380922299431459,0.9387239418825016
|
| 29 |
result_ablation_mbert,vsfc,mBERT,hf_sequence_classifier,eval_accuracy,3,0.9364076647715308,0.0035920661421840576,0.932406822488945,0.9393556538218573
|
| 30 |
+
result_ablation_mbert,vsfc,mBERT,max,eval_accuracy,3,0.9357759528321752,0.00586959775311282,0.9317751105495894,0.9425142135186355
|
| 31 |
+
result_ablation_mbert,vsfc,mBERT,mean,eval_accuracy,3,0.936197094125079,0.0012634238787113117,0.9349336702463676,0.9374605180037903
|
| 32 |
+
result_ablation_mbert,vsfc,mBERT,mha_attention,eval_accuracy,3,0.9364076647715309,0.0025530334076610166,0.9336702463676564,0.9387239418825016
|
| 33 |
result_ablation_mbert,vsfc,mBERT,multi_branch_average,eval_accuracy,2,0.9371446620341124,0.005806941348973541,0.9330385344283006,0.9412507896399241
|
mbert_rtx6000_metrics/docs/ablation_summary.md
CHANGED
|
@@ -23,19 +23,19 @@ Main metric is selected per task: CoLA uses Matthews correlation; MRPC/QQP/STSB
|
|
| 23 |
| result_ablation_mbert | mrpc | mBERT | multi_branch_average | eval_combined_score | 3 | 0.8723 | 0.0057 | 0.8670 | 0.8784 |
|
| 24 |
| result_ablation_mbert | sst2 | mBERT | attention | eval_accuracy | 3 | 0.8765 | 0.0100 | 0.8681 | 0.8876 |
|
| 25 |
| result_ablation_mbert | sst2 | mBERT | cls | eval_accuracy | 3 | 0.8773 | 0.0053 | 0.8727 | 0.8830 |
|
| 26 |
-
| result_ablation_mbert | sst2 | mBERT | gated_multi_branch | eval_accuracy |
|
| 27 |
| result_ablation_mbert | sst2 | mBERT | hf_sequence_classifier | eval_accuracy | 3 | 0.8796 | 0.0091 | 0.8693 | 0.8865 |
|
| 28 |
| result_ablation_mbert | sst2 | mBERT | max | eval_accuracy | 3 | 0.8761 | 0.0040 | 0.8716 | 0.8784 |
|
| 29 |
| result_ablation_mbert | sst2 | mBERT | mean | eval_accuracy | 3 | 0.8731 | 0.0040 | 0.8693 | 0.8773 |
|
| 30 |
| result_ablation_mbert | sst2 | mBERT | mha_attention | eval_accuracy | 3 | 0.8800 | 0.0065 | 0.8727 | 0.8853 |
|
| 31 |
-
| result_ablation_mbert | sst2 | mBERT | multi_branch_average | eval_accuracy |
|
| 32 |
-
| result_ablation_mbert | vsfc | mBERT | attention | eval_accuracy |
|
| 33 |
| result_ablation_mbert | vsfc | mBERT | cls | eval_accuracy | 3 | 0.9372 | 0.0038 | 0.9337 | 0.9413 |
|
| 34 |
| result_ablation_mbert | vsfc | mBERT | gated_multi_branch | eval_accuracy | 2 | 0.9384 | 0.0004 | 0.9381 | 0.9387 |
|
| 35 |
| result_ablation_mbert | vsfc | mBERT | hf_sequence_classifier | eval_accuracy | 3 | 0.9364 | 0.0036 | 0.9324 | 0.9394 |
|
| 36 |
-
| result_ablation_mbert | vsfc | mBERT | max | eval_accuracy |
|
| 37 |
-
| result_ablation_mbert | vsfc | mBERT | mean | eval_accuracy |
|
| 38 |
-
| result_ablation_mbert | vsfc | mBERT | mha_attention | eval_accuracy |
|
| 39 |
| result_ablation_mbert | vsfc | mBERT | multi_branch_average | eval_accuracy | 2 | 0.9371 | 0.0058 | 0.9330 | 0.9413 |
|
| 40 |
|
| 41 |
## Gated Multi-Branch Deltas
|
|
@@ -49,12 +49,12 @@ Main metric is selected per task: CoLA uses Matthews correlation; MRPC/QQP/STSB
|
|
| 49 |
| result_ablation_mbert | mrpc | mBERT | mha_attention | 0.8651 | 0.8697 | -0.0046 |
|
| 50 |
| result_ablation_mbert | mrpc | mBERT | multi_branch_average | 0.8651 | 0.8723 | -0.0072 |
|
| 51 |
| result_ablation_mbert | mrpc | mBERT | hf_sequence_classifier | 0.8651 | 0.8521 | 0.0130 |
|
| 52 |
-
| result_ablation_mbert | sst2 | mBERT | attention | 0.
|
| 53 |
-
| result_ablation_mbert | sst2 | mBERT | mha_attention | 0.
|
| 54 |
-
| result_ablation_mbert | sst2 | mBERT | multi_branch_average | 0.
|
| 55 |
-
| result_ablation_mbert | sst2 | mBERT | hf_sequence_classifier | 0.
|
| 56 |
-
| result_ablation_mbert | vsfc | mBERT | attention | 0.9384 | 0.
|
| 57 |
-
| result_ablation_mbert | vsfc | mBERT | mha_attention | 0.9384 | 0.
|
| 58 |
| result_ablation_mbert | vsfc | mBERT | multi_branch_average | 0.9384 | 0.9371 | 0.0013 |
|
| 59 |
| result_ablation_mbert | vsfc | mBERT | hf_sequence_classifier | 0.9384 | 0.9364 | 0.0020 |
|
| 60 |
|
|
@@ -129,6 +129,8 @@ Main metric is selected per task: CoLA uses Matthews correlation; MRPC/QQP/STSB
|
|
| 129 |
| result_ablation_mbert | sst2 | mBERT | mha_attention | 44.0000 | eval_accuracy | 0.8853 | 0.4113 | 0.2334 | 3.0000 | 872 | /workspace/result_ablation_mbert/sst2/mBERT/mha_attention/seed_44/all_results.json |
|
| 130 |
| result_ablation_mbert | sst2 | mBERT | multi_branch_average | 42.0000 | eval_accuracy | 0.8853 | 0.4264 | 0.2324 | 3.0000 | 872 | /workspace/result_ablation_mbert/sst2/mBERT/multi_branch_average/seed_42/all_results.json |
|
| 131 |
| result_ablation_mbert | sst2 | mBERT | multi_branch_average | 43.0000 | eval_accuracy | 0.8784 | 0.4188 | 0.2312 | 3.0000 | 872 | /workspace/result_ablation_mbert/sst2/mBERT/multi_branch_average/seed_43/all_results.json |
|
|
|
|
|
|
|
| 132 |
| result_ablation_mbert | sst2 | mBERT | gated_multi_branch | 43.0000 | eval_accuracy | 0.8784 | 0.4385 | 0.2329 | 3.0000 | 872 | /workspace/result_ablation_mbert/sst2/mBERT/gated_multi_branch/seed_43/all_results.json |
|
| 133 |
| result_ablation_mbert | sst2 | mBERT | gated_multi_branch | 44.0000 | eval_accuracy | 0.8876 | 0.4107 | 0.2325 | 3.0000 | 872 | /workspace/result_ablation_mbert/sst2/mBERT/gated_multi_branch/seed_44/all_results.json |
|
| 134 |
| result_ablation_mbert | vsfc | mBERT | hf_sequence_classifier | 42.0000 | eval_accuracy | 0.9394 | 0.2236 | 0.2049 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/hf_sequence_classifier/seed_42/all_results.json |
|
|
@@ -137,11 +139,18 @@ Main metric is selected per task: CoLA uses Matthews correlation; MRPC/QQP/STSB
|
|
| 137 |
| result_ablation_mbert | vsfc | mBERT | cls | 42.0000 | eval_accuracy | 0.9413 | 0.2187 | 0.2039 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/cls/seed_42/all_results.json |
|
| 138 |
| result_ablation_mbert | vsfc | mBERT | cls | 43.0000 | eval_accuracy | 0.9337 | 0.2425 | 0.2038 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/cls/seed_43/all_results.json |
|
| 139 |
| result_ablation_mbert | vsfc | mBERT | cls | 44.0000 | eval_accuracy | 0.9368 | 0.2246 | 0.2049 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/cls/seed_44/all_results.json |
|
|
|
|
|
|
|
| 140 |
| result_ablation_mbert | vsfc | mBERT | mean | 44.0000 | eval_accuracy | 0.9362 | 0.2267 | 0.2019 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/mean/seed_44/all_results.json |
|
|
|
|
| 141 |
| result_ablation_mbert | vsfc | mBERT | max | 43.0000 | eval_accuracy | 0.9318 | 0.2397 | 0.2094 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/max/seed_43/all_results.json |
|
| 142 |
| result_ablation_mbert | vsfc | mBERT | max | 44.0000 | eval_accuracy | 0.9330 | 0.2280 | 0.2075 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/max/seed_44/all_results.json |
|
|
|
|
|
|
|
| 143 |
| result_ablation_mbert | vsfc | mBERT | attention | 44.0000 | eval_accuracy | 0.9330 | 0.2232 | 0.2006 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/attention/seed_44/all_results.json |
|
|
|
|
| 144 |
| result_ablation_mbert | vsfc | mBERT | mha_attention | 43.0000 | eval_accuracy | 0.9387 | 0.2243 | 0.2072 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/mha_attention/seed_43/all_results.json |
|
|
|
|
| 145 |
| result_ablation_mbert | vsfc | mBERT | multi_branch_average | 42.0000 | eval_accuracy | 0.9413 | 0.2125 | 0.2045 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/multi_branch_average/seed_42/all_results.json |
|
| 146 |
| result_ablation_mbert | vsfc | mBERT | multi_branch_average | 43.0000 | eval_accuracy | 0.9330 | 0.2274 | 0.2048 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/multi_branch_average/seed_43/all_results.json |
|
| 147 |
| result_ablation_mbert | vsfc | mBERT | gated_multi_branch | 42.0000 | eval_accuracy | 0.9381 | 0.2134 | 0.2020 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/gated_multi_branch/seed_42/all_results.json |
|
|
|
|
| 23 |
| result_ablation_mbert | mrpc | mBERT | multi_branch_average | eval_combined_score | 3 | 0.8723 | 0.0057 | 0.8670 | 0.8784 |
|
| 24 |
| result_ablation_mbert | sst2 | mBERT | attention | eval_accuracy | 3 | 0.8765 | 0.0100 | 0.8681 | 0.8876 |
|
| 25 |
| result_ablation_mbert | sst2 | mBERT | cls | eval_accuracy | 3 | 0.8773 | 0.0053 | 0.8727 | 0.8830 |
|
| 26 |
+
| result_ablation_mbert | sst2 | mBERT | gated_multi_branch | eval_accuracy | 3 | 0.8777 | 0.0103 | 0.8670 | 0.8876 |
|
| 27 |
| result_ablation_mbert | sst2 | mBERT | hf_sequence_classifier | eval_accuracy | 3 | 0.8796 | 0.0091 | 0.8693 | 0.8865 |
|
| 28 |
| result_ablation_mbert | sst2 | mBERT | max | eval_accuracy | 3 | 0.8761 | 0.0040 | 0.8716 | 0.8784 |
|
| 29 |
| result_ablation_mbert | sst2 | mBERT | mean | eval_accuracy | 3 | 0.8731 | 0.0040 | 0.8693 | 0.8773 |
|
| 30 |
| result_ablation_mbert | sst2 | mBERT | mha_attention | eval_accuracy | 3 | 0.8800 | 0.0065 | 0.8727 | 0.8853 |
|
| 31 |
+
| result_ablation_mbert | sst2 | mBERT | multi_branch_average | eval_accuracy | 3 | 0.8761 | 0.0105 | 0.8647 | 0.8853 |
|
| 32 |
+
| result_ablation_mbert | vsfc | mBERT | attention | eval_accuracy | 3 | 0.9377 | 0.0051 | 0.9330 | 0.9431 |
|
| 33 |
| result_ablation_mbert | vsfc | mBERT | cls | eval_accuracy | 3 | 0.9372 | 0.0038 | 0.9337 | 0.9413 |
|
| 34 |
| result_ablation_mbert | vsfc | mBERT | gated_multi_branch | eval_accuracy | 2 | 0.9384 | 0.0004 | 0.9381 | 0.9387 |
|
| 35 |
| result_ablation_mbert | vsfc | mBERT | hf_sequence_classifier | eval_accuracy | 3 | 0.9364 | 0.0036 | 0.9324 | 0.9394 |
|
| 36 |
+
| result_ablation_mbert | vsfc | mBERT | max | eval_accuracy | 3 | 0.9358 | 0.0059 | 0.9318 | 0.9425 |
|
| 37 |
+
| result_ablation_mbert | vsfc | mBERT | mean | eval_accuracy | 3 | 0.9362 | 0.0013 | 0.9349 | 0.9375 |
|
| 38 |
+
| result_ablation_mbert | vsfc | mBERT | mha_attention | eval_accuracy | 3 | 0.9364 | 0.0026 | 0.9337 | 0.9387 |
|
| 39 |
| result_ablation_mbert | vsfc | mBERT | multi_branch_average | eval_accuracy | 2 | 0.9371 | 0.0058 | 0.9330 | 0.9413 |
|
| 40 |
|
| 41 |
## Gated Multi-Branch Deltas
|
|
|
|
| 49 |
| result_ablation_mbert | mrpc | mBERT | mha_attention | 0.8651 | 0.8697 | -0.0046 |
|
| 50 |
| result_ablation_mbert | mrpc | mBERT | multi_branch_average | 0.8651 | 0.8723 | -0.0072 |
|
| 51 |
| result_ablation_mbert | mrpc | mBERT | hf_sequence_classifier | 0.8651 | 0.8521 | 0.0130 |
|
| 52 |
+
| result_ablation_mbert | sst2 | mBERT | attention | 0.8777 | 0.8765 | 0.0011 |
|
| 53 |
+
| result_ablation_mbert | sst2 | mBERT | mha_attention | 0.8777 | 0.8800 | -0.0023 |
|
| 54 |
+
| result_ablation_mbert | sst2 | mBERT | multi_branch_average | 0.8777 | 0.8761 | 0.0015 |
|
| 55 |
+
| result_ablation_mbert | sst2 | mBERT | hf_sequence_classifier | 0.8777 | 0.8796 | -0.0019 |
|
| 56 |
+
| result_ablation_mbert | vsfc | mBERT | attention | 0.9384 | 0.9377 | 0.0007 |
|
| 57 |
+
| result_ablation_mbert | vsfc | mBERT | mha_attention | 0.9384 | 0.9364 | 0.0020 |
|
| 58 |
| result_ablation_mbert | vsfc | mBERT | multi_branch_average | 0.9384 | 0.9371 | 0.0013 |
|
| 59 |
| result_ablation_mbert | vsfc | mBERT | hf_sequence_classifier | 0.9384 | 0.9364 | 0.0020 |
|
| 60 |
|
|
|
|
| 129 |
| result_ablation_mbert | sst2 | mBERT | mha_attention | 44.0000 | eval_accuracy | 0.8853 | 0.4113 | 0.2334 | 3.0000 | 872 | /workspace/result_ablation_mbert/sst2/mBERT/mha_attention/seed_44/all_results.json |
|
| 130 |
| result_ablation_mbert | sst2 | mBERT | multi_branch_average | 42.0000 | eval_accuracy | 0.8853 | 0.4264 | 0.2324 | 3.0000 | 872 | /workspace/result_ablation_mbert/sst2/mBERT/multi_branch_average/seed_42/all_results.json |
|
| 131 |
| result_ablation_mbert | sst2 | mBERT | multi_branch_average | 43.0000 | eval_accuracy | 0.8784 | 0.4188 | 0.2312 | 3.0000 | 872 | /workspace/result_ablation_mbert/sst2/mBERT/multi_branch_average/seed_43/all_results.json |
|
| 132 |
+
| result_ablation_mbert | sst2 | mBERT | multi_branch_average | 44.0000 | eval_accuracy | 0.8647 | 0.4120 | 0.2321 | 3.0000 | 872 | /workspace/result_ablation_mbert/sst2/mBERT/multi_branch_average/seed_44/all_results.json |
|
| 133 |
+
| result_ablation_mbert | sst2 | mBERT | gated_multi_branch | 42.0000 | eval_accuracy | 0.8670 | 0.4253 | 0.2346 | 3.0000 | 872 | /workspace/result_ablation_mbert/sst2/mBERT/gated_multi_branch/seed_42/all_results.json |
|
| 134 |
| result_ablation_mbert | sst2 | mBERT | gated_multi_branch | 43.0000 | eval_accuracy | 0.8784 | 0.4385 | 0.2329 | 3.0000 | 872 | /workspace/result_ablation_mbert/sst2/mBERT/gated_multi_branch/seed_43/all_results.json |
|
| 135 |
| result_ablation_mbert | sst2 | mBERT | gated_multi_branch | 44.0000 | eval_accuracy | 0.8876 | 0.4107 | 0.2325 | 3.0000 | 872 | /workspace/result_ablation_mbert/sst2/mBERT/gated_multi_branch/seed_44/all_results.json |
|
| 136 |
| result_ablation_mbert | vsfc | mBERT | hf_sequence_classifier | 42.0000 | eval_accuracy | 0.9394 | 0.2236 | 0.2049 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/hf_sequence_classifier/seed_42/all_results.json |
|
|
|
|
| 139 |
| result_ablation_mbert | vsfc | mBERT | cls | 42.0000 | eval_accuracy | 0.9413 | 0.2187 | 0.2039 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/cls/seed_42/all_results.json |
|
| 140 |
| result_ablation_mbert | vsfc | mBERT | cls | 43.0000 | eval_accuracy | 0.9337 | 0.2425 | 0.2038 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/cls/seed_43/all_results.json |
|
| 141 |
| result_ablation_mbert | vsfc | mBERT | cls | 44.0000 | eval_accuracy | 0.9368 | 0.2246 | 0.2049 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/cls/seed_44/all_results.json |
|
| 142 |
+
| result_ablation_mbert | vsfc | mBERT | mean | 42.0000 | eval_accuracy | 0.9375 | 0.2157 | 0.2002 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/mean/seed_42/all_results.json |
|
| 143 |
+
| result_ablation_mbert | vsfc | mBERT | mean | 43.0000 | eval_accuracy | 0.9349 | 0.2330 | 0.2019 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/mean/seed_43/all_results.json |
|
| 144 |
| result_ablation_mbert | vsfc | mBERT | mean | 44.0000 | eval_accuracy | 0.9362 | 0.2267 | 0.2019 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/mean/seed_44/all_results.json |
|
| 145 |
+
| result_ablation_mbert | vsfc | mBERT | max | 42.0000 | eval_accuracy | 0.9425 | 0.2218 | 0.2088 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/max/seed_42/all_results.json |
|
| 146 |
| result_ablation_mbert | vsfc | mBERT | max | 43.0000 | eval_accuracy | 0.9318 | 0.2397 | 0.2094 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/max/seed_43/all_results.json |
|
| 147 |
| result_ablation_mbert | vsfc | mBERT | max | 44.0000 | eval_accuracy | 0.9330 | 0.2280 | 0.2075 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/max/seed_44/all_results.json |
|
| 148 |
+
| result_ablation_mbert | vsfc | mBERT | attention | 42.0000 | eval_accuracy | 0.9431 | 0.2203 | 0.2009 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/attention/seed_42/all_results.json |
|
| 149 |
+
| result_ablation_mbert | vsfc | mBERT | attention | 43.0000 | eval_accuracy | 0.9368 | 0.2279 | 0.2013 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/attention/seed_43/all_results.json |
|
| 150 |
| result_ablation_mbert | vsfc | mBERT | attention | 44.0000 | eval_accuracy | 0.9330 | 0.2232 | 0.2006 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/attention/seed_44/all_results.json |
|
| 151 |
+
| result_ablation_mbert | vsfc | mBERT | mha_attention | 42.0000 | eval_accuracy | 0.9368 | 0.2160 | 0.2058 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/mha_attention/seed_42/all_results.json |
|
| 152 |
| result_ablation_mbert | vsfc | mBERT | mha_attention | 43.0000 | eval_accuracy | 0.9387 | 0.2243 | 0.2072 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/mha_attention/seed_43/all_results.json |
|
| 153 |
+
| result_ablation_mbert | vsfc | mBERT | mha_attention | 44.0000 | eval_accuracy | 0.9337 | 0.2265 | 0.2110 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/mha_attention/seed_44/all_results.json |
|
| 154 |
| result_ablation_mbert | vsfc | mBERT | multi_branch_average | 42.0000 | eval_accuracy | 0.9413 | 0.2125 | 0.2045 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/multi_branch_average/seed_42/all_results.json |
|
| 155 |
| result_ablation_mbert | vsfc | mBERT | multi_branch_average | 43.0000 | eval_accuracy | 0.9330 | 0.2274 | 0.2048 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/multi_branch_average/seed_43/all_results.json |
|
| 156 |
| result_ablation_mbert | vsfc | mBERT | gated_multi_branch | 42.0000 | eval_accuracy | 0.9381 | 0.2134 | 0.2020 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/gated_multi_branch/seed_42/all_results.json |
|