PuxAI committed on
Commit
aedd10e
·
verified ·
1 Parent(s): 457a489

Upload ablation summaries

Browse files
mbert_rtx6000_metrics/docs/ablation_results.csv CHANGED
@@ -67,6 +67,8 @@ result_ablation_mbert,sst2,mBERT,mha_attention,43,eval_accuracy,0.88188073394495
67
  result_ablation_mbert,sst2,mBERT,mha_attention,44,eval_accuracy,0.8853211009174312,0.4112522304058075,0.2333670342296843,3.0,872,/workspace/result_ablation_mbert/sst2/mBERT/mha_attention/seed_44/all_results.json
68
  result_ablation_mbert,sst2,mBERT,multi_branch_average,42,eval_accuracy,0.8853211009174312,0.4263913333415985,0.2324217217996476,3.0,872,/workspace/result_ablation_mbert/sst2/mBERT/multi_branch_average/seed_42/all_results.json
69
  result_ablation_mbert,sst2,mBERT,multi_branch_average,43,eval_accuracy,0.8784403669724771,0.41880160570144653,0.23121720937355744,3.0,872,/workspace/result_ablation_mbert/sst2/mBERT/multi_branch_average/seed_43/all_results.json
 
 
70
  result_ablation_mbert,sst2,mBERT,gated_multi_branch,43,eval_accuracy,0.8784403669724771,0.43847355246543884,0.23293366598041423,3.0,872,/workspace/result_ablation_mbert/sst2/mBERT/gated_multi_branch/seed_43/all_results.json
71
  result_ablation_mbert,sst2,mBERT,gated_multi_branch,44,eval_accuracy,0.8876146788990825,0.41072866320610046,0.2325162212280353,3.0,872,/workspace/result_ablation_mbert/sst2/mBERT/gated_multi_branch/seed_44/all_results.json
72
  result_ablation_mbert,vsfc,mBERT,hf_sequence_classifier,42,eval_accuracy,0.9393556538218573,0.22358696162700653,0.20489713470639875,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/hf_sequence_classifier/seed_42/all_results.json
@@ -75,11 +77,18 @@ result_ablation_mbert,vsfc,mBERT,hf_sequence_classifier,44,eval_accuracy,0.93240
75
  result_ablation_mbert,vsfc,mBERT,cls,42,eval_accuracy,0.9412507896399241,0.21872149407863617,0.2038555185166363,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/cls/seed_42/all_results.json
76
  result_ablation_mbert,vsfc,mBERT,cls,43,eval_accuracy,0.9336702463676564,0.24253493547439575,0.2038297117837137,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/cls/seed_43/all_results.json
77
  result_ablation_mbert,vsfc,mBERT,cls,44,eval_accuracy,0.9368288060644346,0.22460007667541504,0.20489413286020233,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/cls/seed_44/all_results.json
 
 
78
  result_ablation_mbert,vsfc,mBERT,mean,44,eval_accuracy,0.936197094125079,0.22665680944919586,0.20191558268612234,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/mean/seed_44/all_results.json
 
79
  result_ablation_mbert,vsfc,mBERT,max,43,eval_accuracy,0.9317751105495894,0.23970364034175873,0.20938005839764157,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/max/seed_43/all_results.json
80
  result_ablation_mbert,vsfc,mBERT,max,44,eval_accuracy,0.9330385344283006,0.2280229926109314,0.20747514532533484,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/max/seed_44/all_results.json
 
 
81
  result_ablation_mbert,vsfc,mBERT,attention,44,eval_accuracy,0.9330385344283006,0.2232467234134674,0.2006188094034022,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/attention/seed_44/all_results.json
 
82
  result_ablation_mbert,vsfc,mBERT,mha_attention,43,eval_accuracy,0.9387239418825016,0.2242736667394638,0.20716787132757977,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/mha_attention/seed_43/all_results.json
 
83
  result_ablation_mbert,vsfc,mBERT,multi_branch_average,42,eval_accuracy,0.9412507896399241,0.21253333985805511,0.20452363604115975,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/multi_branch_average/seed_42/all_results.json
84
  result_ablation_mbert,vsfc,mBERT,multi_branch_average,43,eval_accuracy,0.9330385344283006,0.22735266387462616,0.20476110576752007,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/multi_branch_average/seed_43/all_results.json
85
  result_ablation_mbert,vsfc,mBERT,gated_multi_branch,42,eval_accuracy,0.9380922299431459,0.2133793830871582,0.2020011867605659,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/gated_multi_branch/seed_42/all_results.json
 
67
  result_ablation_mbert,sst2,mBERT,mha_attention,44,eval_accuracy,0.8853211009174312,0.4112522304058075,0.2333670342296843,3.0,872,/workspace/result_ablation_mbert/sst2/mBERT/mha_attention/seed_44/all_results.json
68
  result_ablation_mbert,sst2,mBERT,multi_branch_average,42,eval_accuracy,0.8853211009174312,0.4263913333415985,0.2324217217996476,3.0,872,/workspace/result_ablation_mbert/sst2/mBERT/multi_branch_average/seed_42/all_results.json
69
  result_ablation_mbert,sst2,mBERT,multi_branch_average,43,eval_accuracy,0.8784403669724771,0.41880160570144653,0.23121720937355744,3.0,872,/workspace/result_ablation_mbert/sst2/mBERT/multi_branch_average/seed_43/all_results.json
70
+ result_ablation_mbert,sst2,mBERT,multi_branch_average,44,eval_accuracy,0.8646788990825688,0.4119800925254822,0.23205033346571927,3.0,872,/workspace/result_ablation_mbert/sst2/mBERT/multi_branch_average/seed_44/all_results.json
71
+ result_ablation_mbert,sst2,mBERT,gated_multi_branch,42,eval_accuracy,0.8669724770642202,0.42526304721832275,0.23464352019831666,3.0,872,/workspace/result_ablation_mbert/sst2/mBERT/gated_multi_branch/seed_42/all_results.json
72
  result_ablation_mbert,sst2,mBERT,gated_multi_branch,43,eval_accuracy,0.8784403669724771,0.43847355246543884,0.23293366598041423,3.0,872,/workspace/result_ablation_mbert/sst2/mBERT/gated_multi_branch/seed_43/all_results.json
73
  result_ablation_mbert,sst2,mBERT,gated_multi_branch,44,eval_accuracy,0.8876146788990825,0.41072866320610046,0.2325162212280353,3.0,872,/workspace/result_ablation_mbert/sst2/mBERT/gated_multi_branch/seed_44/all_results.json
74
  result_ablation_mbert,vsfc,mBERT,hf_sequence_classifier,42,eval_accuracy,0.9393556538218573,0.22358696162700653,0.20489713470639875,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/hf_sequence_classifier/seed_42/all_results.json
 
77
  result_ablation_mbert,vsfc,mBERT,cls,42,eval_accuracy,0.9412507896399241,0.21872149407863617,0.2038555185166363,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/cls/seed_42/all_results.json
78
  result_ablation_mbert,vsfc,mBERT,cls,43,eval_accuracy,0.9336702463676564,0.24253493547439575,0.2038297117837137,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/cls/seed_43/all_results.json
79
  result_ablation_mbert,vsfc,mBERT,cls,44,eval_accuracy,0.9368288060644346,0.22460007667541504,0.20489413286020233,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/cls/seed_44/all_results.json
80
+ result_ablation_mbert,vsfc,mBERT,mean,42,eval_accuracy,0.9374605180037903,0.21570518612861633,0.2002154952811397,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/mean/seed_42/all_results.json
81
+ result_ablation_mbert,vsfc,mBERT,mean,43,eval_accuracy,0.9349336702463676,0.23304706811904907,0.20186518625235458,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/mean/seed_43/all_results.json
82
  result_ablation_mbert,vsfc,mBERT,mean,44,eval_accuracy,0.936197094125079,0.22665680944919586,0.20191558268612234,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/mean/seed_44/all_results.json
83
+ result_ablation_mbert,vsfc,mBERT,max,42,eval_accuracy,0.9425142135186355,0.2218012511730194,0.20883273712594472,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/max/seed_42/all_results.json
84
  result_ablation_mbert,vsfc,mBERT,max,43,eval_accuracy,0.9317751105495894,0.23970364034175873,0.20938005839764157,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/max/seed_43/all_results.json
85
  result_ablation_mbert,vsfc,mBERT,max,44,eval_accuracy,0.9330385344283006,0.2280229926109314,0.20747514532533484,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/max/seed_44/all_results.json
86
+ result_ablation_mbert,vsfc,mBERT,attention,42,eval_accuracy,0.9431459254579911,0.22031328082084656,0.2008532669753709,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/attention/seed_42/all_results.json
87
+ result_ablation_mbert,vsfc,mBERT,attention,43,eval_accuracy,0.9368288060644346,0.2278706282377243,0.20126758842122439,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/attention/seed_43/all_results.json
88
  result_ablation_mbert,vsfc,mBERT,attention,44,eval_accuracy,0.9330385344283006,0.2232467234134674,0.2006188094034022,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/attention/seed_44/all_results.json
89
+ result_ablation_mbert,vsfc,mBERT,mha_attention,42,eval_accuracy,0.9368288060644346,0.21603453159332275,0.2058336245963763,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/mha_attention/seed_42/all_results.json
90
  result_ablation_mbert,vsfc,mBERT,mha_attention,43,eval_accuracy,0.9387239418825016,0.2242736667394638,0.20716787132757977,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/mha_attention/seed_43/all_results.json
91
+ result_ablation_mbert,vsfc,mBERT,mha_attention,44,eval_accuracy,0.9336702463676564,0.2265038937330246,0.21097227918553052,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/mha_attention/seed_44/all_results.json
92
  result_ablation_mbert,vsfc,mBERT,multi_branch_average,42,eval_accuracy,0.9412507896399241,0.21253333985805511,0.20452363604115975,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/multi_branch_average/seed_42/all_results.json
93
  result_ablation_mbert,vsfc,mBERT,multi_branch_average,43,eval_accuracy,0.9330385344283006,0.22735266387462616,0.20476110576752007,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/multi_branch_average/seed_43/all_results.json
94
  result_ablation_mbert,vsfc,mBERT,gated_multi_branch,42,eval_accuracy,0.9380922299431459,0.2133793830871582,0.2020011867605659,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/gated_multi_branch/seed_42/all_results.json
mbert_rtx6000_metrics/docs/ablation_results_aggregate.csv CHANGED
@@ -17,17 +17,17 @@ result_ablation_mbert,mrpc,mBERT,mha_attention,eval_combined_score,3,0.869748361
17
  result_ablation_mbert,mrpc,mBERT,multi_branch_average,eval_combined_score,3,0.8723060123547228,0.005738234218203849,0.8670419052576783,0.878422920892495
18
  result_ablation_mbert,sst2,mBERT,attention,eval_accuracy,3,0.8765290519877675,0.010019374940428994,0.8681192660550459,0.8876146788990825
19
  result_ablation_mbert,sst2,mBERT,cls,eval_accuracy,3,0.8772935779816513,0.005255247356600745,0.8727064220183486,0.8830275229357798
20
- result_ablation_mbert,sst2,mBERT,gated_multi_branch,eval_accuracy,2,0.8830275229357798,0.0064872181760233325,0.8784403669724771,0.8876146788990825
21
  result_ablation_mbert,sst2,mBERT,hf_sequence_classifier,eval_accuracy,3,0.8795871559633027,0.009102355427974529,0.8692660550458715,0.8864678899082569
22
  result_ablation_mbert,sst2,mBERT,max,eval_accuracy,3,0.8761467889908257,0.00397259359534147,0.8715596330275229,0.8784403669724771
23
  result_ablation_mbert,sst2,mBERT,mean,eval_accuracy,3,0.8730886850152905,0.004027390578307669,0.8692660550458715,0.8772935779816514
24
  result_ablation_mbert,sst2,mBERT,mha_attention,eval_accuracy,3,0.8799694189602446,0.006520918237474033,0.8727064220183486,0.8853211009174312
25
- result_ablation_mbert,sst2,mBERT,multi_branch_average,eval_accuracy,2,0.8818807339449541,0.004865413632017539,0.8784403669724771,0.8853211009174312
26
- result_ablation_mbert,vsfc,mBERT,attention,eval_accuracy,1,0.9330385344283006,0.0,0.9330385344283006,0.9330385344283006
27
  result_ablation_mbert,vsfc,mBERT,cls,eval_accuracy,3,0.9372499473573384,0.0038077787576384315,0.9336702463676564,0.9412507896399241
28
  result_ablation_mbert,vsfc,mBERT,gated_multi_branch,eval_accuracy,2,0.9384080859128238,0.0004466877960749482,0.9380922299431459,0.9387239418825016
29
  result_ablation_mbert,vsfc,mBERT,hf_sequence_classifier,eval_accuracy,3,0.9364076647715308,0.0035920661421840576,0.932406822488945,0.9393556538218573
30
- result_ablation_mbert,vsfc,mBERT,max,eval_accuracy,2,0.932406822488945,0.0008933755921497394,0.9317751105495894,0.9330385344283006
31
- result_ablation_mbert,vsfc,mBERT,mean,eval_accuracy,1,0.936197094125079,0.0,0.936197094125079,0.936197094125079
32
- result_ablation_mbert,vsfc,mBERT,mha_attention,eval_accuracy,1,0.9387239418825016,0.0,0.9387239418825016,0.9387239418825016
33
  result_ablation_mbert,vsfc,mBERT,multi_branch_average,eval_accuracy,2,0.9371446620341124,0.005806941348973541,0.9330385344283006,0.9412507896399241
 
17
  result_ablation_mbert,mrpc,mBERT,multi_branch_average,eval_combined_score,3,0.8723060123547228,0.005738234218203849,0.8670419052576783,0.878422920892495
18
  result_ablation_mbert,sst2,mBERT,attention,eval_accuracy,3,0.8765290519877675,0.010019374940428994,0.8681192660550459,0.8876146788990825
19
  result_ablation_mbert,sst2,mBERT,cls,eval_accuracy,3,0.8772935779816513,0.005255247356600745,0.8727064220183486,0.8830275229357798
20
+ result_ablation_mbert,sst2,mBERT,gated_multi_branch,eval_accuracy,3,0.8776758409785933,0.010342315946841468,0.8669724770642202,0.8876146788990825
21
  result_ablation_mbert,sst2,mBERT,hf_sequence_classifier,eval_accuracy,3,0.8795871559633027,0.009102355427974529,0.8692660550458715,0.8864678899082569
22
  result_ablation_mbert,sst2,mBERT,max,eval_accuracy,3,0.8761467889908257,0.00397259359534147,0.8715596330275229,0.8784403669724771
23
  result_ablation_mbert,sst2,mBERT,mean,eval_accuracy,3,0.8730886850152905,0.004027390578307669,0.8692660550458715,0.8772935779816514
24
  result_ablation_mbert,sst2,mBERT,mha_attention,eval_accuracy,3,0.8799694189602446,0.006520918237474033,0.8727064220183486,0.8853211009174312
25
+ result_ablation_mbert,sst2,mBERT,multi_branch_average,eval_accuracy,3,0.8761467889908257,0.01051049471320149,0.8646788990825688,0.8853211009174312
26
+ result_ablation_mbert,vsfc,mBERT,attention,eval_accuracy,3,0.9376710886502422,0.005106066815322034,0.9330385344283006,0.9431459254579911
27
  result_ablation_mbert,vsfc,mBERT,cls,eval_accuracy,3,0.9372499473573384,0.0038077787576384315,0.9336702463676564,0.9412507896399241
28
  result_ablation_mbert,vsfc,mBERT,gated_multi_branch,eval_accuracy,2,0.9384080859128238,0.0004466877960749482,0.9380922299431459,0.9387239418825016
29
  result_ablation_mbert,vsfc,mBERT,hf_sequence_classifier,eval_accuracy,3,0.9364076647715308,0.0035920661421840576,0.932406822488945,0.9393556538218573
30
+ result_ablation_mbert,vsfc,mBERT,max,eval_accuracy,3,0.9357759528321752,0.00586959775311282,0.9317751105495894,0.9425142135186355
31
+ result_ablation_mbert,vsfc,mBERT,mean,eval_accuracy,3,0.936197094125079,0.0012634238787113117,0.9349336702463676,0.9374605180037903
32
+ result_ablation_mbert,vsfc,mBERT,mha_attention,eval_accuracy,3,0.9364076647715309,0.0025530334076610166,0.9336702463676564,0.9387239418825016
33
  result_ablation_mbert,vsfc,mBERT,multi_branch_average,eval_accuracy,2,0.9371446620341124,0.005806941348973541,0.9330385344283006,0.9412507896399241
mbert_rtx6000_metrics/docs/ablation_summary.md CHANGED
@@ -23,19 +23,19 @@ Main metric is selected per task: CoLA uses Matthews correlation; MRPC/QQP/STSB
23
  | result_ablation_mbert | mrpc | mBERT | multi_branch_average | eval_combined_score | 3 | 0.8723 | 0.0057 | 0.8670 | 0.8784 |
24
  | result_ablation_mbert | sst2 | mBERT | attention | eval_accuracy | 3 | 0.8765 | 0.0100 | 0.8681 | 0.8876 |
25
  | result_ablation_mbert | sst2 | mBERT | cls | eval_accuracy | 3 | 0.8773 | 0.0053 | 0.8727 | 0.8830 |
26
- | result_ablation_mbert | sst2 | mBERT | gated_multi_branch | eval_accuracy | 2 | 0.8830 | 0.0065 | 0.8784 | 0.8876 |
27
  | result_ablation_mbert | sst2 | mBERT | hf_sequence_classifier | eval_accuracy | 3 | 0.8796 | 0.0091 | 0.8693 | 0.8865 |
28
  | result_ablation_mbert | sst2 | mBERT | max | eval_accuracy | 3 | 0.8761 | 0.0040 | 0.8716 | 0.8784 |
29
  | result_ablation_mbert | sst2 | mBERT | mean | eval_accuracy | 3 | 0.8731 | 0.0040 | 0.8693 | 0.8773 |
30
  | result_ablation_mbert | sst2 | mBERT | mha_attention | eval_accuracy | 3 | 0.8800 | 0.0065 | 0.8727 | 0.8853 |
31
- | result_ablation_mbert | sst2 | mBERT | multi_branch_average | eval_accuracy | 2 | 0.8819 | 0.0049 | 0.8784 | 0.8853 |
32
- | result_ablation_mbert | vsfc | mBERT | attention | eval_accuracy | 1 | 0.9330 | 0.0000 | 0.9330 | 0.9330 |
33
  | result_ablation_mbert | vsfc | mBERT | cls | eval_accuracy | 3 | 0.9372 | 0.0038 | 0.9337 | 0.9413 |
34
  | result_ablation_mbert | vsfc | mBERT | gated_multi_branch | eval_accuracy | 2 | 0.9384 | 0.0004 | 0.9381 | 0.9387 |
35
  | result_ablation_mbert | vsfc | mBERT | hf_sequence_classifier | eval_accuracy | 3 | 0.9364 | 0.0036 | 0.9324 | 0.9394 |
36
- | result_ablation_mbert | vsfc | mBERT | max | eval_accuracy | 2 | 0.9324 | 0.0009 | 0.9318 | 0.9330 |
37
- | result_ablation_mbert | vsfc | mBERT | mean | eval_accuracy | 1 | 0.9362 | 0.0000 | 0.9362 | 0.9362 |
38
- | result_ablation_mbert | vsfc | mBERT | mha_attention | eval_accuracy | 1 | 0.9387 | 0.0000 | 0.9387 | 0.9387 |
39
  | result_ablation_mbert | vsfc | mBERT | multi_branch_average | eval_accuracy | 2 | 0.9371 | 0.0058 | 0.9330 | 0.9413 |
40
 
41
  ## Gated Multi-Branch Deltas
@@ -49,12 +49,12 @@ Main metric is selected per task: CoLA uses Matthews correlation; MRPC/QQP/STSB
49
  | result_ablation_mbert | mrpc | mBERT | mha_attention | 0.8651 | 0.8697 | -0.0046 |
50
  | result_ablation_mbert | mrpc | mBERT | multi_branch_average | 0.8651 | 0.8723 | -0.0072 |
51
  | result_ablation_mbert | mrpc | mBERT | hf_sequence_classifier | 0.8651 | 0.8521 | 0.0130 |
52
- | result_ablation_mbert | sst2 | mBERT | attention | 0.8830 | 0.8765 | 0.0065 |
53
- | result_ablation_mbert | sst2 | mBERT | mha_attention | 0.8830 | 0.8800 | 0.0031 |
54
- | result_ablation_mbert | sst2 | mBERT | multi_branch_average | 0.8830 | 0.8819 | 0.0011 |
55
- | result_ablation_mbert | sst2 | mBERT | hf_sequence_classifier | 0.8830 | 0.8796 | 0.0034 |
56
- | result_ablation_mbert | vsfc | mBERT | attention | 0.9384 | 0.9330 | 0.0054 |
57
- | result_ablation_mbert | vsfc | mBERT | mha_attention | 0.9384 | 0.9387 | -0.0003 |
58
  | result_ablation_mbert | vsfc | mBERT | multi_branch_average | 0.9384 | 0.9371 | 0.0013 |
59
  | result_ablation_mbert | vsfc | mBERT | hf_sequence_classifier | 0.9384 | 0.9364 | 0.0020 |
60
 
@@ -129,6 +129,8 @@ Main metric is selected per task: CoLA uses Matthews correlation; MRPC/QQP/STSB
129
  | result_ablation_mbert | sst2 | mBERT | mha_attention | 44.0000 | eval_accuracy | 0.8853 | 0.4113 | 0.2334 | 3.0000 | 872 | /workspace/result_ablation_mbert/sst2/mBERT/mha_attention/seed_44/all_results.json |
130
  | result_ablation_mbert | sst2 | mBERT | multi_branch_average | 42.0000 | eval_accuracy | 0.8853 | 0.4264 | 0.2324 | 3.0000 | 872 | /workspace/result_ablation_mbert/sst2/mBERT/multi_branch_average/seed_42/all_results.json |
131
  | result_ablation_mbert | sst2 | mBERT | multi_branch_average | 43.0000 | eval_accuracy | 0.8784 | 0.4188 | 0.2312 | 3.0000 | 872 | /workspace/result_ablation_mbert/sst2/mBERT/multi_branch_average/seed_43/all_results.json |
 
 
132
  | result_ablation_mbert | sst2 | mBERT | gated_multi_branch | 43.0000 | eval_accuracy | 0.8784 | 0.4385 | 0.2329 | 3.0000 | 872 | /workspace/result_ablation_mbert/sst2/mBERT/gated_multi_branch/seed_43/all_results.json |
133
  | result_ablation_mbert | sst2 | mBERT | gated_multi_branch | 44.0000 | eval_accuracy | 0.8876 | 0.4107 | 0.2325 | 3.0000 | 872 | /workspace/result_ablation_mbert/sst2/mBERT/gated_multi_branch/seed_44/all_results.json |
134
  | result_ablation_mbert | vsfc | mBERT | hf_sequence_classifier | 42.0000 | eval_accuracy | 0.9394 | 0.2236 | 0.2049 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/hf_sequence_classifier/seed_42/all_results.json |
@@ -137,11 +139,18 @@ Main metric is selected per task: CoLA uses Matthews correlation; MRPC/QQP/STSB
137
  | result_ablation_mbert | vsfc | mBERT | cls | 42.0000 | eval_accuracy | 0.9413 | 0.2187 | 0.2039 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/cls/seed_42/all_results.json |
138
  | result_ablation_mbert | vsfc | mBERT | cls | 43.0000 | eval_accuracy | 0.9337 | 0.2425 | 0.2038 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/cls/seed_43/all_results.json |
139
  | result_ablation_mbert | vsfc | mBERT | cls | 44.0000 | eval_accuracy | 0.9368 | 0.2246 | 0.2049 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/cls/seed_44/all_results.json |
 
 
140
  | result_ablation_mbert | vsfc | mBERT | mean | 44.0000 | eval_accuracy | 0.9362 | 0.2267 | 0.2019 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/mean/seed_44/all_results.json |
 
141
  | result_ablation_mbert | vsfc | mBERT | max | 43.0000 | eval_accuracy | 0.9318 | 0.2397 | 0.2094 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/max/seed_43/all_results.json |
142
  | result_ablation_mbert | vsfc | mBERT | max | 44.0000 | eval_accuracy | 0.9330 | 0.2280 | 0.2075 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/max/seed_44/all_results.json |
 
 
143
  | result_ablation_mbert | vsfc | mBERT | attention | 44.0000 | eval_accuracy | 0.9330 | 0.2232 | 0.2006 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/attention/seed_44/all_results.json |
 
144
  | result_ablation_mbert | vsfc | mBERT | mha_attention | 43.0000 | eval_accuracy | 0.9387 | 0.2243 | 0.2072 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/mha_attention/seed_43/all_results.json |
 
145
  | result_ablation_mbert | vsfc | mBERT | multi_branch_average | 42.0000 | eval_accuracy | 0.9413 | 0.2125 | 0.2045 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/multi_branch_average/seed_42/all_results.json |
146
  | result_ablation_mbert | vsfc | mBERT | multi_branch_average | 43.0000 | eval_accuracy | 0.9330 | 0.2274 | 0.2048 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/multi_branch_average/seed_43/all_results.json |
147
  | result_ablation_mbert | vsfc | mBERT | gated_multi_branch | 42.0000 | eval_accuracy | 0.9381 | 0.2134 | 0.2020 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/gated_multi_branch/seed_42/all_results.json |
 
23
  | result_ablation_mbert | mrpc | mBERT | multi_branch_average | eval_combined_score | 3 | 0.8723 | 0.0057 | 0.8670 | 0.8784 |
24
  | result_ablation_mbert | sst2 | mBERT | attention | eval_accuracy | 3 | 0.8765 | 0.0100 | 0.8681 | 0.8876 |
25
  | result_ablation_mbert | sst2 | mBERT | cls | eval_accuracy | 3 | 0.8773 | 0.0053 | 0.8727 | 0.8830 |
26
+ | result_ablation_mbert | sst2 | mBERT | gated_multi_branch | eval_accuracy | 3 | 0.8777 | 0.0103 | 0.8670 | 0.8876 |
27
  | result_ablation_mbert | sst2 | mBERT | hf_sequence_classifier | eval_accuracy | 3 | 0.8796 | 0.0091 | 0.8693 | 0.8865 |
28
  | result_ablation_mbert | sst2 | mBERT | max | eval_accuracy | 3 | 0.8761 | 0.0040 | 0.8716 | 0.8784 |
29
  | result_ablation_mbert | sst2 | mBERT | mean | eval_accuracy | 3 | 0.8731 | 0.0040 | 0.8693 | 0.8773 |
30
  | result_ablation_mbert | sst2 | mBERT | mha_attention | eval_accuracy | 3 | 0.8800 | 0.0065 | 0.8727 | 0.8853 |
31
+ | result_ablation_mbert | sst2 | mBERT | multi_branch_average | eval_accuracy | 3 | 0.8761 | 0.0105 | 0.8647 | 0.8853 |
32
+ | result_ablation_mbert | vsfc | mBERT | attention | eval_accuracy | 3 | 0.9377 | 0.0051 | 0.9330 | 0.9431 |
33
  | result_ablation_mbert | vsfc | mBERT | cls | eval_accuracy | 3 | 0.9372 | 0.0038 | 0.9337 | 0.9413 |
34
  | result_ablation_mbert | vsfc | mBERT | gated_multi_branch | eval_accuracy | 2 | 0.9384 | 0.0004 | 0.9381 | 0.9387 |
35
  | result_ablation_mbert | vsfc | mBERT | hf_sequence_classifier | eval_accuracy | 3 | 0.9364 | 0.0036 | 0.9324 | 0.9394 |
36
+ | result_ablation_mbert | vsfc | mBERT | max | eval_accuracy | 3 | 0.9358 | 0.0059 | 0.9318 | 0.9425 |
37
+ | result_ablation_mbert | vsfc | mBERT | mean | eval_accuracy | 3 | 0.9362 | 0.0013 | 0.9349 | 0.9375 |
38
+ | result_ablation_mbert | vsfc | mBERT | mha_attention | eval_accuracy | 3 | 0.9364 | 0.0026 | 0.9337 | 0.9387 |
39
  | result_ablation_mbert | vsfc | mBERT | multi_branch_average | eval_accuracy | 2 | 0.9371 | 0.0058 | 0.9330 | 0.9413 |
40
 
41
  ## Gated Multi-Branch Deltas
 
49
  | result_ablation_mbert | mrpc | mBERT | mha_attention | 0.8651 | 0.8697 | -0.0046 |
50
  | result_ablation_mbert | mrpc | mBERT | multi_branch_average | 0.8651 | 0.8723 | -0.0072 |
51
  | result_ablation_mbert | mrpc | mBERT | hf_sequence_classifier | 0.8651 | 0.8521 | 0.0130 |
52
+ | result_ablation_mbert | sst2 | mBERT | attention | 0.8777 | 0.8765 | 0.0011 |
53
+ | result_ablation_mbert | sst2 | mBERT | mha_attention | 0.8777 | 0.8800 | -0.0023 |
54
+ | result_ablation_mbert | sst2 | mBERT | multi_branch_average | 0.8777 | 0.8761 | 0.0015 |
55
+ | result_ablation_mbert | sst2 | mBERT | hf_sequence_classifier | 0.8777 | 0.8796 | -0.0019 |
56
+ | result_ablation_mbert | vsfc | mBERT | attention | 0.9384 | 0.9377 | 0.0007 |
57
+ | result_ablation_mbert | vsfc | mBERT | mha_attention | 0.9384 | 0.9364 | 0.0020 |
58
  | result_ablation_mbert | vsfc | mBERT | multi_branch_average | 0.9384 | 0.9371 | 0.0013 |
59
  | result_ablation_mbert | vsfc | mBERT | hf_sequence_classifier | 0.9384 | 0.9364 | 0.0020 |
60
 
 
129
  | result_ablation_mbert | sst2 | mBERT | mha_attention | 44.0000 | eval_accuracy | 0.8853 | 0.4113 | 0.2334 | 3.0000 | 872 | /workspace/result_ablation_mbert/sst2/mBERT/mha_attention/seed_44/all_results.json |
130
  | result_ablation_mbert | sst2 | mBERT | multi_branch_average | 42.0000 | eval_accuracy | 0.8853 | 0.4264 | 0.2324 | 3.0000 | 872 | /workspace/result_ablation_mbert/sst2/mBERT/multi_branch_average/seed_42/all_results.json |
131
  | result_ablation_mbert | sst2 | mBERT | multi_branch_average | 43.0000 | eval_accuracy | 0.8784 | 0.4188 | 0.2312 | 3.0000 | 872 | /workspace/result_ablation_mbert/sst2/mBERT/multi_branch_average/seed_43/all_results.json |
132
+ | result_ablation_mbert | sst2 | mBERT | multi_branch_average | 44.0000 | eval_accuracy | 0.8647 | 0.4120 | 0.2321 | 3.0000 | 872 | /workspace/result_ablation_mbert/sst2/mBERT/multi_branch_average/seed_44/all_results.json |
133
+ | result_ablation_mbert | sst2 | mBERT | gated_multi_branch | 42.0000 | eval_accuracy | 0.8670 | 0.4253 | 0.2346 | 3.0000 | 872 | /workspace/result_ablation_mbert/sst2/mBERT/gated_multi_branch/seed_42/all_results.json |
134
  | result_ablation_mbert | sst2 | mBERT | gated_multi_branch | 43.0000 | eval_accuracy | 0.8784 | 0.4385 | 0.2329 | 3.0000 | 872 | /workspace/result_ablation_mbert/sst2/mBERT/gated_multi_branch/seed_43/all_results.json |
135
  | result_ablation_mbert | sst2 | mBERT | gated_multi_branch | 44.0000 | eval_accuracy | 0.8876 | 0.4107 | 0.2325 | 3.0000 | 872 | /workspace/result_ablation_mbert/sst2/mBERT/gated_multi_branch/seed_44/all_results.json |
136
  | result_ablation_mbert | vsfc | mBERT | hf_sequence_classifier | 42.0000 | eval_accuracy | 0.9394 | 0.2236 | 0.2049 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/hf_sequence_classifier/seed_42/all_results.json |
 
139
  | result_ablation_mbert | vsfc | mBERT | cls | 42.0000 | eval_accuracy | 0.9413 | 0.2187 | 0.2039 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/cls/seed_42/all_results.json |
140
  | result_ablation_mbert | vsfc | mBERT | cls | 43.0000 | eval_accuracy | 0.9337 | 0.2425 | 0.2038 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/cls/seed_43/all_results.json |
141
  | result_ablation_mbert | vsfc | mBERT | cls | 44.0000 | eval_accuracy | 0.9368 | 0.2246 | 0.2049 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/cls/seed_44/all_results.json |
142
+ | result_ablation_mbert | vsfc | mBERT | mean | 42.0000 | eval_accuracy | 0.9375 | 0.2157 | 0.2002 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/mean/seed_42/all_results.json |
143
+ | result_ablation_mbert | vsfc | mBERT | mean | 43.0000 | eval_accuracy | 0.9349 | 0.2330 | 0.2019 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/mean/seed_43/all_results.json |
144
  | result_ablation_mbert | vsfc | mBERT | mean | 44.0000 | eval_accuracy | 0.9362 | 0.2267 | 0.2019 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/mean/seed_44/all_results.json |
145
+ | result_ablation_mbert | vsfc | mBERT | max | 42.0000 | eval_accuracy | 0.9425 | 0.2218 | 0.2088 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/max/seed_42/all_results.json |
146
  | result_ablation_mbert | vsfc | mBERT | max | 43.0000 | eval_accuracy | 0.9318 | 0.2397 | 0.2094 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/max/seed_43/all_results.json |
147
  | result_ablation_mbert | vsfc | mBERT | max | 44.0000 | eval_accuracy | 0.9330 | 0.2280 | 0.2075 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/max/seed_44/all_results.json |
148
+ | result_ablation_mbert | vsfc | mBERT | attention | 42.0000 | eval_accuracy | 0.9431 | 0.2203 | 0.2009 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/attention/seed_42/all_results.json |
149
+ | result_ablation_mbert | vsfc | mBERT | attention | 43.0000 | eval_accuracy | 0.9368 | 0.2279 | 0.2013 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/attention/seed_43/all_results.json |
150
  | result_ablation_mbert | vsfc | mBERT | attention | 44.0000 | eval_accuracy | 0.9330 | 0.2232 | 0.2006 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/attention/seed_44/all_results.json |
151
+ | result_ablation_mbert | vsfc | mBERT | mha_attention | 42.0000 | eval_accuracy | 0.9368 | 0.2160 | 0.2058 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/mha_attention/seed_42/all_results.json |
152
  | result_ablation_mbert | vsfc | mBERT | mha_attention | 43.0000 | eval_accuracy | 0.9387 | 0.2243 | 0.2072 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/mha_attention/seed_43/all_results.json |
153
+ | result_ablation_mbert | vsfc | mBERT | mha_attention | 44.0000 | eval_accuracy | 0.9337 | 0.2265 | 0.2110 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/mha_attention/seed_44/all_results.json |
154
  | result_ablation_mbert | vsfc | mBERT | multi_branch_average | 42.0000 | eval_accuracy | 0.9413 | 0.2125 | 0.2045 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/multi_branch_average/seed_42/all_results.json |
155
  | result_ablation_mbert | vsfc | mBERT | multi_branch_average | 43.0000 | eval_accuracy | 0.9330 | 0.2274 | 0.2048 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/multi_branch_average/seed_43/all_results.json |
156
  | result_ablation_mbert | vsfc | mBERT | gated_multi_branch | 42.0000 | eval_accuracy | 0.9381 | 0.2134 | 0.2020 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/gated_multi_branch/seed_42/all_results.json |