Upload ablation summaries
Browse files
mbert_paper_metrics/docs/ablation_results.csv
CHANGED
|
@@ -83,11 +83,14 @@ result_ablation_mbert_paper,vsfc,mBERT,mean,44,eval_accuracy,0.932406822488945,0
|
|
| 83 |
result_ablation_mbert_paper,vsfc,mBERT,max,42,eval_accuracy,0.9336702463676564,0.2267763763666153,0.23415064422678558,3.0,1583,/workspace/result_ablation_mbert_paper/vsfc/mBERT/max/seed_42/all_results.json
|
| 84 |
result_ablation_mbert_paper,vsfc,mBERT,max,43,eval_accuracy,0.9349336702463676,0.22572965919971466,0.23497871141055804,3.0,1583,/workspace/result_ablation_mbert_paper/vsfc/mBERT/max/seed_43/all_results.json
|
| 85 |
result_ablation_mbert_paper,vsfc,mBERT,max,44,eval_accuracy,0.9336702463676564,0.2205778807401657,0.23737809152314157,3.0,1583,/workspace/result_ablation_mbert_paper/vsfc/mBERT/max/seed_44/all_results.json
|
|
|
|
|
|
|
| 86 |
result_ablation_mbert_paper,vsfc,mBERT,attention,44,eval_accuracy,0.9368288060644346,0.20899568498134613,0.22297021336766668,3.0,1583,/workspace/result_ablation_mbert_paper/vsfc/mBERT/attention/seed_44/all_results.json
|
| 87 |
result_ablation_mbert_paper,vsfc,mBERT,mha_attention,42,eval_accuracy,0.9330385344283006,0.21529895067214966,0.22224652350365698,3.0,1583,/workspace/result_ablation_mbert_paper/vsfc/mBERT/mha_attention/seed_42/all_results.json
|
| 88 |
result_ablation_mbert_paper,vsfc,mBERT,mha_attention,43,eval_accuracy,0.9330385344283006,0.21815043687820435,0.22466682554124953,3.0,1583,/workspace/result_ablation_mbert_paper/vsfc/mBERT/mha_attention/seed_43/all_results.json
|
| 89 |
result_ablation_mbert_paper,vsfc,mBERT,mha_attention,44,eval_accuracy,0.934301958307012,0.21530689299106598,0.22445858664168067,3.0,1583,/workspace/result_ablation_mbert_paper/vsfc/mBERT/mha_attention/seed_44/all_results.json
|
| 90 |
result_ablation_mbert_paper,vsfc,mBERT,multi_branch_average,42,eval_accuracy,0.9355653821857233,0.2141973078250885,0.21831525011218234,3.0,1583,/workspace/result_ablation_mbert_paper/vsfc/mBERT/multi_branch_average/seed_42/all_results.json
|
|
|
|
| 91 |
result_ablation_mbert_paper,vsfc,mBERT,gated_multi_branch,42,eval_accuracy,0.9374605180037903,0.22037982940673828,0.2221188189544322,3.0,1583,/workspace/result_ablation_mbert_paper/vsfc/mBERT/gated_multi_branch/seed_42/all_results.json
|
| 92 |
result_ablation_mbert_paper,vsfc,mBERT,gated_multi_branch,43,eval_accuracy,0.932406822488945,0.2144102156162262,0.22194261373061955,3.0,1583,/workspace/result_ablation_mbert_paper/vsfc/mBERT/gated_multi_branch/seed_43/all_results.json
|
| 93 |
result_ablation_mbert_paper,vsfc,mBERT,gated_multi_branch,44,eval_accuracy,0.9279848389134555,0.21946457028388977,0.2245002004094335,3.0,1583,/workspace/result_ablation_mbert_paper/vsfc/mBERT/gated_multi_branch/seed_44/all_results.json
|
|
|
|
| 83 |
result_ablation_mbert_paper,vsfc,mBERT,max,42,eval_accuracy,0.9336702463676564,0.2267763763666153,0.23415064422678558,3.0,1583,/workspace/result_ablation_mbert_paper/vsfc/mBERT/max/seed_42/all_results.json
|
| 84 |
result_ablation_mbert_paper,vsfc,mBERT,max,43,eval_accuracy,0.9349336702463676,0.22572965919971466,0.23497871141055804,3.0,1583,/workspace/result_ablation_mbert_paper/vsfc/mBERT/max/seed_43/all_results.json
|
| 85 |
result_ablation_mbert_paper,vsfc,mBERT,max,44,eval_accuracy,0.9336702463676564,0.2205778807401657,0.23737809152314157,3.0,1583,/workspace/result_ablation_mbert_paper/vsfc/mBERT/max/seed_44/all_results.json
|
| 86 |
+
result_ablation_mbert_paper,vsfc,mBERT,attention,42,eval_accuracy,0.930511686670878,0.2223110944032669,0.22780775857138466,3.0,1583,/workspace/result_ablation_mbert_paper/vsfc/mBERT/attention/seed_42/all_results.json
|
| 87 |
+
result_ablation_mbert_paper,vsfc,mBERT,attention,43,eval_accuracy,0.9336702463676564,0.21957743167877197,0.22140424123732916,3.0,1583,/workspace/result_ablation_mbert_paper/vsfc/mBERT/attention/seed_43/all_results.json
|
| 88 |
result_ablation_mbert_paper,vsfc,mBERT,attention,44,eval_accuracy,0.9368288060644346,0.20899568498134613,0.22297021336766668,3.0,1583,/workspace/result_ablation_mbert_paper/vsfc/mBERT/attention/seed_44/all_results.json
|
| 89 |
result_ablation_mbert_paper,vsfc,mBERT,mha_attention,42,eval_accuracy,0.9330385344283006,0.21529895067214966,0.22224652350365698,3.0,1583,/workspace/result_ablation_mbert_paper/vsfc/mBERT/mha_attention/seed_42/all_results.json
|
| 90 |
result_ablation_mbert_paper,vsfc,mBERT,mha_attention,43,eval_accuracy,0.9330385344283006,0.21815043687820435,0.22466682554124953,3.0,1583,/workspace/result_ablation_mbert_paper/vsfc/mBERT/mha_attention/seed_43/all_results.json
|
| 91 |
result_ablation_mbert_paper,vsfc,mBERT,mha_attention,44,eval_accuracy,0.934301958307012,0.21530689299106598,0.22445858664168067,3.0,1583,/workspace/result_ablation_mbert_paper/vsfc/mBERT/mha_attention/seed_44/all_results.json
|
| 92 |
result_ablation_mbert_paper,vsfc,mBERT,multi_branch_average,42,eval_accuracy,0.9355653821857233,0.2141973078250885,0.21831525011218234,3.0,1583,/workspace/result_ablation_mbert_paper/vsfc/mBERT/multi_branch_average/seed_42/all_results.json
|
| 93 |
+
result_ablation_mbert_paper,vsfc,mBERT,multi_branch_average,44,eval_accuracy,0.9317751105495894,0.21137550473213196,0.2217927779351081,3.0,1583,/workspace/result_ablation_mbert_paper/vsfc/mBERT/multi_branch_average/seed_44/all_results.json
|
| 94 |
result_ablation_mbert_paper,vsfc,mBERT,gated_multi_branch,42,eval_accuracy,0.9374605180037903,0.22037982940673828,0.2221188189544322,3.0,1583,/workspace/result_ablation_mbert_paper/vsfc/mBERT/gated_multi_branch/seed_42/all_results.json
|
| 95 |
result_ablation_mbert_paper,vsfc,mBERT,gated_multi_branch,43,eval_accuracy,0.932406822488945,0.2144102156162262,0.22194261373061955,3.0,1583,/workspace/result_ablation_mbert_paper/vsfc/mBERT/gated_multi_branch/seed_43/all_results.json
|
| 96 |
result_ablation_mbert_paper,vsfc,mBERT,gated_multi_branch,44,eval_accuracy,0.9279848389134555,0.21946457028388977,0.2245002004094335,3.0,1583,/workspace/result_ablation_mbert_paper/vsfc/mBERT/gated_multi_branch/seed_44/all_results.json
|
mbert_paper_metrics/docs/ablation_results_aggregate.csv
CHANGED
|
@@ -23,11 +23,11 @@ result_ablation_mbert_paper,sst2,mBERT,max,eval_accuracy,3,0.8746177370030581,0.
|
|
| 23 |
result_ablation_mbert_paper,sst2,mBERT,mean,eval_accuracy,3,0.8807339449541284,0.00865806701292519,0.8727064220183486,0.8899082568807339
|
| 24 |
result_ablation_mbert_paper,sst2,mBERT,mha_attention,eval_accuracy,3,0.8761467889908258,0.001986296797670735,0.8738532110091743,0.8772935779816514
|
| 25 |
result_ablation_mbert_paper,sst2,mBERT,multi_branch_average,eval_accuracy,3,0.8814984709480123,0.01288969059639708,0.8704128440366973,0.8956422018348624
|
| 26 |
-
result_ablation_mbert_paper,vsfc,mBERT,attention,eval_accuracy,
|
| 27 |
result_ablation_mbert_paper,vsfc,mBERT,cls,eval_accuracy,3,0.9347230995999157,0.0025530334076610166,0.932406822488945,0.9374605180037903
|
| 28 |
result_ablation_mbert_paper,vsfc,mBERT,gated_multi_branch,eval_accuracy,3,0.9326173931353969,0.004741347757084709,0.9279848389134555,0.9374605180037903
|
| 29 |
result_ablation_mbert_paper,vsfc,mBERT,hf_sequence_classifier,eval_accuracy,3,0.9317751105495894,0.0006317119393556281,0.9311433986102338,0.932406822488945
|
| 30 |
result_ablation_mbert_paper,vsfc,mBERT,max,eval_accuracy,3,0.9340913876605601,0.0007294381164745449,0.9336702463676564,0.9349336702463676
|
| 31 |
result_ablation_mbert_paper,vsfc,mBERT,mean,eval_accuracy,3,0.9334596757212045,0.0013150132656134848,0.932406822488945,0.9349336702463676
|
| 32 |
result_ablation_mbert_paper,vsfc,mBERT,mha_attention,eval_accuracy,3,0.9334596757212045,0.0007294381164746089,0.9330385344283006,0.934301958307012
|
| 33 |
-
result_ablation_mbert_paper,vsfc,mBERT,multi_branch_average,eval_accuracy,
|
|
|
|
| 23 |
result_ablation_mbert_paper,sst2,mBERT,mean,eval_accuracy,3,0.8807339449541284,0.00865806701292519,0.8727064220183486,0.8899082568807339
|
| 24 |
result_ablation_mbert_paper,sst2,mBERT,mha_attention,eval_accuracy,3,0.8761467889908258,0.001986296797670735,0.8738532110091743,0.8772935779816514
|
| 25 |
result_ablation_mbert_paper,sst2,mBERT,multi_branch_average,eval_accuracy,3,0.8814984709480123,0.01288969059639708,0.8704128440366973,0.8956422018348624
|
| 26 |
+
result_ablation_mbert_paper,vsfc,mBERT,attention,eval_accuracy,3,0.9336702463676564,0.003158559696778307,0.930511686670878,0.9368288060644346
|
| 27 |
result_ablation_mbert_paper,vsfc,mBERT,cls,eval_accuracy,3,0.9347230995999157,0.0025530334076610166,0.932406822488945,0.9374605180037903
|
| 28 |
result_ablation_mbert_paper,vsfc,mBERT,gated_multi_branch,eval_accuracy,3,0.9326173931353969,0.004741347757084709,0.9279848389134555,0.9374605180037903
|
| 29 |
result_ablation_mbert_paper,vsfc,mBERT,hf_sequence_classifier,eval_accuracy,3,0.9317751105495894,0.0006317119393556281,0.9311433986102338,0.932406822488945
|
| 30 |
result_ablation_mbert_paper,vsfc,mBERT,max,eval_accuracy,3,0.9340913876605601,0.0007294381164745449,0.9336702463676564,0.9349336702463676
|
| 31 |
result_ablation_mbert_paper,vsfc,mBERT,mean,eval_accuracy,3,0.9334596757212045,0.0013150132656134848,0.932406822488945,0.9349336702463676
|
| 32 |
result_ablation_mbert_paper,vsfc,mBERT,mha_attention,eval_accuracy,3,0.9334596757212045,0.0007294381164746089,0.9330385344283006,0.934301958307012
|
| 33 |
+
result_ablation_mbert_paper,vsfc,mBERT,multi_branch_average,eval_accuracy,2,0.9336702463676563,0.0026801267764492965,0.9317751105495894,0.9355653821857233
|
mbert_paper_metrics/docs/ablation_summary.md
CHANGED
|
@@ -29,14 +29,14 @@ Main metric is selected per task: CoLA uses Matthews correlation; MRPC/QQP/STSB
|
|
| 29 |
| result_ablation_mbert_paper | sst2 | mBERT | mean | eval_accuracy | 3 | 0.8807 | 0.0087 | 0.8727 | 0.8899 |
|
| 30 |
| result_ablation_mbert_paper | sst2 | mBERT | mha_attention | eval_accuracy | 3 | 0.8761 | 0.0020 | 0.8739 | 0.8773 |
|
| 31 |
| result_ablation_mbert_paper | sst2 | mBERT | multi_branch_average | eval_accuracy | 3 | 0.8815 | 0.0129 | 0.8704 | 0.8956 |
|
| 32 |
-
| result_ablation_mbert_paper | vsfc | mBERT | attention | eval_accuracy |
|
| 33 |
| result_ablation_mbert_paper | vsfc | mBERT | cls | eval_accuracy | 3 | 0.9347 | 0.0026 | 0.9324 | 0.9375 |
|
| 34 |
| result_ablation_mbert_paper | vsfc | mBERT | gated_multi_branch | eval_accuracy | 3 | 0.9326 | 0.0047 | 0.9280 | 0.9375 |
|
| 35 |
| result_ablation_mbert_paper | vsfc | mBERT | hf_sequence_classifier | eval_accuracy | 3 | 0.9318 | 0.0006 | 0.9311 | 0.9324 |
|
| 36 |
| result_ablation_mbert_paper | vsfc | mBERT | max | eval_accuracy | 3 | 0.9341 | 0.0007 | 0.9337 | 0.9349 |
|
| 37 |
| result_ablation_mbert_paper | vsfc | mBERT | mean | eval_accuracy | 3 | 0.9335 | 0.0013 | 0.9324 | 0.9349 |
|
| 38 |
| result_ablation_mbert_paper | vsfc | mBERT | mha_attention | eval_accuracy | 3 | 0.9335 | 0.0007 | 0.9330 | 0.9343 |
|
| 39 |
-
| result_ablation_mbert_paper | vsfc | mBERT | multi_branch_average | eval_accuracy |
|
| 40 |
|
| 41 |
## Gated Multi-Branch Deltas
|
| 42 |
| source | task | model | baseline | gated_mean | baseline_mean | delta |
|
|
@@ -53,9 +53,9 @@ Main metric is selected per task: CoLA uses Matthews correlation; MRPC/QQP/STSB
|
|
| 53 |
| result_ablation_mbert_paper | sst2 | mBERT | mha_attention | 0.8758 | 0.8761 | -0.0004 |
|
| 54 |
| result_ablation_mbert_paper | sst2 | mBERT | multi_branch_average | 0.8758 | 0.8815 | -0.0057 |
|
| 55 |
| result_ablation_mbert_paper | sst2 | mBERT | hf_sequence_classifier | 0.8758 | 0.8788 | -0.0031 |
|
| 56 |
-
| result_ablation_mbert_paper | vsfc | mBERT | attention | 0.9326 | 0.
|
| 57 |
| result_ablation_mbert_paper | vsfc | mBERT | mha_attention | 0.9326 | 0.9335 | -0.0008 |
|
| 58 |
-
| result_ablation_mbert_paper | vsfc | mBERT | multi_branch_average | 0.9326 | 0.
|
| 59 |
| result_ablation_mbert_paper | vsfc | mBERT | hf_sequence_classifier | 0.9326 | 0.9318 | 0.0008 |
|
| 60 |
|
| 61 |
## Raw Runs
|
|
@@ -145,11 +145,14 @@ Main metric is selected per task: CoLA uses Matthews correlation; MRPC/QQP/STSB
|
|
| 145 |
| result_ablation_mbert_paper | vsfc | mBERT | max | 42.0000 | eval_accuracy | 0.9337 | 0.2268 | 0.2342 | 3.0000 | 1583 | /workspace/result_ablation_mbert_paper/vsfc/mBERT/max/seed_42/all_results.json |
|
| 146 |
| result_ablation_mbert_paper | vsfc | mBERT | max | 43.0000 | eval_accuracy | 0.9349 | 0.2257 | 0.2350 | 3.0000 | 1583 | /workspace/result_ablation_mbert_paper/vsfc/mBERT/max/seed_43/all_results.json |
|
| 147 |
| result_ablation_mbert_paper | vsfc | mBERT | max | 44.0000 | eval_accuracy | 0.9337 | 0.2206 | 0.2374 | 3.0000 | 1583 | /workspace/result_ablation_mbert_paper/vsfc/mBERT/max/seed_44/all_results.json |
|
|
|
|
|
|
|
| 148 |
| result_ablation_mbert_paper | vsfc | mBERT | attention | 44.0000 | eval_accuracy | 0.9368 | 0.2090 | 0.2230 | 3.0000 | 1583 | /workspace/result_ablation_mbert_paper/vsfc/mBERT/attention/seed_44/all_results.json |
|
| 149 |
| result_ablation_mbert_paper | vsfc | mBERT | mha_attention | 42.0000 | eval_accuracy | 0.9330 | 0.2153 | 0.2222 | 3.0000 | 1583 | /workspace/result_ablation_mbert_paper/vsfc/mBERT/mha_attention/seed_42/all_results.json |
|
| 150 |
| result_ablation_mbert_paper | vsfc | mBERT | mha_attention | 43.0000 | eval_accuracy | 0.9330 | 0.2182 | 0.2247 | 3.0000 | 1583 | /workspace/result_ablation_mbert_paper/vsfc/mBERT/mha_attention/seed_43/all_results.json |
|
| 151 |
| result_ablation_mbert_paper | vsfc | mBERT | mha_attention | 44.0000 | eval_accuracy | 0.9343 | 0.2153 | 0.2245 | 3.0000 | 1583 | /workspace/result_ablation_mbert_paper/vsfc/mBERT/mha_attention/seed_44/all_results.json |
|
| 152 |
| result_ablation_mbert_paper | vsfc | mBERT | multi_branch_average | 42.0000 | eval_accuracy | 0.9356 | 0.2142 | 0.2183 | 3.0000 | 1583 | /workspace/result_ablation_mbert_paper/vsfc/mBERT/multi_branch_average/seed_42/all_results.json |
|
|
|
|
| 153 |
| result_ablation_mbert_paper | vsfc | mBERT | gated_multi_branch | 42.0000 | eval_accuracy | 0.9375 | 0.2204 | 0.2221 | 3.0000 | 1583 | /workspace/result_ablation_mbert_paper/vsfc/mBERT/gated_multi_branch/seed_42/all_results.json |
|
| 154 |
| result_ablation_mbert_paper | vsfc | mBERT | gated_multi_branch | 43.0000 | eval_accuracy | 0.9324 | 0.2144 | 0.2219 | 3.0000 | 1583 | /workspace/result_ablation_mbert_paper/vsfc/mBERT/gated_multi_branch/seed_43/all_results.json |
|
| 155 |
| result_ablation_mbert_paper | vsfc | mBERT | gated_multi_branch | 44.0000 | eval_accuracy | 0.9280 | 0.2195 | 0.2245 | 3.0000 | 1583 | /workspace/result_ablation_mbert_paper/vsfc/mBERT/gated_multi_branch/seed_44/all_results.json |
|
|
|
|
| 29 |
| result_ablation_mbert_paper | sst2 | mBERT | mean | eval_accuracy | 3 | 0.8807 | 0.0087 | 0.8727 | 0.8899 |
|
| 30 |
| result_ablation_mbert_paper | sst2 | mBERT | mha_attention | eval_accuracy | 3 | 0.8761 | 0.0020 | 0.8739 | 0.8773 |
|
| 31 |
| result_ablation_mbert_paper | sst2 | mBERT | multi_branch_average | eval_accuracy | 3 | 0.8815 | 0.0129 | 0.8704 | 0.8956 |
|
| 32 |
+
| result_ablation_mbert_paper | vsfc | mBERT | attention | eval_accuracy | 3 | 0.9337 | 0.0032 | 0.9305 | 0.9368 |
|
| 33 |
| result_ablation_mbert_paper | vsfc | mBERT | cls | eval_accuracy | 3 | 0.9347 | 0.0026 | 0.9324 | 0.9375 |
|
| 34 |
| result_ablation_mbert_paper | vsfc | mBERT | gated_multi_branch | eval_accuracy | 3 | 0.9326 | 0.0047 | 0.9280 | 0.9375 |
|
| 35 |
| result_ablation_mbert_paper | vsfc | mBERT | hf_sequence_classifier | eval_accuracy | 3 | 0.9318 | 0.0006 | 0.9311 | 0.9324 |
|
| 36 |
| result_ablation_mbert_paper | vsfc | mBERT | max | eval_accuracy | 3 | 0.9341 | 0.0007 | 0.9337 | 0.9349 |
|
| 37 |
| result_ablation_mbert_paper | vsfc | mBERT | mean | eval_accuracy | 3 | 0.9335 | 0.0013 | 0.9324 | 0.9349 |
|
| 38 |
| result_ablation_mbert_paper | vsfc | mBERT | mha_attention | eval_accuracy | 3 | 0.9335 | 0.0007 | 0.9330 | 0.9343 |
|
| 39 |
+
| result_ablation_mbert_paper | vsfc | mBERT | multi_branch_average | eval_accuracy | 2 | 0.9337 | 0.0027 | 0.9318 | 0.9356 |
|
| 40 |
|
| 41 |
## Gated Multi-Branch Deltas
|
| 42 |
| source | task | model | baseline | gated_mean | baseline_mean | delta |
|
|
|
|
| 53 |
| result_ablation_mbert_paper | sst2 | mBERT | mha_attention | 0.8758 | 0.8761 | -0.0004 |
|
| 54 |
| result_ablation_mbert_paper | sst2 | mBERT | multi_branch_average | 0.8758 | 0.8815 | -0.0057 |
|
| 55 |
| result_ablation_mbert_paper | sst2 | mBERT | hf_sequence_classifier | 0.8758 | 0.8788 | -0.0031 |
|
| 56 |
+
| result_ablation_mbert_paper | vsfc | mBERT | attention | 0.9326 | 0.9337 | -0.0011 |
|
| 57 |
| result_ablation_mbert_paper | vsfc | mBERT | mha_attention | 0.9326 | 0.9335 | -0.0008 |
|
| 58 |
+
| result_ablation_mbert_paper | vsfc | mBERT | multi_branch_average | 0.9326 | 0.9337 | -0.0011 |
|
| 59 |
| result_ablation_mbert_paper | vsfc | mBERT | hf_sequence_classifier | 0.9326 | 0.9318 | 0.0008 |
|
| 60 |
|
| 61 |
## Raw Runs
|
|
|
|
| 145 |
| result_ablation_mbert_paper | vsfc | mBERT | max | 42.0000 | eval_accuracy | 0.9337 | 0.2268 | 0.2342 | 3.0000 | 1583 | /workspace/result_ablation_mbert_paper/vsfc/mBERT/max/seed_42/all_results.json |
|
| 146 |
| result_ablation_mbert_paper | vsfc | mBERT | max | 43.0000 | eval_accuracy | 0.9349 | 0.2257 | 0.2350 | 3.0000 | 1583 | /workspace/result_ablation_mbert_paper/vsfc/mBERT/max/seed_43/all_results.json |
|
| 147 |
| result_ablation_mbert_paper | vsfc | mBERT | max | 44.0000 | eval_accuracy | 0.9337 | 0.2206 | 0.2374 | 3.0000 | 1583 | /workspace/result_ablation_mbert_paper/vsfc/mBERT/max/seed_44/all_results.json |
|
| 148 |
+
| result_ablation_mbert_paper | vsfc | mBERT | attention | 42.0000 | eval_accuracy | 0.9305 | 0.2223 | 0.2278 | 3.0000 | 1583 | /workspace/result_ablation_mbert_paper/vsfc/mBERT/attention/seed_42/all_results.json |
|
| 149 |
+
| result_ablation_mbert_paper | vsfc | mBERT | attention | 43.0000 | eval_accuracy | 0.9337 | 0.2196 | 0.2214 | 3.0000 | 1583 | /workspace/result_ablation_mbert_paper/vsfc/mBERT/attention/seed_43/all_results.json |
|
| 150 |
| result_ablation_mbert_paper | vsfc | mBERT | attention | 44.0000 | eval_accuracy | 0.9368 | 0.2090 | 0.2230 | 3.0000 | 1583 | /workspace/result_ablation_mbert_paper/vsfc/mBERT/attention/seed_44/all_results.json |
|
| 151 |
| result_ablation_mbert_paper | vsfc | mBERT | mha_attention | 42.0000 | eval_accuracy | 0.9330 | 0.2153 | 0.2222 | 3.0000 | 1583 | /workspace/result_ablation_mbert_paper/vsfc/mBERT/mha_attention/seed_42/all_results.json |
|
| 152 |
| result_ablation_mbert_paper | vsfc | mBERT | mha_attention | 43.0000 | eval_accuracy | 0.9330 | 0.2182 | 0.2247 | 3.0000 | 1583 | /workspace/result_ablation_mbert_paper/vsfc/mBERT/mha_attention/seed_43/all_results.json |
|
| 153 |
| result_ablation_mbert_paper | vsfc | mBERT | mha_attention | 44.0000 | eval_accuracy | 0.9343 | 0.2153 | 0.2245 | 3.0000 | 1583 | /workspace/result_ablation_mbert_paper/vsfc/mBERT/mha_attention/seed_44/all_results.json |
|
| 154 |
| result_ablation_mbert_paper | vsfc | mBERT | multi_branch_average | 42.0000 | eval_accuracy | 0.9356 | 0.2142 | 0.2183 | 3.0000 | 1583 | /workspace/result_ablation_mbert_paper/vsfc/mBERT/multi_branch_average/seed_42/all_results.json |
|
| 155 |
+
| result_ablation_mbert_paper | vsfc | mBERT | multi_branch_average | 44.0000 | eval_accuracy | 0.9318 | 0.2114 | 0.2218 | 3.0000 | 1583 | /workspace/result_ablation_mbert_paper/vsfc/mBERT/multi_branch_average/seed_44/all_results.json |
|
| 156 |
| result_ablation_mbert_paper | vsfc | mBERT | gated_multi_branch | 42.0000 | eval_accuracy | 0.9375 | 0.2204 | 0.2221 | 3.0000 | 1583 | /workspace/result_ablation_mbert_paper/vsfc/mBERT/gated_multi_branch/seed_42/all_results.json |
|
| 157 |
| result_ablation_mbert_paper | vsfc | mBERT | gated_multi_branch | 43.0000 | eval_accuracy | 0.9324 | 0.2144 | 0.2219 | 3.0000 | 1583 | /workspace/result_ablation_mbert_paper/vsfc/mBERT/gated_multi_branch/seed_43/all_results.json |
|
| 158 |
| result_ablation_mbert_paper | vsfc | mBERT | gated_multi_branch | 44.0000 | eval_accuracy | 0.9280 | 0.2195 | 0.2245 | 3.0000 | 1583 | /workspace/result_ablation_mbert_paper/vsfc/mBERT/gated_multi_branch/seed_44/all_results.json |
|