PuxAI committed on
Commit
fabd724
·
verified ·
1 Parent(s): 1112fa4

Upload ablation summaries

Browse files
mbert_rtx6000_metrics/docs/ablation_results.csv CHANGED
@@ -91,5 +91,7 @@ result_ablation_mbert,vsfc,mBERT,mha_attention,43,eval_accuracy,0.93872394188250
91
  result_ablation_mbert,vsfc,mBERT,mha_attention,44,eval_accuracy,0.9336702463676564,0.2265038937330246,0.21097227918553052,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/mha_attention/seed_44/all_results.json
92
  result_ablation_mbert,vsfc,mBERT,multi_branch_average,42,eval_accuracy,0.9412507896399241,0.21253333985805511,0.20452363604115975,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/multi_branch_average/seed_42/all_results.json
93
  result_ablation_mbert,vsfc,mBERT,multi_branch_average,43,eval_accuracy,0.9330385344283006,0.22735266387462616,0.20476110576752007,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/multi_branch_average/seed_43/all_results.json
 
94
  result_ablation_mbert,vsfc,mBERT,gated_multi_branch,42,eval_accuracy,0.9380922299431459,0.2133793830871582,0.2020011867605659,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/gated_multi_branch/seed_42/all_results.json
 
95
  result_ablation_mbert,vsfc,mBERT,gated_multi_branch,44,eval_accuracy,0.9387239418825016,0.22616708278656006,0.20627780971500498,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/gated_multi_branch/seed_44/all_results.json
 
91
  result_ablation_mbert,vsfc,mBERT,mha_attention,44,eval_accuracy,0.9336702463676564,0.2265038937330246,0.21097227918553052,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/mha_attention/seed_44/all_results.json
92
  result_ablation_mbert,vsfc,mBERT,multi_branch_average,42,eval_accuracy,0.9412507896399241,0.21253333985805511,0.20452363604115975,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/multi_branch_average/seed_42/all_results.json
93
  result_ablation_mbert,vsfc,mBERT,multi_branch_average,43,eval_accuracy,0.9330385344283006,0.22735266387462616,0.20476110576752007,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/multi_branch_average/seed_43/all_results.json
94
+ result_ablation_mbert,vsfc,mBERT,multi_branch_average,44,eval_accuracy,0.9374605180037903,0.2184186577796936,0.20509551093501857,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/multi_branch_average/seed_44/all_results.json
95
  result_ablation_mbert,vsfc,mBERT,gated_multi_branch,42,eval_accuracy,0.9380922299431459,0.2133793830871582,0.2020011867605659,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/gated_multi_branch/seed_42/all_results.json
96
+ result_ablation_mbert,vsfc,mBERT,gated_multi_branch,43,eval_accuracy,0.9355653821857233,0.22262658178806305,0.20581150786480976,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/gated_multi_branch/seed_43/all_results.json
97
  result_ablation_mbert,vsfc,mBERT,gated_multi_branch,44,eval_accuracy,0.9387239418825016,0.22616708278656006,0.20627780971500498,3.0,1583,/workspace/result_ablation_mbert/vsfc/mBERT/gated_multi_branch/seed_44/all_results.json
mbert_rtx6000_metrics/docs/ablation_results_aggregate.csv CHANGED
@@ -25,9 +25,9 @@ result_ablation_mbert,sst2,mBERT,mha_attention,eval_accuracy,3,0.879969418960244
25
  result_ablation_mbert,sst2,mBERT,multi_branch_average,eval_accuracy,3,0.8761467889908257,0.01051049471320149,0.8646788990825688,0.8853211009174312
26
  result_ablation_mbert,vsfc,mBERT,attention,eval_accuracy,3,0.9376710886502422,0.005106066815322034,0.9330385344283006,0.9431459254579911
27
  result_ablation_mbert,vsfc,mBERT,cls,eval_accuracy,3,0.9372499473573384,0.0038077787576384315,0.9336702463676564,0.9412507896399241
28
- result_ablation_mbert,vsfc,mBERT,gated_multi_branch,eval_accuracy,2,0.9384080859128238,0.0004466877960749482,0.9380922299431459,0.9387239418825016
29
  result_ablation_mbert,vsfc,mBERT,hf_sequence_classifier,eval_accuracy,3,0.9364076647715308,0.0035920661421840576,0.932406822488945,0.9393556538218573
30
  result_ablation_mbert,vsfc,mBERT,max,eval_accuracy,3,0.9357759528321752,0.00586959775311282,0.9317751105495894,0.9425142135186355
31
  result_ablation_mbert,vsfc,mBERT,mean,eval_accuracy,3,0.936197094125079,0.0012634238787113117,0.9349336702463676,0.9374605180037903
32
  result_ablation_mbert,vsfc,mBERT,mha_attention,eval_accuracy,3,0.9364076647715309,0.0025530334076610166,0.9336702463676564,0.9387239418825016
33
- result_ablation_mbert,vsfc,mBERT,multi_branch_average,eval_accuracy,2,0.9371446620341124,0.005806941348973541,0.9330385344283006,0.9412507896399241
 
25
  result_ablation_mbert,sst2,mBERT,multi_branch_average,eval_accuracy,3,0.8761467889908257,0.01051049471320149,0.8646788990825688,0.8853211009174312
26
  result_ablation_mbert,vsfc,mBERT,attention,eval_accuracy,3,0.9376710886502422,0.005106066815322034,0.9330385344283006,0.9431459254579911
27
  result_ablation_mbert,vsfc,mBERT,cls,eval_accuracy,3,0.9372499473573384,0.0038077787576384315,0.9336702463676564,0.9412507896399241
28
+ result_ablation_mbert,vsfc,mBERT,gated_multi_branch,eval_accuracy,3,0.9374605180037903,0.001671352691765413,0.9355653821857233,0.9387239418825016
29
  result_ablation_mbert,vsfc,mBERT,hf_sequence_classifier,eval_accuracy,3,0.9364076647715308,0.0035920661421840576,0.932406822488945,0.9393556538218573
30
  result_ablation_mbert,vsfc,mBERT,max,eval_accuracy,3,0.9357759528321752,0.00586959775311282,0.9317751105495894,0.9425142135186355
31
  result_ablation_mbert,vsfc,mBERT,mean,eval_accuracy,3,0.936197094125079,0.0012634238787113117,0.9349336702463676,0.9374605180037903
32
  result_ablation_mbert,vsfc,mBERT,mha_attention,eval_accuracy,3,0.9364076647715309,0.0025530334076610166,0.9336702463676564,0.9387239418825016
33
+ result_ablation_mbert,vsfc,mBERT,multi_branch_average,eval_accuracy,3,0.9372499473573384,0.0041101750465241385,0.9330385344283006,0.9412507896399241
mbert_rtx6000_metrics/docs/ablation_summary.md CHANGED
@@ -31,12 +31,12 @@ Main metric is selected per task: CoLA uses Matthews correlation; MRPC/QQP/STSB
31
  | result_ablation_mbert | sst2 | mBERT | multi_branch_average | eval_accuracy | 3 | 0.8761 | 0.0105 | 0.8647 | 0.8853 |
32
  | result_ablation_mbert | vsfc | mBERT | attention | eval_accuracy | 3 | 0.9377 | 0.0051 | 0.9330 | 0.9431 |
33
  | result_ablation_mbert | vsfc | mBERT | cls | eval_accuracy | 3 | 0.9372 | 0.0038 | 0.9337 | 0.9413 |
34
- | result_ablation_mbert | vsfc | mBERT | gated_multi_branch | eval_accuracy | 2 | 0.9384 | 0.0004 | 0.9381 | 0.9387 |
35
  | result_ablation_mbert | vsfc | mBERT | hf_sequence_classifier | eval_accuracy | 3 | 0.9364 | 0.0036 | 0.9324 | 0.9394 |
36
  | result_ablation_mbert | vsfc | mBERT | max | eval_accuracy | 3 | 0.9358 | 0.0059 | 0.9318 | 0.9425 |
37
  | result_ablation_mbert | vsfc | mBERT | mean | eval_accuracy | 3 | 0.9362 | 0.0013 | 0.9349 | 0.9375 |
38
  | result_ablation_mbert | vsfc | mBERT | mha_attention | eval_accuracy | 3 | 0.9364 | 0.0026 | 0.9337 | 0.9387 |
39
- | result_ablation_mbert | vsfc | mBERT | multi_branch_average | eval_accuracy | 2 | 0.9371 | 0.0058 | 0.9330 | 0.9413 |
40
 
41
  ## Gated Multi-Branch Deltas
42
  | source | task | model | baseline | gated_mean | baseline_mean | delta |
@@ -53,10 +53,10 @@ Main metric is selected per task: CoLA uses Matthews correlation; MRPC/QQP/STSB
53
  | result_ablation_mbert | sst2 | mBERT | mha_attention | 0.8777 | 0.8800 | -0.0023 |
54
  | result_ablation_mbert | sst2 | mBERT | multi_branch_average | 0.8777 | 0.8761 | 0.0015 |
55
  | result_ablation_mbert | sst2 | mBERT | hf_sequence_classifier | 0.8777 | 0.8796 | -0.0019 |
56
- | result_ablation_mbert | vsfc | mBERT | attention | 0.9384 | 0.9377 | 0.0007 |
57
- | result_ablation_mbert | vsfc | mBERT | mha_attention | 0.9384 | 0.9364 | 0.0020 |
58
- | result_ablation_mbert | vsfc | mBERT | multi_branch_average | 0.9384 | 0.9371 | 0.0013 |
59
- | result_ablation_mbert | vsfc | mBERT | hf_sequence_classifier | 0.9384 | 0.9364 | 0.0020 |
60
 
61
  ## Raw Runs
62
  | source | task | model | strategy | seed | metric | score | eval_loss | train_loss | epoch | eval_samples | path |
@@ -153,5 +153,7 @@ Main metric is selected per task: CoLA uses Matthews correlation; MRPC/QQP/STSB
153
  | result_ablation_mbert | vsfc | mBERT | mha_attention | 44.0000 | eval_accuracy | 0.9337 | 0.2265 | 0.2110 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/mha_attention/seed_44/all_results.json |
154
  | result_ablation_mbert | vsfc | mBERT | multi_branch_average | 42.0000 | eval_accuracy | 0.9413 | 0.2125 | 0.2045 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/multi_branch_average/seed_42/all_results.json |
155
  | result_ablation_mbert | vsfc | mBERT | multi_branch_average | 43.0000 | eval_accuracy | 0.9330 | 0.2274 | 0.2048 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/multi_branch_average/seed_43/all_results.json |
 
156
  | result_ablation_mbert | vsfc | mBERT | gated_multi_branch | 42.0000 | eval_accuracy | 0.9381 | 0.2134 | 0.2020 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/gated_multi_branch/seed_42/all_results.json |
 
157
  | result_ablation_mbert | vsfc | mBERT | gated_multi_branch | 44.0000 | eval_accuracy | 0.9387 | 0.2262 | 0.2063 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/gated_multi_branch/seed_44/all_results.json |
 
31
  | result_ablation_mbert | sst2 | mBERT | multi_branch_average | eval_accuracy | 3 | 0.8761 | 0.0105 | 0.8647 | 0.8853 |
32
  | result_ablation_mbert | vsfc | mBERT | attention | eval_accuracy | 3 | 0.9377 | 0.0051 | 0.9330 | 0.9431 |
33
  | result_ablation_mbert | vsfc | mBERT | cls | eval_accuracy | 3 | 0.9372 | 0.0038 | 0.9337 | 0.9413 |
34
+ | result_ablation_mbert | vsfc | mBERT | gated_multi_branch | eval_accuracy | 3 | 0.9375 | 0.0017 | 0.9356 | 0.9387 |
35
  | result_ablation_mbert | vsfc | mBERT | hf_sequence_classifier | eval_accuracy | 3 | 0.9364 | 0.0036 | 0.9324 | 0.9394 |
36
  | result_ablation_mbert | vsfc | mBERT | max | eval_accuracy | 3 | 0.9358 | 0.0059 | 0.9318 | 0.9425 |
37
  | result_ablation_mbert | vsfc | mBERT | mean | eval_accuracy | 3 | 0.9362 | 0.0013 | 0.9349 | 0.9375 |
38
  | result_ablation_mbert | vsfc | mBERT | mha_attention | eval_accuracy | 3 | 0.9364 | 0.0026 | 0.9337 | 0.9387 |
39
+ | result_ablation_mbert | vsfc | mBERT | multi_branch_average | eval_accuracy | 3 | 0.9372 | 0.0041 | 0.9330 | 0.9413 |
40
 
41
  ## Gated Multi-Branch Deltas
42
  | source | task | model | baseline | gated_mean | baseline_mean | delta |
 
53
  | result_ablation_mbert | sst2 | mBERT | mha_attention | 0.8777 | 0.8800 | -0.0023 |
54
  | result_ablation_mbert | sst2 | mBERT | multi_branch_average | 0.8777 | 0.8761 | 0.0015 |
55
  | result_ablation_mbert | sst2 | mBERT | hf_sequence_classifier | 0.8777 | 0.8796 | -0.0019 |
56
+ | result_ablation_mbert | vsfc | mBERT | attention | 0.9375 | 0.9377 | -0.0002 |
57
+ | result_ablation_mbert | vsfc | mBERT | mha_attention | 0.9375 | 0.9364 | 0.0011 |
58
+ | result_ablation_mbert | vsfc | mBERT | multi_branch_average | 0.9375 | 0.9372 | 0.0002 |
59
+ | result_ablation_mbert | vsfc | mBERT | hf_sequence_classifier | 0.9375 | 0.9364 | 0.0011 |
60
 
61
  ## Raw Runs
62
  | source | task | model | strategy | seed | metric | score | eval_loss | train_loss | epoch | eval_samples | path |
 
153
  | result_ablation_mbert | vsfc | mBERT | mha_attention | 44.0000 | eval_accuracy | 0.9337 | 0.2265 | 0.2110 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/mha_attention/seed_44/all_results.json |
154
  | result_ablation_mbert | vsfc | mBERT | multi_branch_average | 42.0000 | eval_accuracy | 0.9413 | 0.2125 | 0.2045 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/multi_branch_average/seed_42/all_results.json |
155
  | result_ablation_mbert | vsfc | mBERT | multi_branch_average | 43.0000 | eval_accuracy | 0.9330 | 0.2274 | 0.2048 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/multi_branch_average/seed_43/all_results.json |
156
+ | result_ablation_mbert | vsfc | mBERT | multi_branch_average | 44.0000 | eval_accuracy | 0.9375 | 0.2184 | 0.2051 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/multi_branch_average/seed_44/all_results.json |
157
  | result_ablation_mbert | vsfc | mBERT | gated_multi_branch | 42.0000 | eval_accuracy | 0.9381 | 0.2134 | 0.2020 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/gated_multi_branch/seed_42/all_results.json |
158
+ | result_ablation_mbert | vsfc | mBERT | gated_multi_branch | 43.0000 | eval_accuracy | 0.9356 | 0.2226 | 0.2058 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/gated_multi_branch/seed_43/all_results.json |
159
  | result_ablation_mbert | vsfc | mBERT | gated_multi_branch | 44.0000 | eval_accuracy | 0.9387 | 0.2262 | 0.2063 | 3.0000 | 1583 | /workspace/result_ablation_mbert/vsfc/mBERT/gated_multi_branch/seed_44/all_results.json |