Upload organize_model_results.json with huggingface_hub
Browse files- organize_model_results.json +54 -17
organize_model_results.json
CHANGED
|
@@ -13,7 +13,8 @@
|
|
| 13 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 85.2928225451343
|
| 14 |
},
|
| 15 |
"gpt4o_judge": {
|
| 16 |
-
"MERaLiON-AudioLLM-Whisper-SEA-LION": 88.77146631439894
|
|
|
|
| 17 |
}
|
| 18 |
},
|
| 19 |
"mmau_mini": {
|
|
@@ -41,7 +42,8 @@
|
|
| 41 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 55.60000000000001
|
| 42 |
},
|
| 43 |
"gpt4o_judge": {
|
| 44 |
-
"MERaLiON-AudioLLM-Whisper-SEA-LION": 63.9
|
|
|
|
| 45 |
}
|
| 46 |
},
|
| 47 |
"mmau_mini_music": {
|
|
@@ -69,7 +71,8 @@
|
|
| 69 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 0.5359281437125748
|
| 70 |
},
|
| 71 |
"gpt4o_judge": {
|
| 72 |
-
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6347305389221557
|
|
|
|
| 73 |
}
|
| 74 |
},
|
| 75 |
"mmau_mini_sound": {
|
|
@@ -97,7 +100,8 @@
|
|
| 97 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 0.5105105105105106
|
| 98 |
},
|
| 99 |
"gpt4o_judge": {
|
| 100 |
-
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6996996996996997
|
|
|
|
| 101 |
}
|
| 102 |
},
|
| 103 |
"mmau_mini_speech": {
|
|
@@ -125,7 +129,8 @@
|
|
| 125 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 0.6216216216216216
|
| 126 |
},
|
| 127 |
"gpt4o_judge": {
|
| 128 |
-
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.5825825825825826
|
|
|
|
| 129 |
}
|
| 130 |
},
|
| 131 |
"slue_p2_sqa5_test": {
|
|
@@ -143,6 +148,7 @@
|
|
| 143 |
},
|
| 144 |
"gpt4o_judge": {
|
| 145 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 88.23529411764707,
|
|
|
|
| 146 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 87.79411764705883
|
| 147 |
}
|
| 148 |
},
|
|
@@ -160,6 +166,7 @@
|
|
| 160 |
},
|
| 161 |
"gpt4o_judge": {
|
| 162 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 14.813295034878948,
|
|
|
|
| 163 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 39.462453836684446
|
| 164 |
}
|
| 165 |
},
|
|
@@ -201,6 +208,7 @@
|
|
| 201 |
},
|
| 202 |
"gpt4o_judge": {
|
| 203 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 31.641618497109825,
|
|
|
|
| 204 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 4.61271676300578
|
| 205 |
}
|
| 206 |
},
|
|
@@ -232,6 +240,7 @@
|
|
| 232 |
},
|
| 233 |
"gpt4o_judge": {
|
| 234 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 71.6,
|
|
|
|
| 235 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 67.0
|
| 236 |
}
|
| 237 |
},
|
|
@@ -272,7 +281,8 @@
|
|
| 272 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 56.44481887110362
|
| 273 |
},
|
| 274 |
"gpt4o_judge": {
|
| 275 |
-
"MERaLiON-AudioLLM-Whisper-SEA-LION": 57.87700084245998
|
|
|
|
| 276 |
}
|
| 277 |
},
|
| 278 |
"imda_30s_sqa_human_test": {
|
|
@@ -383,6 +393,7 @@
|
|
| 383 |
},
|
| 384 |
"gpt4o_judge": {
|
| 385 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 74.2,
|
|
|
|
| 386 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 75.0
|
| 387 |
}
|
| 388 |
},
|
|
@@ -398,7 +409,8 @@
|
|
| 398 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 46.713147410358566
|
| 399 |
},
|
| 400 |
"gpt4o_judge": {
|
| 401 |
-
"MERaLiON-AudioLLM-Whisper-SEA-LION": 37.45019920318725
|
|
|
|
| 402 |
}
|
| 403 |
},
|
| 404 |
"public_sg_speech_qa_test": {
|
|
@@ -415,6 +427,7 @@
|
|
| 415 |
},
|
| 416 |
"gpt4o_judge": {
|
| 417 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 64.18604651162791,
|
|
|
|
| 418 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 73.02325581395348
|
| 419 |
}
|
| 420 |
},
|
|
@@ -458,6 +471,7 @@
|
|
| 458 |
},
|
| 459 |
"gpt4o_judge": {
|
| 460 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 74.99159035694262,
|
|
|
|
| 461 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 90.12521024107643
|
| 462 |
}
|
| 463 |
},
|
|
@@ -489,7 +503,8 @@
|
|
| 489 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 45.593869731800766
|
| 490 |
},
|
| 491 |
"gpt4o_judge": {
|
| 492 |
-
"MERaLiON-AudioLLM-Whisper-SEA-LION": 47.356321839080465
|
|
|
|
| 493 |
}
|
| 494 |
},
|
| 495 |
"imda_part4_30s_sqa_human_test": {
|
|
@@ -506,6 +521,7 @@
|
|
| 506 |
},
|
| 507 |
"gpt4o_judge": {
|
| 508 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 60.0,
|
|
|
|
| 509 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 61.4
|
| 510 |
}
|
| 511 |
},
|
|
@@ -521,7 +537,8 @@
|
|
| 521 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 42.921624948707425
|
| 522 |
},
|
| 523 |
"gpt4o_judge": {
|
| 524 |
-
"MERaLiON-AudioLLM-Whisper-SEA-LION": 99.46655724251129
|
|
|
|
| 525 |
}
|
| 526 |
},
|
| 527 |
"imda_gr_dialogue": {
|
|
@@ -537,7 +554,8 @@
|
|
| 537 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 25.433333333333337
|
| 538 |
},
|
| 539 |
"gpt4o_judge": {
|
| 540 |
-
"MERaLiON-AudioLLM-Whisper-SEA-LION": 93.86666666666666
|
|
|
|
| 541 |
}
|
| 542 |
},
|
| 543 |
"imda_30s_ds_human_test": {
|
|
@@ -693,6 +711,7 @@
|
|
| 693 |
},
|
| 694 |
"gpt4o_judge": {
|
| 695 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 59.2,
|
|
|
|
| 696 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 47.400000000000006
|
| 697 |
}
|
| 698 |
},
|
|
@@ -709,7 +728,8 @@
|
|
| 709 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 9.666666666666666
|
| 710 |
},
|
| 711 |
"gpt4o_judge": {
|
| 712 |
-
"MERaLiON-AudioLLM-Whisper-SEA-LION": 77.13333333333333
|
|
|
|
| 713 |
}
|
| 714 |
},
|
| 715 |
"iemocap_gender_test": {
|
|
@@ -725,7 +745,8 @@
|
|
| 725 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 44.22310756972111
|
| 726 |
},
|
| 727 |
"gpt4o_judge": {
|
| 728 |
-
"MERaLiON-AudioLLM-Whisper-SEA-LION": 98.20717131474103
|
|
|
|
| 729 |
}
|
| 730 |
},
|
| 731 |
"ytb_asr_batch2": {
|
|
@@ -766,7 +787,8 @@
|
|
| 766 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 86.4610559330894
|
| 767 |
},
|
| 768 |
"gpt4o_judge": {
|
| 769 |
-
"MERaLiON-AudioLLM-Whisper-SEA-LION": 84.31782540512285
|
|
|
|
| 770 |
}
|
| 771 |
},
|
| 772 |
"imda_part5_30s_ds_human_test": {
|
|
@@ -783,6 +805,7 @@
|
|
| 783 |
},
|
| 784 |
"gpt4o_judge": {
|
| 785 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 64.0,
|
|
|
|
| 786 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 56.8
|
| 787 |
}
|
| 788 |
},
|
|
@@ -814,6 +837,7 @@
|
|
| 814 |
},
|
| 815 |
"gpt4o_judge": {
|
| 816 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 60.599999999999994,
|
|
|
|
| 817 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 52.800000000000004
|
| 818 |
}
|
| 819 |
},
|
|
@@ -831,6 +855,7 @@
|
|
| 831 |
},
|
| 832 |
"gpt4o_judge": {
|
| 833 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 55.199999999999996,
|
|
|
|
| 834 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 48.2
|
| 835 |
}
|
| 836 |
},
|
|
@@ -871,6 +896,7 @@
|
|
| 871 |
},
|
| 872 |
"gpt4o_judge": {
|
| 873 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 39.29545454545455,
|
|
|
|
| 874 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 4.868181818181818
|
| 875 |
}
|
| 876 |
},
|
|
@@ -887,7 +913,8 @@
|
|
| 887 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 12.416666666666666
|
| 888 |
},
|
| 889 |
"gpt4o_judge": {
|
| 890 |
-
"MERaLiON-AudioLLM-Whisper-SEA-LION": 10.116666666666667
|
|
|
|
| 891 |
}
|
| 892 |
},
|
| 893 |
"imda_part6_30s_sqa_test": {
|
|
@@ -944,6 +971,7 @@
|
|
| 944 |
},
|
| 945 |
"gpt4o_judge": {
|
| 946 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 64.09333981526495,
|
|
|
|
| 947 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 28.076410484229232
|
| 948 |
}
|
| 949 |
},
|
|
@@ -988,6 +1016,7 @@
|
|
| 988 |
},
|
| 989 |
"gpt4o_judge": {
|
| 990 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 78.60000000000001,
|
|
|
|
| 991 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 77.8
|
| 992 |
}
|
| 993 |
},
|
|
@@ -1028,6 +1057,7 @@
|
|
| 1028 |
},
|
| 1029 |
"gpt4o_judge": {
|
| 1030 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 50.60702875399361,
|
|
|
|
| 1031 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 14.63258785942492
|
| 1032 |
}
|
| 1033 |
},
|
|
@@ -1086,6 +1116,7 @@
|
|
| 1086 |
},
|
| 1087 |
"gpt4o_judge": {
|
| 1088 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 46.31578947368421,
|
|
|
|
| 1089 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 14.736842105263158
|
| 1090 |
}
|
| 1091 |
},
|
|
@@ -1140,7 +1171,8 @@
|
|
| 1140 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 36.016666666666666
|
| 1141 |
},
|
| 1142 |
"gpt4o_judge": {
|
| 1143 |
-
"MERaLiON-AudioLLM-Whisper-SEA-LION": 66.9
|
|
|
|
| 1144 |
}
|
| 1145 |
},
|
| 1146 |
"tedlium3_long_form_test": {
|
|
@@ -1170,7 +1202,8 @@
|
|
| 1170 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 36.81992337164751
|
| 1171 |
},
|
| 1172 |
"gpt4o_judge": {
|
| 1173 |
-
"MERaLiON-AudioLLM-Whisper-SEA-LION": 36.206896551724135
|
|
|
|
| 1174 |
}
|
| 1175 |
},
|
| 1176 |
"tedlium3_test": {
|
|
@@ -1228,6 +1261,7 @@
|
|
| 1228 |
},
|
| 1229 |
"gpt4o_judge": {
|
| 1230 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 66.8,
|
|
|
|
| 1231 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 64.80000000000001
|
| 1232 |
}
|
| 1233 |
},
|
|
@@ -1245,6 +1279,7 @@
|
|
| 1245 |
},
|
| 1246 |
"gpt4o_judge": {
|
| 1247 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 70.0,
|
|
|
|
| 1248 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 64.4
|
| 1249 |
}
|
| 1250 |
},
|
|
@@ -1299,12 +1334,14 @@
|
|
| 1299 |
},
|
| 1300 |
"gigaspeech2_viet": {
|
| 1301 |
"wer": {
|
| 1302 |
-
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.4960741822016732
|
|
|
|
| 1303 |
}
|
| 1304 |
},
|
| 1305 |
"gigaspeech2_thai": {
|
| 1306 |
"wer": {
|
| 1307 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.8901628256099774,
|
|
|
|
| 1308 |
"seallms_audio_7b": 0.3332398502070376
|
| 1309 |
}
|
| 1310 |
},
|
|
|
|
| 13 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 85.2928225451343
|
| 14 |
},
|
| 15 |
"gpt4o_judge": {
|
| 16 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 88.77146631439894,
|
| 17 |
+
"Qwen2-Audio-7B-Instruct": 79.0
|
| 18 |
}
|
| 19 |
},
|
| 20 |
"mmau_mini": {
|
|
|
|
| 42 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 55.60000000000001
|
| 43 |
},
|
| 44 |
"gpt4o_judge": {
|
| 45 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 63.9,
|
| 46 |
+
"Qwen2-Audio-7B-Instruct": 53.0
|
| 47 |
}
|
| 48 |
},
|
| 49 |
"mmau_mini_music": {
|
|
|
|
| 71 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 0.5359281437125748
|
| 72 |
},
|
| 73 |
"gpt4o_judge": {
|
| 74 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6347305389221557,
|
| 75 |
+
"Qwen2-Audio-7B-Instruct": 0.5473684210526316
|
| 76 |
}
|
| 77 |
},
|
| 78 |
"mmau_mini_sound": {
|
|
|
|
| 100 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 0.5105105105105106
|
| 101 |
},
|
| 102 |
"gpt4o_judge": {
|
| 103 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6996996996996997,
|
| 104 |
+
"Qwen2-Audio-7B-Instruct": 0.5980392156862745
|
| 105 |
}
|
| 106 |
},
|
| 107 |
"mmau_mini_speech": {
|
|
|
|
| 129 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 0.6216216216216216
|
| 130 |
},
|
| 131 |
"gpt4o_judge": {
|
| 132 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.5825825825825826,
|
| 133 |
+
"Qwen2-Audio-7B-Instruct": 0.44660194174757284
|
| 134 |
}
|
| 135 |
},
|
| 136 |
"slue_p2_sqa5_test": {
|
|
|
|
| 148 |
},
|
| 149 |
"gpt4o_judge": {
|
| 150 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 88.23529411764707,
|
| 151 |
+
"Qwen2-Audio-7B-Instruct": 84.86666666666666,
|
| 152 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 87.79411764705883
|
| 153 |
}
|
| 154 |
},
|
|
|
|
| 166 |
},
|
| 167 |
"gpt4o_judge": {
|
| 168 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 14.813295034878948,
|
| 169 |
+
"Qwen2-Audio-7B-Instruct": 22.666666666666664,
|
| 170 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 39.462453836684446
|
| 171 |
}
|
| 172 |
},
|
|
|
|
| 208 |
},
|
| 209 |
"gpt4o_judge": {
|
| 210 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 31.641618497109825,
|
| 211 |
+
"Qwen2-Audio-7B-Instruct": 34.86666666666667,
|
| 212 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 4.61271676300578
|
| 213 |
}
|
| 214 |
},
|
|
|
|
| 240 |
},
|
| 241 |
"gpt4o_judge": {
|
| 242 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 71.6,
|
| 243 |
+
"Qwen2-Audio-7B-Instruct": 56.0,
|
| 244 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 67.0
|
| 245 |
}
|
| 246 |
},
|
|
|
|
| 281 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 56.44481887110362
|
| 282 |
},
|
| 283 |
"gpt4o_judge": {
|
| 284 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 57.87700084245998,
|
| 285 |
+
"Qwen2-Audio-7B-Instruct": 64.66666666666666
|
| 286 |
}
|
| 287 |
},
|
| 288 |
"imda_30s_sqa_human_test": {
|
|
|
|
| 393 |
},
|
| 394 |
"gpt4o_judge": {
|
| 395 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 74.2,
|
| 396 |
+
"Qwen2-Audio-7B-Instruct": 57.199999999999996,
|
| 397 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 75.0
|
| 398 |
}
|
| 399 |
},
|
|
|
|
| 409 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 46.713147410358566
|
| 410 |
},
|
| 411 |
"gpt4o_judge": {
|
| 412 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 37.45019920318725,
|
| 413 |
+
"Qwen2-Audio-7B-Instruct": 35.333333333333336
|
| 414 |
}
|
| 415 |
},
|
| 416 |
"public_sg_speech_qa_test": {
|
|
|
|
| 427 |
},
|
| 428 |
"gpt4o_judge": {
|
| 429 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 64.18604651162791,
|
| 430 |
+
"Qwen2-Audio-7B-Instruct": 62.733333333333334,
|
| 431 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 73.02325581395348
|
| 432 |
}
|
| 433 |
},
|
|
|
|
| 471 |
},
|
| 472 |
"gpt4o_judge": {
|
| 473 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 74.99159035694262,
|
| 474 |
+
"Qwen2-Audio-7B-Instruct": 65.6,
|
| 475 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 90.12521024107643
|
| 476 |
}
|
| 477 |
},
|
|
|
|
| 503 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 45.593869731800766
|
| 504 |
},
|
| 505 |
"gpt4o_judge": {
|
| 506 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 47.356321839080465,
|
| 507 |
+
"Qwen2-Audio-7B-Instruct": 57.666666666666664
|
| 508 |
}
|
| 509 |
},
|
| 510 |
"imda_part4_30s_sqa_human_test": {
|
|
|
|
| 521 |
},
|
| 522 |
"gpt4o_judge": {
|
| 523 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 60.0,
|
| 524 |
+
"Qwen2-Audio-7B-Instruct": 43.4,
|
| 525 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 61.4
|
| 526 |
}
|
| 527 |
},
|
|
|
|
| 537 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 42.921624948707425
|
| 538 |
},
|
| 539 |
"gpt4o_judge": {
|
| 540 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 99.46655724251129,
|
| 541 |
+
"Qwen2-Audio-7B-Instruct": 99.66666666666667
|
| 542 |
}
|
| 543 |
},
|
| 544 |
"imda_gr_dialogue": {
|
|
|
|
| 554 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 25.433333333333337
|
| 555 |
},
|
| 556 |
"gpt4o_judge": {
|
| 557 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 93.86666666666666,
|
| 558 |
+
"Qwen2-Audio-7B-Instruct": 61.0
|
| 559 |
}
|
| 560 |
},
|
| 561 |
"imda_30s_ds_human_test": {
|
|
|
|
| 711 |
},
|
| 712 |
"gpt4o_judge": {
|
| 713 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 59.2,
|
| 714 |
+
"Qwen2-Audio-7B-Instruct": 43.2,
|
| 715 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 47.400000000000006
|
| 716 |
}
|
| 717 |
},
|
|
|
|
| 728 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 9.666666666666666
|
| 729 |
},
|
| 730 |
"gpt4o_judge": {
|
| 731 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 77.13333333333333,
|
| 732 |
+
"Qwen2-Audio-7B-Instruct": 0.33333333333333337
|
| 733 |
}
|
| 734 |
},
|
| 735 |
"iemocap_gender_test": {
|
|
|
|
| 745 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 44.22310756972111
|
| 746 |
},
|
| 747 |
"gpt4o_judge": {
|
| 748 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 98.20717131474103,
|
| 749 |
+
"Qwen2-Audio-7B-Instruct": 98.33333333333333
|
| 750 |
}
|
| 751 |
},
|
| 752 |
"ytb_asr_batch2": {
|
|
|
|
| 787 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 86.4610559330894
|
| 788 |
},
|
| 789 |
"gpt4o_judge": {
|
| 790 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 84.31782540512285,
|
| 791 |
+
"Qwen2-Audio-7B-Instruct": 68.66666666666667
|
| 792 |
}
|
| 793 |
},
|
| 794 |
"imda_part5_30s_ds_human_test": {
|
|
|
|
| 805 |
},
|
| 806 |
"gpt4o_judge": {
|
| 807 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 64.0,
|
| 808 |
+
"Qwen2-Audio-7B-Instruct": 50.8,
|
| 809 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 56.8
|
| 810 |
}
|
| 811 |
},
|
|
|
|
| 837 |
},
|
| 838 |
"gpt4o_judge": {
|
| 839 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 60.599999999999994,
|
| 840 |
+
"Qwen2-Audio-7B-Instruct": 50.199999999999996,
|
| 841 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 52.800000000000004
|
| 842 |
}
|
| 843 |
},
|
|
|
|
| 855 |
},
|
| 856 |
"gpt4o_judge": {
|
| 857 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 55.199999999999996,
|
| 858 |
+
"Qwen2-Audio-7B-Instruct": 35.8,
|
| 859 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 48.2
|
| 860 |
}
|
| 861 |
},
|
|
|
|
| 896 |
},
|
| 897 |
"gpt4o_judge": {
|
| 898 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 39.29545454545455,
|
| 899 |
+
"Qwen2-Audio-7B-Instruct": 41.53333333333333,
|
| 900 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 4.868181818181818
|
| 901 |
}
|
| 902 |
},
|
|
|
|
| 913 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 12.416666666666666
|
| 914 |
},
|
| 915 |
"gpt4o_judge": {
|
| 916 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 10.116666666666667,
|
| 917 |
+
"Qwen2-Audio-7B-Instruct": 4.666666666666667
|
| 918 |
}
|
| 919 |
},
|
| 920 |
"imda_part6_30s_sqa_test": {
|
|
|
|
| 971 |
},
|
| 972 |
"gpt4o_judge": {
|
| 973 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 64.09333981526495,
|
| 974 |
+
"Qwen2-Audio-7B-Instruct": 56.86666666666667,
|
| 975 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 28.076410484229232
|
| 976 |
}
|
| 977 |
},
|
|
|
|
| 1016 |
},
|
| 1017 |
"gpt4o_judge": {
|
| 1018 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 78.60000000000001,
|
| 1019 |
+
"Qwen2-Audio-7B-Instruct": 61.6,
|
| 1020 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 77.8
|
| 1021 |
}
|
| 1022 |
},
|
|
|
|
| 1057 |
},
|
| 1058 |
"gpt4o_judge": {
|
| 1059 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 50.60702875399361,
|
| 1060 |
+
"Qwen2-Audio-7B-Instruct": 50.599999999999994,
|
| 1061 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 14.63258785942492
|
| 1062 |
}
|
| 1063 |
},
|
|
|
|
| 1116 |
},
|
| 1117 |
"gpt4o_judge": {
|
| 1118 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 46.31578947368421,
|
| 1119 |
+
"Qwen2-Audio-7B-Instruct": 48.2,
|
| 1120 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 14.736842105263158
|
| 1121 |
}
|
| 1122 |
},
|
|
|
|
| 1171 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 36.016666666666666
|
| 1172 |
},
|
| 1173 |
"gpt4o_judge": {
|
| 1174 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 66.9,
|
| 1175 |
+
"Qwen2-Audio-7B-Instruct": 66.33333333333333
|
| 1176 |
}
|
| 1177 |
},
|
| 1178 |
"tedlium3_long_form_test": {
|
|
|
|
| 1202 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 36.81992337164751
|
| 1203 |
},
|
| 1204 |
"gpt4o_judge": {
|
| 1205 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 36.206896551724135,
|
| 1206 |
+
"Qwen2-Audio-7B-Instruct": 39.0
|
| 1207 |
}
|
| 1208 |
},
|
| 1209 |
"tedlium3_test": {
|
|
|
|
| 1261 |
},
|
| 1262 |
"gpt4o_judge": {
|
| 1263 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 66.8,
|
| 1264 |
+
"Qwen2-Audio-7B-Instruct": 58.0,
|
| 1265 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 64.80000000000001
|
| 1266 |
}
|
| 1267 |
},
|
|
|
|
| 1279 |
},
|
| 1280 |
"gpt4o_judge": {
|
| 1281 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 70.0,
|
| 1282 |
+
"Qwen2-Audio-7B-Instruct": 51.4,
|
| 1283 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 64.4
|
| 1284 |
}
|
| 1285 |
},
|
|
|
|
| 1334 |
},
|
| 1335 |
"gigaspeech2_viet": {
|
| 1336 |
"wer": {
|
| 1337 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.4960741822016732,
|
| 1338 |
+
"Qwen2-Audio-7B-Instruct": 1.5011671350211242
|
| 1339 |
}
|
| 1340 |
},
|
| 1341 |
"gigaspeech2_thai": {
|
| 1342 |
"wer": {
|
| 1343 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.8901628256099774,
|
| 1344 |
+
"Qwen2-Audio-7B-Instruct": 1.2449725324578913,
|
| 1345 |
"seallms_audio_7b": 0.3332398502070376
|
| 1346 |
}
|
| 1347 |
},
|