{ "generatedAt": "2026-03-13T16:26:56.022330+00:00", "sourceRoot": "data/results_real", "tasks": [ { "id": "K-disentQA", "label": "SCA-QA", "metricLabel": "Speech Context Faithfulness", "shortMetric": "Faithfulness", "lowerBetter": false, "datasets": [ { "id": "history_after_chosun", "label": "History_after_chosun" }, { "id": "history_before_chosun", "label": "History_before_chosun" }, { "id": "k-sports", "label": "K-sports" }, { "id": "kpop", "label": "K-pop" } ] }, { "id": "SQA", "label": "Speech QA", "metricLabel": "Accuracy (%)", "shortMetric": "Acc(%)", "lowerBetter": false, "datasets": [ { "id": "click", "label": "CLICk" }, { "id": "click_other", "label": "CLICk Other" }, { "id": "kobest_boolq", "label": "KoBest BoolQ" }, { "id": "kobest_boolq_other", "label": "KoBest BoolQ Other" }, { "id": "kcsat", "label": "KCSAT" }, { "id": "kcsat_other", "label": "KCSAT Other" } ] }, { "id": "Instruct", "label": "Speech Instruction", "metricLabel": "Score (GPT-4o as Judge)", "shortMetric": "Score (GPT-4o as Judge)", "lowerBetter": false, "datasets": [ { "id": "alpaca", "label": "Alpaca" }, { "id": "alpaca_other", "label": "Alpaca Other" }, { "id": "kudge", "label": "KUDGE" }, { "id": "kudge_other", "label": "KUDGE Other" }, { "id": "openhermes", "label": "OpenHermes" }, { "id": "openhermes_other", "label": "OpenHermes Other" }, { "id": "vicuna", "label": "Vicuna" }, { "id": "vicuna_other", "label": "Vicuna Other" } ] }, { "id": "ASR", "label": "ASR", "metricLabel": "CER (%)", "shortMetric": "CER", "lowerBetter": true, "datasets": [ { "id": "common_voice_korea", "label": "CommonVoice-KO" }, { "id": "ksponspeech_eval_clean", "label": "KsponSpeech Clean" }, { "id": "ksponspeech_eval_other", "label": "KsponSpeech Other" }, { "id": "zeroth_korean_test", "label": "Zeroth-Korean" }, { "id": "zeroth_korean_test_other", "label": "Zeroth-Korean Other" } ] }, { "id": "Translation", "label": "Translation", "metricLabel": "BERTScore", "shortMetric": "BERTScore", "lowerBetter": false, "datasets": [ { "id": "etri_tst-COMMON", "label": "ETRI-TST-Common" }, { "id": "etri_tst-HE", "label": "ETRI-TST-HE" } ] }, { "id": "PA-QA", "label": "Postion Aware Question Answering", "metricLabel": "Accuracy (%)", "shortMetric": "Acc(%)", "lowerBetter": false, "datasets": [ { "id": "mctest", "label": "MCTest" }, { "id": "mctest_other", "label": "MCTest Other" } ] } ], "entries": [ { "id": "gemini_flash", "rank_name": "gemini_flash", "model": "gemini-2.5-flash-lite", "url": "", "tasks": { "ASR": { "common_voice_korea": { "value": 13.738049713193117, "display": "13.74" }, "ksponspeech_eval_clean": { "value": 83.18526725056962, "display": "83.19" }, "ksponspeech_eval_other": { "value": 45.13922315086276, "display": "45.14" }, "zeroth_korean_test": { "value": 13.599128992119452, "display": "13.60" }, "zeroth_korean_test_other": { "value": 14.558274574865202, "display": "14.56" } }, "Instruct": { "alpaca": { "value": 85.94202898550726, "display": "85.94" }, "alpaca_other": { "value": 86.8840579710145, "display": "86.88" }, "kudge": { "value": 70.28725314183116, "display": "70.29" }, "kudge_other": { "value": 70.3859964093356, "display": "70.39" }, "openhermes": { "value": 82.69230769230771, "display": "82.69" }, "openhermes_other": { "value": 80.70512820512825, "display": "80.71" }, "vicuna": { "value": 76.42857142857144, "display": "76.43" }, "vicuna_other": { "value": 73.28571428571429, "display": "73.29" } }, "K-disentQA": { "history_after_chosun": { "value": 59.72, "display": "59.72" }, "history_before_chosun": { "value": 66.25, "display": "66.25" }, "k-sports": { "value": 86.96, "display": "86.96" }, "kpop": { "value": 78.26, "display": "78.26" } }, "PA-QA": { "mctest_other": { "value": 92.00, "display": "92.00" }, "mctest": { "value": 92.31, "display": "92.31" } }, "SQA": { "click": { "value": 62.27, "display": "62.27" }, "click_other": { "value": 66.69, "display": "66.69" }, "kobest_boolq": { "value": 52.86, "display": "52.86" }, "kobest_boolq_other": { "value": 54.92, "display": "54.92" }, "kcsat": { "value": 81.18 , "display": "81.18 " }, "kcsat_other": { "value": 78.82, "display": "78.82" } }, "Translation": { "etri_tst-COMMON": { "value": 91.60, "display": "91.60" }, "etri_tst-HE": { "value": 92.17, "display": "92.17" } } } }, { "id": "gemma3n_vllm", "rank_name": "gemma3n_vllm", "model": "google/gemma-3n-E4B-it", "url": "https://huggingface.co/google/gemma-3n-E4B-it", "tasks": { "ASR": { "common_voice_korea": { "value": 144.5793499043977, "display": "144.58" }, "ksponspeech_eval_clean": { "value": 142.99420974518696, "display": "142.99" }, "ksponspeech_eval_other": { "value": 130.76182558088652, "display": "130.76" }, "zeroth_korean_test": { "value": 107.88054749066778, "display": "107.88" }, "zeroth_korean_test_other": { "value": 118.62297801742017, "display": "118.62" } }, "Instruct": { "alpaca": { "value": 82.97101449275362, "display": "82.97" }, "alpaca_other": { "value": 83.36231884057973, "display": "83.36" }, "kudge": { "value": 71.38240574506275, "display": "71.38" }, "kudge_other": { "value": 70.69120287253139, "display": "70.69" }, "openhermes": { "value": 84.61538461538464, "display": "84.62" }, "openhermes_other": { "value": 85.96153846153848, "display": "85.96" }, "vicuna": { "value": 80.21428571428574, "display": "80.21" }, "vicuna_other": { "value": 80.00000000000003, "display": "80.00" } }, "K-disentQA": { "history_after_chosun": { "value": 45.76, "display": "45.76" }, "history_before_chosun": { "value": 67.35, "display": "67.35" }, "k-sports": { "value": 76.60, "display": "76.60" }, "kpop": { "value": 73.24, "display": "73.24" } }, "PA-QA": { "mctest": { "value": 48.92 , "display": "48.92 " }, "mctest_other": { "value": 48.92, "display": "48.92" } }, "SQA": { "click": { "value": 35.79, "display": "35.79" }, "click_other": { "value": 35.708367854183926, "display": "35.71" }, "kobest_boolq": { "value": 50.89 , "display": "50.89" }, "kobest_boolq_other": { "value": 50.54, "display": "50.54" }, "kcsat": { "value": 34.12 , "display": "34.12" }, "kcsat_other": { "value": 40.00, "display": "40.00" } }, "Translation": { "etri_tst-COMMON": { "value": 87.39, "display": "87.39" }, "etri_tst-HE": { "value": 87.79, "display": "87.79" } } } }, { "id": "gpt_audio", "rank_name": "gpt_audio", "model": "gpt-audio-mini", "url": "", "tasks": { "ASR": { "common_voice_korea": { "value": 33.04971319311664, "display": "33.05" }, "ksponspeech_eval_clean": { "value": 134.18967787788205, "display": "134.19" }, "ksponspeech_eval_other": { "value": 63.6444522236322, "display": "63.64" }, "zeroth_korean_test": { "value": 6.87, "display": "6.87" }, "zeroth_korean_test_other": { "value": 9.00, "display": "9.00" } }, "Instruct": { "alpaca": { "value": 90.57971014492755, "display": "90.58" }, "alpaca_other": { "value": 90.57971014492755, "display": "90.58" }, "kudge": { "value": 74.06642728904846, "display": "74.07" }, "kudge_other": { "value": 73.98563734290842, "display": "73.99" }, "openhermes": { "value": 89.42307692307693, "display": "89.42" }, "openhermes_other": { "value": 89.61538461538464, "display": "89.62" }, "vicuna": { "value": 82.14285714285717, "display": "82.14" }, "vicuna_other": { "value": 81.78571428571429, "display": "81.79" } }, "K-disentQA": { "history_after_chosun": { "value": 32.30, "display": "32.30" }, "history_before_chosun": { "value": 61.40, "display": "61.40" }, "k-sports": { "value": 39.30, "display": "39.30" }, "kpop": { "value": 37.50, "display": "37.50" } }, "PA-QA": { "mctest_other": { "value": 79.69, "display": "79.69" }, "mctest": { "value": 77.23 , "display": "77.23 " } }, "SQA": { "click": { "value": 61.64043082021541, "display": "61.64" }, "click_other": { "value": 60.06628003314002, "display": "60.07" }, "kobest_boolq": { "value": 51.878354203935594, "display": "51.88" }, "kobest_boolq_other": { "value": 50.44722719141323, "display": "50.45" }, "kcsat": { "value": 52.90 , "display": "52.90 " }, "kcsat_other": { "value": 47.10, "display": "47.10" } }, "Translation": { "etri_tst-COMMON": { "value": 93.10, "display": "93.10" }, "etri_tst-HE": { "value": 93.69, "display": "93.69" } } } }, { "id": "qwen3_onmi", "rank_name": "qwen3_onmi", "model": "Qwen/Qwen3-Omni-30B-A3B-Instruct", "url": "https://huggingface.co/Qwen/Qwen3-Omni-30B-A3B-Instruct", "tasks": { "ASR": { "common_voice_korea": { "value": 4.961759082217973, "display": "4.96" }, "ksponspeech_eval_clean": { "value": 8.459624992161208, "display": "8.46" }, "ksponspeech_eval_other": { "value": 7.907058154290465, "display": "7.91" }, "zeroth_korean_test": { "value": 3.3336789713811696, "display": "3.33" }, "zeroth_korean_test_other": { "value": 3.9143508917461634, "display": "3.91" } }, "Instruct": { "alpaca": { "value": 84.05797101449278, "display": "84.06" }, "alpaca_other": { "value": 83.04347826086959, "display": "83.04" }, "kudge": { "value": 71.86714542190298, "display": "71.87" }, "kudge_other": { "value": 71.82226211849184, "display": "71.82" }, "openhermes": { "value": 86.5384615384616, "display": "86.54" }, "openhermes_other": { "value": 85.19230769230771, "display": "85.19" }, "vicuna": { "value": 79.64285714285715, "display": "79.64" }, "vicuna_other": { "value": 78.42857142857143, "display": "78.43" } }, "K-disentQA": { "history_after_chosun": { "value": 92.31, "display": "92.31" }, "history_before_chosun": { "value": 94.64, "display": "94.64" }, "k-sports": { "value": 93.88, "display": "93.88" }, "kpop": { "value": 95.71, "display": "95.71" } }, "PA-QA": { "mctest": { "value": 93.54, "display": "93.54" }, "mctest_other": { "value": 93.85, "display": "93.85" } }, "SQA": { "click": { "value": 64.04, "display": "64.04" }, "click_other": { "value": 62.30, "display": "62.30" }, "kobest_boolq": { "value": 51.341681574239715, "display": "51.34" }, "kobest_boolq_other": { "value": 51.16, "display": "51.16" }, "kcsat": { "value": 83.53, "display": "83.53" }, "kcsat_other": { "value": 84.71, "display": "84.71" } }, "Translation": { "etri_tst-COMMON": { "value": 93.40, "display": "93.40" }, "etri_tst-HE": { "value": 93.96, "display": "93.96" } } } }, { "id": "voxtral", "rank_name": "voxtral", "model": "mistralai/Voxtral-Mini-3B-2507", "url": "https://huggingface.co/mistralai/Voxtral-Mini-3B-2507", "tasks": { "ASR": { "common_voice_korea": { "value": 60.09560229445506, "display": "60.10" }, "ksponspeech_eval_clean": { "value": 62.62463680260875, "display": "62.62" }, "ksponspeech_eval_other": { "value": 56.042240989512685, "display": "56.04" }, "zeroth_korean_test": { "value": 40.92181667357943, "display": "40.92" }, "zeroth_korean_test_other": { "value": 39.06574035669846, "display": "39.07" } }, "Instruct": { "alpaca": { "value": 72.89855072463769, "display": "72.90" }, "alpaca_other": { "value": 72.46376811594205, "display": "72.46" }, "kudge": { "value": 61.9658886894074, "display": "61.97" }, "kudge_other": { "value": 61.69658886894065, "display": "61.70" }, "openhermes": { "value": 69.10256410256412, "display": "69.10" }, "openhermes_other": { "value": 69.61538461538463, "display": "69.62" }, "vicuna": { "value": 67.78571428571428, "display": "67.79" }, "vicuna_other": { "value": 69.49999999999997, "display": "69.50" } }, "K-disentQA": { "history_after_chosun": { "value": 85.71, "display": "85.71" }, "history_before_chosun": { "value": 85.71, "display": "85.71" }, "k-sports": { "value": 95.83, "display": "95.83" }, "kpop": { "value": 88.64, "display": "88.64" } }, "PA-QA": { "mctest_other": { "value": 84.92, "display": "84.92" }, "mctest": { "value": 86.15, "display": "86.15" } }, "SQA": { "click": { "value": 42.58 , "display": "42.58 " }, "click_other": { "value": 42.92, "display": " 42.92" }, "kobest_boolq": { "value": 50.54, "display": "50.54" }, "kobest_boolq_other": { "value": 50.54, "display": "50.54" }, "kcsat": { "value": 69.41, "display": "69.41" }, "kcsat_other": { "value": 72.94, "display": "72.94" } }, "Translation": { "etri_tst-COMMON": { "value": 92.73, "display": "92.73" }, "etri_tst-HE": { "value": 93.09, "display": "93.09" } } } } ] }