Spaces:
Sleeping
Sleeping
tjdmstj commited on
Commit ·
11907d4
1
Parent(s): 53c4627
leaderboard
Browse files. This view is limited to 50 files because it contains too many changes. See raw diff
- README.md +34 -12
- app.py +1100 -0
- build_leaderboard_data.py +292 -0
- data/leaderboard-data.json +1022 -0
- data/results_real/ASR/gemini_flash/common_voice_korea/common_voice_korea_summary.json +12 -0
- data/results_real/ASR/gemini_flash/common_voice_korea/prompt_v2/common_voice_korea_summary.json +12 -0
- data/results_real/ASR/gemini_flash/common_voice_korea_other/common_voice_korea_noisy_summary.json +12 -0
- data/results_real/ASR/gemini_flash/common_voice_korea_other/prompt_v2/common_voice_korea_noisy_summary.json +12 -0
- data/results_real/ASR/gemini_flash/ksponspeech_eval_clean/ksponspeech_eval_clean_summary.json +12 -0
- data/results_real/ASR/gemini_flash/ksponspeech_eval_clean/prompt_v2/ksponspeech_eval_clean_summary.json +12 -0
- data/results_real/ASR/gemini_flash/ksponspeech_eval_other/ksponspeech_eval_other_summary.json +12 -0
- data/results_real/ASR/gemini_flash/ksponspeech_eval_other/prompt_v2/ksponspeech_eval_other_summary.json +12 -0
- data/results_real/ASR/gemini_flash/zeroth_korean_test/prompt_v2/zeroth_korean_test_summary.json +12 -0
- data/results_real/ASR/gemini_flash/zeroth_korean_test/zeroth_korean_test_summary.json +12 -0
- data/results_real/ASR/gemini_flash/zeroth_korean_test_other/prompt_v2/zeroth_korean_test_noisy_summary.json +12 -0
- data/results_real/ASR/gemini_flash/zeroth_korean_test_other/zeroth_korean_test_noisy_summary.json +12 -0
- data/results_real/ASR/gemma3n_vllm/common_voice_korea/common_voice_korea_summary.json +12 -0
- data/results_real/ASR/gemma3n_vllm/common_voice_korea/prompt_v1/common_voice_korea_summary.json +12 -0
- data/results_real/ASR/gemma3n_vllm/common_voice_korea/prompt_v2/common_voice_korea_summary.json +12 -0
- data/results_real/ASR/gemma3n_vllm/common_voice_korea/prompt_v3/common_voice_korea_summary.json +12 -0
- data/results_real/ASR/gemma3n_vllm/common_voice_korea/prompt_v4/common_voice_korea_summary.json +12 -0
- data/results_real/ASR/gemma3n_vllm/common_voice_korea_other/common_voice_korea_noisy_summary.json +12 -0
- data/results_real/ASR/gemma3n_vllm/common_voice_korea_other/prompt_v1/common_voice_korea_noisy_summary.json +12 -0
- data/results_real/ASR/gemma3n_vllm/common_voice_korea_other/prompt_v2/common_voice_korea_noisy_summary.json +12 -0
- data/results_real/ASR/gemma3n_vllm/common_voice_korea_other/prompt_v3/common_voice_korea_noisy_summary.json +12 -0
- data/results_real/ASR/gemma3n_vllm/common_voice_korea_other/prompt_v4/common_voice_korea_noisy_summary.json +12 -0
- data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_clean/ksponspeech_eval_clean_summary.json +12 -0
- data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_clean/prompt_v1/ksponspeech_eval_clean_summary.json +12 -0
- data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_clean/prompt_v2/ksponspeech_eval_clean_summary.json +12 -0
- data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_clean/prompt_v3/ksponspeech_eval_clean_summary.json +12 -0
- data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_clean/prompt_v4/ksponspeech_eval_clean_summary.json +12 -0
- data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_other/ksponspeech_eval_other_summary.json +12 -0
- data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_other/prompt_v1/ksponspeech_eval_other_summary.json +12 -0
- data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_other/prompt_v2/ksponspeech_eval_other_summary.json +12 -0
- data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_other/prompt_v3/ksponspeech_eval_other_summary.json +12 -0
- data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_other/prompt_v4/ksponspeech_eval_other_summary.json +12 -0
- data/results_real/ASR/gemma3n_vllm/zeroth_korean_test/prompt_v1/zeroth_korean_test_summary.json +12 -0
- data/results_real/ASR/gemma3n_vllm/zeroth_korean_test/prompt_v2/zeroth_korean_test_summary.json +12 -0
- data/results_real/ASR/gemma3n_vllm/zeroth_korean_test/prompt_v3/zeroth_korean_test_summary.json +12 -0
- data/results_real/ASR/gemma3n_vllm/zeroth_korean_test/prompt_v4/zeroth_korean_test_summary.json +12 -0
- data/results_real/ASR/gemma3n_vllm/zeroth_korean_test/zeroth_korean_test_summary.json +12 -0
- data/results_real/ASR/gemma3n_vllm/zeroth_korean_test_other/prompt_v1/zeroth_korean_test_noisy_summary.json +12 -0
- data/results_real/ASR/gemma3n_vllm/zeroth_korean_test_other/prompt_v2/zeroth_korean_test_noisy_summary.json +12 -0
- data/results_real/ASR/gemma3n_vllm/zeroth_korean_test_other/prompt_v3/zeroth_korean_test_noisy_summary.json +12 -0
- data/results_real/ASR/gemma3n_vllm/zeroth_korean_test_other/prompt_v4/zeroth_korean_test_noisy_summary.json +12 -0
- data/results_real/ASR/gemma3n_vllm/zeroth_korean_test_other/zeroth_korean_test_noisy_summary.json +12 -0
- data/results_real/ASR/gpt_realtime_mini/common_voice_korea/common_voice_korea_summary.json +12 -0
- data/results_real/ASR/gpt_realtime_mini/common_voice_korea/prompt_v2/common_voice_korea_summary.json +12 -0
- data/results_real/ASR/gpt_realtime_mini/common_voice_korea_other/common_voice_korea_noisy_summary.json +12 -0
- data/results_real/ASR/gpt_realtime_mini/common_voice_korea_other/prompt_v2/common_voice_korea_noisy_summary.json +12 -0
README.md
CHANGED
|
@@ -1,12 +1,34 @@
|
|
| 1 |
-
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# KoALa-Bench Gradio
|
| 2 |
+
|
| 3 |
+
`KoALa-Bench`์ ์ ์ leaderboard ํ์ด์ง๋ฅผ `gradio` ๊ธฐ๋ฐ UI๋ก ์ฎ๊ธด ๋ฒ์ ์
๋๋ค.
|
| 4 |
+
원본 `data/`와 `images/`는 함께 복사해 둔 상태입니다.
|
| 5 |
+
|
| 6 |
+
## Run
|
| 7 |
+
|
| 8 |
+
```bash
|
| 9 |
+
cd /data/esseo/PycharmProject/KoALa-Bench-gradio
|
| 10 |
+
pip install -r requirements.txt
|
| 11 |
+
python app.py
|
| 12 |
+
```
|
| 13 |
+
|
| 14 |
+
If port `7860` is already in use:
|
| 15 |
+
|
| 16 |
+
```bash
|
| 17 |
+
GRADIO_SERVER_PORT=7861 python app.py
|
| 18 |
+
```
|
| 19 |
+
|
| 20 |
+
## Rebuild leaderboard data
|
| 21 |
+
|
| 22 |
+
If `data/results_real` changes, rebuild `data/leaderboard-data.json` with:
|
| 23 |
+
|
| 24 |
+
```bash
|
| 25 |
+
cd /data/esseo/PycharmProject/KoALa-Bench-gradio
|
| 26 |
+
python build_leaderboard_data.py
|
| 27 |
+
```
|
| 28 |
+
|
| 29 |
+
## Files
|
| 30 |
+
|
| 31 |
+
- `app.py`: leaderboard 계산 로직 + Gradio UI
|
| 32 |
+
- `build_leaderboard_data.py`: `results_real`를 순회해 `leaderboard-data.json` 재생성
|
| 33 |
+
- `data/`: 원본 leaderboard 및 평가 결과 데이터 전체
|
| 34 |
+
- `images/`: 원본 뱃지/아이콘 이미지
|
app.py
ADDED
|
@@ -0,0 +1,1100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import base64
|
| 4 |
+
import json
|
| 5 |
+
import os
|
| 6 |
+
from functools import cmp_to_key
|
| 7 |
+
from html import escape
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
from typing import Any
|
| 10 |
+
|
| 11 |
+
import gradio as gr
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
# Sentinel view id for the landing page (as opposed to a task id).
HOME_VIEW = "HOME"

# Preferred display order of task ids; ids not listed here sort after these.
TASK_ORDER = ["K-disentQA", "SQA", "Instruct", "ASR", "Translation", "LSQA"]

# Filesystem locations, all resolved relative to this module's directory.
ROOT = Path(__file__).parent
DATA_PATH = ROOT.joinpath("data", "leaderboard-data.json")
IMAGES_DIR = ROOT.joinpath("images")
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def to_num(value: Any) -> float | None:
    """Convert *value* to a finite float, or return ``None`` if that is not possible.

    ``None`` is returned when ``float(value)`` raises (wrong type, unparsable
    string, or an integer too large for a float) and also when the result is
    NaN or infinite. Non-finite numbers are treated as missing because they
    would otherwise propagate into averages, min/max ranges and sort
    comparisons.
    """
    import math  # local import keeps the block self-contained

    try:
        num = float(value)
    except (TypeError, ValueError, OverflowError):
        return None
    return num if math.isfinite(num) else None
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def average(values: list[float | None]) -> float | None:
    """Return the arithmetic mean of the non-``None`` items in *values*.

    Returns ``None`` when *values* is empty or contains only ``None``.
    """
    present = [item for item in values if item is not None]
    return sum(present) / len(present) if present else None
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def compare_scores(left: float | None, right: float | None, lower_better: bool) -> int:
    """Three-way comparator for two scores, suitable for ``cmp_to_key``.

    Returns a negative number when *left* should sort first, a positive number
    when *right* should sort first, and ``0`` when they tie. ``None`` always
    sorts after any real score. *lower_better* selects whether the smaller or
    the larger score sorts first.
    """
    if left is None or right is None:
        # A missing side is pushed to the end; two missing sides tie.
        return int(left is None) - int(right is None)
    if left == right:
        return 0
    left_first = left < right if lower_better else left > right
    return -1 if left_first else 1
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def ordered_tasks(tasks: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Return *tasks* sorted by their position in ``TASK_ORDER``, then by label.

    Tasks whose ``"id"`` is not listed in ``TASK_ORDER`` are placed after all
    listed tasks.
    """

    def position(task_id: Any) -> int:
        # Unlisted ids get a large index so they sort last.
        return TASK_ORDER.index(task_id) if task_id in TASK_ORDER else 10**6

    return sorted(tasks, key=lambda task: (position(task["id"]), task["label"]))
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def dataset_ids(task: dict[str, Any]) -> list[str]:
    """Return the ``"id"`` of every dataset listed under ``task["datasets"]``.

    A task without a ``"datasets"`` key yields an empty list.
    """
    ids = []
    for dataset in task.get("datasets", []):
        ids.append(dataset["id"])
    return ids
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def metric_value(entry: dict[str, Any], task_id: str, dataset_id: str) -> float | None:
    """Return the numeric ``"value"`` stored at ``entry["tasks"][task_id][dataset_id]``.

    Returns ``None`` when the entry has no (or an empty) record for that
    task/dataset pair; otherwise the record's ``"value"`` is passed through
    ``to_num``.
    """
    per_task = entry.get("tasks", {}).get(task_id, {})
    record = per_task.get(dataset_id)
    return to_num(record.get("value")) if record else None
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def metric_display(entry: dict[str, Any], task_id: str, dataset_id: str) -> str:
    """Return the text to show for one task/dataset cell of *entry*.

    The record's ``"display"`` field is preferred, then its ``"value"``; each
    is converted with ``str``. ``"-"`` is returned when the record is missing
    or empty, or when both fields are ``None``.
    """
    record = entry.get("tasks", {}).get(task_id, {}).get(dataset_id)
    if not record:
        return "-"
    for field in ("display", "value"):
        shown = record.get(field)
        if shown is not None:
            return str(shown)
    return "-"
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def compute_task_overall(entry: dict[str, Any], task: dict[str, Any]) -> float | None:
    """Average *entry*'s metric values across all datasets of *task*.

    Datasets without a usable value are ignored by ``average``; the result is
    ``None`` when no dataset has a value.
    """
    scores = []
    for dataset_id in dataset_ids(task):
        scores.append(metric_value(entry, task["id"], dataset_id))
    return average(scores)
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def normalize_task_scores(entries: list[dict[str, Any]], tasks: list[dict[str, Any]]) -> dict[str, dict[str, float] | None]:
    """Collect the min/max of each task's overall score across *entries*.

    Despite the name, no score is rescaled here: the result maps each task id
    to ``{"min": ..., "max": ...}`` computed over the non-``None``
    ``entry["task_overall"][task_id]`` values, or to ``None`` when no entry
    has a score for that task.
    """
    ranges: dict[str, dict[str, float] | None] = {}
    for task in tasks:
        task_id = task["id"]
        observed = [
            entry["task_overall"][task_id]
            for entry in entries
            if entry["task_overall"][task_id] is not None
        ]
        ranges[task_id] = {"min": min(observed), "max": max(observed)} if observed else None
    return ranges
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
def normalized_score(value: float | None, score_range: dict[str, float] | None, lower_better: bool) -> float | None:
    """Rescale *value* within *score_range*, with 100 as the best end.

    Returns ``None`` if either *value* or *score_range* is ``None``, and
    ``100.0`` when the range is degenerate (``min == max``). With
    *lower_better* the range minimum maps to 100, otherwise the maximum does.
    """
    if value is None or score_range is None:
        return None
    low, high = score_range["min"], score_range["max"]
    if low == high:
        return 100.0
    # Distance from the worst end of the range, measured toward the best end.
    distance = (high - value) if lower_better else (value - low)
    return (distance / (high - low)) * 100
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
def enrich_entries(entries: list[dict[str, Any]], tasks: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Return copies of *entries* extended with per-task and overall scores.

    Each returned dict carries the original keys plus:

    * ``"task_overall"``: task id -> ``compute_task_overall`` result,
    * ``"normalized_task_scores"``: task id -> ``normalized_score`` of that
      value within the min/max range observed across all entries,
    * ``"overall"``: the ``average`` of the normalized task scores.
    """
    # Pass 1: raw per-task averages, needed before ranges can be computed.
    with_overall = [
        {
            **entry,
            "task_overall": {task["id"]: compute_task_overall(entry, task) for task in tasks},
        }
        for entry in entries
    ]
    task_ranges = normalize_task_scores(with_overall, tasks)

    # Pass 2: rescale each task score against its range and average them.
    enriched = []
    for entry in with_overall:
        scaled = {
            task["id"]: normalized_score(
                entry["task_overall"][task["id"]],
                task_ranges[task["id"]],
                task["lowerBetter"],
            )
            for task in tasks
        }
        overall = average([scaled[task["id"]] for task in tasks])
        enriched.append({**entry, "normalized_task_scores": scaled, "overall": overall})
    return enriched
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def sort_overall(entries: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Return copies of *entries* sorted by ``"overall"`` (higher first) with a 1-based ``"rank"``.

    Entries whose ``"overall"`` is ``None`` sort last (see ``compare_scores``).
    Ranks are consecutive positions; tied entries do not share a rank.
    """

    def by_overall(left: dict[str, Any], right: dict[str, Any]) -> int:
        return compare_scores(left["overall"], right["overall"], False)

    ranked = []
    for position, entry in enumerate(sorted(entries, key=cmp_to_key(by_overall)), start=1):
        ranked.append({**entry, "rank": position})
    return ranked
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
def sort_task(entries: list[dict[str, Any]], task: dict[str, Any], dataset_id: str) -> list[dict[str, Any]]:
    """Return copies of *entries* ranked for one task, with a 1-based ``"rank"``.

    When *dataset_id* is ``"Overall"`` entries are ordered by their
    ``"task_overall"`` score for *task*; otherwise by ``metric_value`` for that
    dataset. The sort direction follows ``task["lowerBetter"]``.
    """

    def score_of(entry: dict[str, Any]) -> float | None:
        if dataset_id == "Overall":
            return entry["task_overall"][task["id"]]
        return metric_value(entry, task["id"], dataset_id)

    def compare(left: dict[str, Any], right: dict[str, Any]) -> int:
        return compare_scores(score_of(left), score_of(right), task["lowerBetter"])

    ordered = sorted(entries, key=cmp_to_key(compare))
    return [{**entry, "rank": position} for position, entry in enumerate(ordered, start=1)]
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
def metric_class(lower_better: bool, value: float | None) -> str:
    """Return the CSS class name for a metric cell.

    ``"muted"`` for a missing value; otherwise ``"metric-bad"`` for
    lower-is-better metrics and ``"metric-good"`` for higher-is-better ones.
    The class depends only on the metric direction, not on the value itself.
    """
    if value is None:
        return "muted"
    if lower_better:
        return "metric-bad"
    return "metric-good"
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
def fmt_score(value: float | None) -> str:
    """Format *value* with two decimal places, or ``"-"`` when it is ``None``."""
    if value is None:
        return "-"
    return format(value, ".2f")
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
def image_data_uri(path: Path) -> str:
    """Return the file at *path* as a base64 ``data:`` URI.

    The media type is derived from the file suffix (case-insensitive). A
    missing suffix is treated as PNG. Suffixes that differ from their
    registered image subtype (for example ``.jpg`` -> ``image/jpeg``,
    ``.svg`` -> ``image/svg+xml``) are mapped explicitly; any other suffix is
    used verbatim as the subtype.

    Raises whatever ``path.read_bytes()`` raises (e.g. ``FileNotFoundError``).
    """
    # File suffixes whose IANA image subtype is not simply the suffix itself.
    subtype_aliases = {
        "jpg": "jpeg",
        "jpe": "jpeg",
        "svg": "svg+xml",
        "tif": "tiff",
        "ico": "vnd.microsoft.icon",
    }
    encoded = base64.b64encode(path.read_bytes()).decode("ascii")
    suffix = path.suffix.lower().lstrip(".") or "png"
    mime = f"image/{subtype_aliases.get(suffix, suffix)}"
    return f"data:{mime};base64,{encoded}"
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
def load_payload() -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
    """Read ``DATA_PATH`` and return ``(tasks, entries)`` ready for rendering.

    The JSON file's ``"tasks"`` list is put in display order with
    ``ordered_tasks`` and its ``"entries"`` list is scored with
    ``enrich_entries``; a missing key is treated as an empty list.
    """
    raw_text = DATA_PATH.read_text(encoding="utf-8")
    payload = json.loads(raw_text)
    tasks = ordered_tasks(payload.get("tasks", []))
    return tasks, enrich_entries(payload.get("entries", []), tasks)
|
| 184 |
+
|
| 185 |
+
|
| 186 |
+
# Leaderboard data is loaded and ranked once, at import time.
TASKS, ENTRIES = load_payload()
TASK_MAP = {task["id"]: task for task in TASKS}  # task id -> task definition
RANKED_OVERALL = sort_overall(ENTRIES)

# Images are inlined as data URIs so the generated HTML needs no file serving.
BADGE_IMAGES = {
    place: image_data_uri(IMAGES_DIR / filename)
    for place, filename in ((1, "1st.png"), (2, "2nd.png"), (3, "3rd.png"))
}
EXTERNAL_LINK_IMAGE = image_data_uri(IMAGES_DIR / "external-link.png")
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
def menu_choices() -> list[tuple[str, str]]:
    """Build the ``(label, value)`` pairs for the view selector.

    The first pair is the home view; one pair per task in ``TASKS`` follows,
    labelled with the task label and its dataset count, and valued with the
    task id.
    """
    task_items = [
        (f"{task['label']}\n{len(task['datasets'])} datasets", task["id"])
        for task in TASKS
    ]
    return [("Home\nOverall ranking", HOME_VIEW), *task_items]
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
def dataset_choices(task_id: str) -> list[str]:
    """Return ``"Overall"`` followed by the dataset ids of the task *task_id*.

    Raises ``KeyError`` if *task_id* is not a key of ``TASK_MAP``.
    """
    options = ["Overall"]
    for dataset in TASK_MAP[task_id].get("datasets", []):
        options.append(dataset["id"])
    return options
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
def render_rank_strip(entries: list[dict[str, Any]]) -> str:
    """Render the "Top Ranking" strip for the first 12 of *entries* as HTML.

    Each entry needs ``"rank"`` and ``"rank_name"``. Ranks present in
    ``BADGE_IMAGES`` get an image badge; every other rank gets a ``#N`` text
    badge. The rank name is HTML-escaped.
    """

    def badge_for(rank: Any) -> str:
        if rank in BADGE_IMAGES:
            return f'<img class="rank-badge-image" src="{BADGE_IMAGES[rank]}" alt="{rank} place" />'
        return f'<span class="rank-badge">#{rank}</span>'

    cards = [
        f"""
        <article class="rank-pill">
          <div class="rank-badge-wrap">{badge_for(entry["rank"])}</div>
          <span class="rank-name">{escape(entry["rank_name"])}</span>
        </article>
        """
        for entry in entries[:12]
    ]
    cards_html = "".join(cards)
    return f"""
    <section class="section-card card">
      <div class="section-head">
        <h3>Top Ranking</h3>
      </div>
      <div class="rank-strip-list">
        {cards_html}
      </div>
    </section>
    """
|
| 234 |
+
|
| 235 |
+
|
| 236 |
+
def render_home_table(entries: list[dict[str, Any]]) -> str:
    """Render the "Overall Leaderboard" table for *entries* as HTML.

    Columns are Rank, RankName, Model, URL, Overall, then one column per task
    in ``TASKS`` (task label on the first header row, ``"shortMetric"`` on the
    second). Entry text and URLs are HTML-escaped; an entry without a URL
    shows ``-``; the model column falls back to the rank name.
    """
    fixed_headers = [
        '<th rowspan="2" class="rank-col col-rank">Rank</th>',
        '<th rowspan="2" class="col-rankname">RankName</th>',
        '<th rowspan="2" class="col-model">Model</th>',
        '<th rowspan="2">URL</th>',
        '<th rowspan="2">Overall</th>',
    ]
    task_headers = [f'<th class="grouped" colspan="1">{escape(task["label"])}</th>' for task in TASKS]
    header_top_html = "".join(fixed_headers + task_headers)
    header_bottom_html = "".join(f'<th>{escape(task["shortMetric"])}</th>' for task in TASKS)

    body_rows = []
    for entry in entries:
        link = entry.get("url") or ""
        url_cell = "-"
        if link:
            url_cell = (
                f'<a class="url-link" href="{escape(link)}" target="_blank" rel="noopener noreferrer" '
                f'aria-label="External link"><img src="{EXTERNAL_LINK_IMAGE}" alt="" /></a>'
            )

        score_cells = []
        for task in TASKS:
            score = entry["task_overall"][task["id"]]
            css_class = metric_class(task["lowerBetter"], score)
            score_cells.append(f'<td><span class="{css_class}">{fmt_score(score)}</span></td>')

        cells = [
            f'<td class="rank-col col-rank">{entry["rank"]}</td>',
            f'<td class="col-rankname">{escape(entry["rank_name"])}</td>',
            f'<td class="col-model">{escape(entry.get("model") or entry["rank_name"])}</td>',
            f"<td>{url_cell}</td>",
            f"<td>{fmt_score(entry['overall'])}</td>",
            *score_cells,
        ]
        body_rows.append("<tr>" + "".join(cells) + "</tr>")

    task_cols_html = "<col />" * len(TASKS)
    body_html = "".join(body_rows)
    return f"""
    <section class="section-card card">
      <div class="section-head">
        <h3>Overall Leaderboard</h3>
      </div>
      <div class="table-scroll">
        <table>
          <colgroup>
            <col class="col-rank" />
            <col class="col-rankname" />
            <col class="col-model" />
            <col />
            <col />
            {task_cols_html}
          </colgroup>
          <thead>
            <tr>{header_top_html}</tr>
            <tr>{header_bottom_html}</tr>
          </thead>
          <tbody>{body_html}</tbody>
        </table>
      </div>
    </section>
    """
|
| 298 |
+
|
| 299 |
+
|
| 300 |
+
def render_home() -> str:
    """Return the home view HTML: the rank strip followed by the overall table."""
    strip_html = render_rank_strip(RANKED_OVERALL)
    table_html = render_home_table(RANKED_OVERALL)
    return strip_html + table_html
|
| 302 |
+
|
| 303 |
+
|
| 304 |
+
def render_task_title(task: dict[str, Any]) -> str:
    """Return the section heading HTML for *task*, with its label HTML-escaped."""
    safe_label = escape(task["label"])
    return f"""
    <div class="section-head">
      <div>
        <h3 class="task-title">Task : {safe_label}</h3>
      </div>
    </div>
    """
|
| 312 |
+
|
| 313 |
+
|
| 314 |
+
def render_task_table(task: dict[str, Any], dataset_id: str) -> str:
    """Render the ranking table for *task* and one dataset selection as HTML.

    With *dataset_id* ``"Overall"`` the score column shows each entry's
    ``"task_overall"`` value formatted by ``fmt_score``; otherwise it shows
    ``metric_display`` for that dataset. The score column header is the
    dataset's ``"label"``, falling back to *dataset_id* itself when the task
    lists no dataset with that id. Entries come from the module-level
    ``ENTRIES`` and are ranked with ``sort_task``.
    """
    is_overall = dataset_id == "Overall"
    ranked_entries = sort_task(ENTRIES, task, dataset_id)

    column_label = "Overall"
    if not is_overall:
        column_label = next(
            (dataset["label"] for dataset in task["datasets"] if dataset["id"] == dataset_id),
            dataset_id,
        )

    body_rows = []
    for entry in ranked_entries:
        if is_overall:
            score = entry["task_overall"][task["id"]]
            shown = fmt_score(score)
        else:
            score = metric_value(entry, task["id"], dataset_id)
            shown = metric_display(entry, task["id"], dataset_id)
        cells = [
            f'<td class="rank-col col-rank">{entry["rank"]}</td>',
            f'<td class="col-rankname">{escape(entry["rank_name"])}</td>',
            f'<td class="col-model">{escape(entry.get("model") or entry["rank_name"])}</td>',
            f'<td><span class="{metric_class(task["lowerBetter"], score)}">{escape(shown)}</span></td>',
        ]
        body_rows.append("<tr>" + "".join(cells) + "</tr>")

    header_label = escape(column_label)
    body_html = "".join(body_rows)
    return f"""
    <div id="taskTableMount">
      <div class="table-scroll">
        <table class="task-performance-table">
          <colgroup>
            <col class="col-rank" />
            <col class="col-rankname" />
            <col class="col-model" />
            <col />
          </colgroup>
          <thead>
            <tr>
              <th class="rank-col col-rank">Rank</th>
              <th class="col-rankname">RankName</th>
              <th class="col-model">Model</th>
              <th>{header_label}</th>
            </tr>
          </thead>
          <tbody>{body_html}</tbody>
        </table>
      </div>
    </div>
    """
|
| 359 |
+
|
| 360 |
+
|
| 361 |
+
def update_view(active_view: str, current_dataset: str | None) -> tuple[Any, Any, Any, str, str, str]:
    """Compute the component updates for switching to *active_view*.

    Returns six values, in order:

    1. visibility update for the home panel,
    2. visibility update for the task panel,
    3. dataset selector update (choices and selected value),
    4. task title HTML,
    5. the task's ``"metricLabel"``,
    6. task table HTML.

    On the home view the task panel is hidden but still populated with the
    first task in ``TASKS`` and the ``"Overall"`` dataset. On a task view,
    *current_dataset* is kept if it is a valid choice for that task and reset
    to ``"Overall"`` otherwise.

    Raises ``KeyError`` if *active_view* is neither ``HOME_VIEW`` nor a known
    task id, and ``IndexError`` on the home view when ``TASKS`` is empty.
    """
    is_home = active_view == HOME_VIEW
    if is_home:
        task = TASKS[0]
        choices = dataset_choices(task["id"])
        dataset_id = "Overall"
    else:
        task = TASK_MAP[active_view]
        choices = dataset_choices(active_view)
        dataset_id = current_dataset if current_dataset in choices else "Overall"

    # The inactive panel is passed visible="hidden", exactly as before.
    home_visible = True if is_home else "hidden"
    task_visible = "hidden" if is_home else True
    return (
        gr.update(visible=home_visible),
        gr.update(visible=task_visible),
        gr.update(choices=choices, value=dataset_id),
        render_task_title(task),
        task["metricLabel"],
        render_task_table(task, dataset_id),
    )
|
| 384 |
+
|
| 385 |
+
|
| 386 |
+
CUSTOM_CSS = """
|
| 387 |
+
@import url('https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@400;500;700&family=Noto+Sans+KR:wght@400;500;700&display=swap');
|
| 388 |
+
|
| 389 |
+
:root {
|
| 390 |
+
--bg: #f2f4f8;
|
| 391 |
+
--bg-strong: #dde5f2;
|
| 392 |
+
--panel: rgba(255, 255, 255, 0.84);
|
| 393 |
+
--panel-strong: #ffffff;
|
| 394 |
+
--text: #0c1730;
|
| 395 |
+
--muted: #66748b;
|
| 396 |
+
--line: rgba(12, 23, 48, 0.14);
|
| 397 |
+
--primary: #0e3a8a;
|
| 398 |
+
--accent: #c56b12;
|
| 399 |
+
--pending: #74674a;
|
| 400 |
+
--pending-bg: #f4ecd8;
|
| 401 |
+
--success: #0c8f61;
|
| 402 |
+
--danger: #b8612f;
|
| 403 |
+
--shadow: 0 14px 38px rgba(12, 23, 48, 0.08);
|
| 404 |
+
}
|
| 405 |
+
|
| 406 |
+
html, body, .gradio-container {
|
| 407 |
+
margin: 0 !important;
|
| 408 |
+
min-height: 100vh;
|
| 409 |
+
font-family: "Noto Sans KR", "Space Grotesk", sans-serif !important;
|
| 410 |
+
color: var(--text);
|
| 411 |
+
background:
|
| 412 |
+
radial-gradient(circle at 15% 0%, #dce8ff 0%, transparent 32%),
|
| 413 |
+
radial-gradient(circle at 95% 5%, #ffe4cf 0%, transparent 24%),
|
| 414 |
+
linear-gradient(180deg, #f8fbff 0%, #eef2f7 100%) !important;
|
| 415 |
+
}
|
| 416 |
+
|
| 417 |
+
.gradio-container {
|
| 418 |
+
max-width: 100% !important;
|
| 419 |
+
}
|
| 420 |
+
|
| 421 |
+
.app-root {
|
| 422 |
+
position: relative;
|
| 423 |
+
max-width: 1440px;
|
| 424 |
+
margin: 0 auto;
|
| 425 |
+
padding: 24px 20px 56px;
|
| 426 |
+
overflow-x: auto;
|
| 427 |
+
-webkit-overflow-scrolling: touch;
|
| 428 |
+
}
|
| 429 |
+
|
| 430 |
+
.bg-orb {
|
| 431 |
+
position: fixed;
|
| 432 |
+
border-radius: 999px;
|
| 433 |
+
filter: blur(70px);
|
| 434 |
+
opacity: 0.45;
|
| 435 |
+
pointer-events: none;
|
| 436 |
+
z-index: 0;
|
| 437 |
+
}
|
| 438 |
+
|
| 439 |
+
.orb-1 {
|
| 440 |
+
width: 280px;
|
| 441 |
+
height: 280px;
|
| 442 |
+
background: #a7c5ff;
|
| 443 |
+
top: -110px;
|
| 444 |
+
left: -80px;
|
| 445 |
+
}
|
| 446 |
+
|
| 447 |
+
.orb-2 {
|
| 448 |
+
width: 240px;
|
| 449 |
+
height: 240px;
|
| 450 |
+
background: #ffd2a4;
|
| 451 |
+
top: -70px;
|
| 452 |
+
right: -60px;
|
| 453 |
+
}
|
| 454 |
+
|
| 455 |
+
.layout-row {
|
| 456 |
+
position: relative;
|
| 457 |
+
z-index: 1;
|
| 458 |
+
flex-wrap: nowrap !important;
|
| 459 |
+
gap: 24px;
|
| 460 |
+
align-items: flex-start;
|
| 461 |
+
min-width: 1180px;
|
| 462 |
+
}
|
| 463 |
+
|
| 464 |
+
.sidebar-panel {
|
| 465 |
+
flex: 0 0 260px !important;
|
| 466 |
+
width: 260px !important;
|
| 467 |
+
min-width: 260px !important;
|
| 468 |
+
max-width: 260px !important;
|
| 469 |
+
position: sticky;
|
| 470 |
+
top: 20px;
|
| 471 |
+
align-self: start;
|
| 472 |
+
border: 1px solid var(--line);
|
| 473 |
+
border-radius: 26px;
|
| 474 |
+
background: rgba(7, 17, 40, 0.95);
|
| 475 |
+
color: #f5f7fb;
|
| 476 |
+
padding: 22px 18px;
|
| 477 |
+
box-shadow: var(--shadow);
|
| 478 |
+
}
|
| 479 |
+
|
| 480 |
+
.sidebar-head {
|
| 481 |
+
margin-bottom: 26px;
|
| 482 |
+
}
|
| 483 |
+
|
| 484 |
+
.sidebar-kicker, .kicker {
|
| 485 |
+
margin: 0;
|
| 486 |
+
letter-spacing: 0.12em;
|
| 487 |
+
text-transform: uppercase;
|
| 488 |
+
font-size: 12px;
|
| 489 |
+
}
|
| 490 |
+
|
| 491 |
+
.sidebar-head h1 {
|
| 492 |
+
margin: 8px 0 0;
|
| 493 |
+
font-size: 34px;
|
| 494 |
+
line-height: 0.95;
|
| 495 |
+
color: #f5f7fb;
|
| 496 |
+
}
|
| 497 |
+
|
| 498 |
+
.content-panel {
|
| 499 |
+
flex: 1 1 auto !important;
|
| 500 |
+
min-width: 0;
|
| 501 |
+
}
|
| 502 |
+
|
| 503 |
+
.hero {
|
| 504 |
+
margin-bottom: 20px;
|
| 505 |
+
}
|
| 506 |
+
|
| 507 |
+
.hero-topline {
|
| 508 |
+
display: flex;
|
| 509 |
+
align-items: flex-start;
|
| 510 |
+
justify-content: space-between;
|
| 511 |
+
gap: 20px;
|
| 512 |
+
text-align: center;
|
| 513 |
+
}
|
| 514 |
+
|
| 515 |
+
.hero-topline > div {
|
| 516 |
+
flex: 1;
|
| 517 |
+
}
|
| 518 |
+
|
| 519 |
+
.hero h2 {
|
| 520 |
+
margin: 8px 0 6px;
|
| 521 |
+
font-family: "Space Grotesk", sans-serif;
|
| 522 |
+
font-size: clamp(42px, 7vw, 76px);
|
| 523 |
+
line-height: 0.95;
|
| 524 |
+
color: var(--text);
|
| 525 |
+
}
|
| 526 |
+
|
| 527 |
+
.desc {
|
| 528 |
+
margin: 0 auto;
|
| 529 |
+
max-width: 760px;
|
| 530 |
+
color: var(--muted);
|
| 531 |
+
font-size: 17px;
|
| 532 |
+
}
|
| 533 |
+
|
| 534 |
+
.card {
|
| 535 |
+
background: var(--panel);
|
| 536 |
+
border: 1px solid var(--line);
|
| 537 |
+
border-radius: 24px;
|
| 538 |
+
box-shadow: var(--shadow);
|
| 539 |
+
backdrop-filter: blur(8px);
|
| 540 |
+
}
|
| 541 |
+
|
| 542 |
+
.section-card {
|
| 543 |
+
padding: 18px;
|
| 544 |
+
}
|
| 545 |
+
|
| 546 |
+
.section-head {
|
| 547 |
+
display: flex;
|
| 548 |
+
align-items: baseline;
|
| 549 |
+
justify-content: space-between;
|
| 550 |
+
gap: 12px;
|
| 551 |
+
margin-bottom: 12px;
|
| 552 |
+
}
|
| 553 |
+
|
| 554 |
+
.section-head h3,
|
| 555 |
+
.task-title {
|
| 556 |
+
margin: 0;
|
| 557 |
+
font-size: 28px;
|
| 558 |
+
font-family: "Space Grotesk", sans-serif;
|
| 559 |
+
}
|
| 560 |
+
|
| 561 |
+
.rank-strip-list {
|
| 562 |
+
display: grid;
|
| 563 |
+
grid-template-columns: repeat(6, minmax(120px, 1fr));
|
| 564 |
+
gap: 10px;
|
| 565 |
+
}
|
| 566 |
+
|
| 567 |
+
.rank-pill {
|
| 568 |
+
border: 1px solid var(--line);
|
| 569 |
+
border-radius: 16px;
|
| 570 |
+
padding: 12px;
|
| 571 |
+
background: linear-gradient(135deg, #ffffff 0%, var(--bg-strong) 100%);
|
| 572 |
+
}
|
| 573 |
+
|
| 574 |
+
.rank-badge-wrap {
|
| 575 |
+
min-height: 28px;
|
| 576 |
+
}
|
| 577 |
+
|
| 578 |
+
.rank-badge {
|
| 579 |
+
display: inline-block;
|
| 580 |
+
padding: 3px 8px;
|
| 581 |
+
border-radius: 999px;
|
| 582 |
+
background: var(--primary);
|
| 583 |
+
color: #fff;
|
| 584 |
+
font-size: 12px;
|
| 585 |
+
}
|
| 586 |
+
|
| 587 |
+
.rank-badge-image {
|
| 588 |
+
display: block;
|
| 589 |
+
width: auto;
|
| 590 |
+
height: 28px;
|
| 591 |
+
}
|
| 592 |
+
|
| 593 |
+
.rank-name {
|
| 594 |
+
display: block;
|
| 595 |
+
margin-top: 8px;
|
| 596 |
+
font-weight: 700;
|
| 597 |
+
}
|
| 598 |
+
|
| 599 |
+
.table-scroll {
|
| 600 |
+
overflow-x: auto;
|
| 601 |
+
overflow-y: visible;
|
| 602 |
+
border-radius: 0;
|
| 603 |
+
border: 1px solid #ffffff !important;
|
| 604 |
+
background: var(--panel-strong);
|
| 605 |
+
}
|
| 606 |
+
|
| 607 |
+
table {
|
| 608 |
+
width: 100%;
|
| 609 |
+
min-width: 1080px;
|
| 610 |
+
border-collapse: collapse;
|
| 611 |
+
border: 1px solid #ffffff !important;
|
| 612 |
+
}
|
| 613 |
+
|
| 614 |
+
.task-performance-table {
|
| 615 |
+
table-layout: fixed;
|
| 616 |
+
}
|
| 617 |
+
|
| 618 |
+
thead th {
|
| 619 |
+
background: #e8edf6;
|
| 620 |
+
border-bottom: 1px solid #ffffff !important;
|
| 621 |
+
white-space: nowrap;
|
| 622 |
+
}
|
| 623 |
+
|
| 624 |
+
thead tr:first-child th.grouped {
|
| 625 |
+
text-align: center;
|
| 626 |
+
}
|
| 627 |
+
|
| 628 |
+
th, td {
|
| 629 |
+
padding: 12px 14px;
|
| 630 |
+
text-align: left;
|
| 631 |
+
border-bottom: 1px solid #ffffff !important;
|
| 632 |
+
border-right: 1px solid #ffffff !important;
|
| 633 |
+
font-size: 14px;
|
| 634 |
+
}
|
| 635 |
+
|
| 636 |
+
th:first-child,
|
| 637 |
+
td:first-child {
|
| 638 |
+
border-left: 1px solid #ffffff !important;
|
| 639 |
+
}
|
| 640 |
+
|
| 641 |
+
th:last-child,
|
| 642 |
+
td:last-child {
|
| 643 |
+
border-right: 0;
|
| 644 |
+
}
|
| 645 |
+
|
| 646 |
+
.table-scroll table,
|
| 647 |
+
.table-scroll thead,
|
| 648 |
+
.table-scroll tbody,
|
| 649 |
+
.table-scroll tr,
|
| 650 |
+
.table-scroll th,
|
| 651 |
+
.table-scroll td {
|
| 652 |
+
border-color: #ffffff !important;
|
| 653 |
+
}
|
| 654 |
+
|
| 655 |
+
tbody tr:hover {
|
| 656 |
+
background: #f8fbff;
|
| 657 |
+
}
|
| 658 |
+
|
| 659 |
+
.rank-col {
|
| 660 |
+
font-family: "Space Grotesk", sans-serif;
|
| 661 |
+
font-weight: 700;
|
| 662 |
+
width: 72px;
|
| 663 |
+
}
|
| 664 |
+
|
| 665 |
+
.col-rank {
|
| 666 |
+
width: 96px;
|
| 667 |
+
}
|
| 668 |
+
|
| 669 |
+
.col-rankname {
|
| 670 |
+
width: 240px;
|
| 671 |
+
}
|
| 672 |
+
|
| 673 |
+
.col-model {
|
| 674 |
+
width: 320px;
|
| 675 |
+
}
|
| 676 |
+
|
| 677 |
+
.url-link {
|
| 678 |
+
display: inline-flex;
|
| 679 |
+
align-items: center;
|
| 680 |
+
justify-content: center;
|
| 681 |
+
width: 24px;
|
| 682 |
+
height: 24px;
|
| 683 |
+
}
|
| 684 |
+
|
| 685 |
+
.url-link img {
|
| 686 |
+
display: block;
|
| 687 |
+
width: 18px;
|
| 688 |
+
height: 18px;
|
| 689 |
+
}
|
| 690 |
+
|
| 691 |
+
.metric-good {
|
| 692 |
+
color: var(--success);
|
| 693 |
+
font-weight: 700;
|
| 694 |
+
}
|
| 695 |
+
|
| 696 |
+
.metric-bad {
|
| 697 |
+
color: var(--danger);
|
| 698 |
+
font-weight: 700;
|
| 699 |
+
}
|
| 700 |
+
|
| 701 |
+
.muted {
|
| 702 |
+
color: var(--muted);
|
| 703 |
+
}
|
| 704 |
+
|
| 705 |
+
.task-filters {
|
| 706 |
+
display: flex;
|
| 707 |
+
flex-wrap: nowrap;
|
| 708 |
+
align-items: flex-start;
|
| 709 |
+
gap: 12px;
|
| 710 |
+
margin-top: 12px;
|
| 711 |
+
width: 100%;
|
| 712 |
+
overflow-x: auto;
|
| 713 |
+
-webkit-overflow-scrolling: touch;
|
| 714 |
+
padding-bottom: 6px;
|
| 715 |
+
background: transparent !important;
|
| 716 |
+
border: 0 !important;
|
| 717 |
+
box-shadow: none !important;
|
| 718 |
+
}
|
| 719 |
+
|
| 720 |
+
.task-filters .dataset-wrap { flex: 0 0 620px; min-width: 620px; }
|
| 721 |
+
.task-filters .metric-wrap { flex: 0 0 340px; min-width: 340px; }
|
| 722 |
+
|
| 723 |
+
.task-filters .dataset-wrap,
|
| 724 |
+
.task-filters .metric-wrap,
|
| 725 |
+
.task-filters .dataset-wrap > div,
|
| 726 |
+
.task-filters .metric-wrap > div {
|
| 727 |
+
background: transparent !important;
|
| 728 |
+
border: 0 !important;
|
| 729 |
+
box-shadow: none !important;
|
| 730 |
+
}
|
| 731 |
+
|
| 732 |
+
/* Keep Dataset / Metric columns pinned to the same top baseline. */
|
| 733 |
+
.task-filters .dataset-wrap,
|
| 734 |
+
.task-filters .metric-wrap {
|
| 735 |
+
align-self: flex-start !important;
|
| 736 |
+
justify-self: flex-start !important;
|
| 737 |
+
margin-top: 0 !important;
|
| 738 |
+
padding-top: 0 !important;
|
| 739 |
+
}
|
| 740 |
+
|
| 741 |
+
.task-view-shell .filter-title {
|
| 742 |
+
margin: 0 0 6px 0 !important;
|
| 743 |
+
font-size: 13px !important;
|
| 744 |
+
color: var(--muted) !important;
|
| 745 |
+
line-height: 1.2 !important;
|
| 746 |
+
}
|
| 747 |
+
|
| 748 |
+
#taskTableMount {
|
| 749 |
+
margin-top: 18px;
|
| 750 |
+
}
|
| 751 |
+
|
| 752 |
+
.task-menu-radio {
|
| 753 |
+
gap: 8px;
|
| 754 |
+
background: transparent !important;
|
| 755 |
+
border: 0 !important;
|
| 756 |
+
box-shadow: none !important;
|
| 757 |
+
padding: 0 !important;
|
| 758 |
+
}
|
| 759 |
+
|
| 760 |
+
.task-menu-radio > div,
|
| 761 |
+
.task-menu-radio .block,
|
| 762 |
+
.task-menu-radio .gradio-radio,
|
| 763 |
+
.task-menu-radio .form {
|
| 764 |
+
background: transparent !important;
|
| 765 |
+
border: 0 !important;
|
| 766 |
+
box-shadow: none !important;
|
| 767 |
+
padding: 0 !important;
|
| 768 |
+
}
|
| 769 |
+
|
| 770 |
+
.task-menu-radio label > span,
|
| 771 |
+
.task-menu-radio label > div,
|
| 772 |
+
.task-menu-radio label .wrap {
|
| 773 |
+
white-space: pre-line !important;
|
| 774 |
+
}
|
| 775 |
+
|
| 776 |
+
.task-menu-radio label:has(input[type="radio"]) {
|
| 777 |
+
width: 100%;
|
| 778 |
+
margin: 0 !important;
|
| 779 |
+
border: 1px solid rgba(255, 255, 255, 0.1) !important;
|
| 780 |
+
background: rgba(255, 255, 255, 0.05) !important;
|
| 781 |
+
color: #f5f7fb !important;
|
| 782 |
+
border-radius: 14px !important;
|
| 783 |
+
padding: 12px 14px !important;
|
| 784 |
+
cursor: pointer !important;
|
| 785 |
+
min-height: 72px;
|
| 786 |
+
align-content: center;
|
| 787 |
+
box-shadow: none !important;
|
| 788 |
+
transition: background 0.18s ease, color 0.18s ease, border-color 0.18s ease;
|
| 789 |
+
}
|
| 790 |
+
|
| 791 |
+
.task-menu-radio label:has(input[type="radio"]):hover {
|
| 792 |
+
background: rgba(255, 255, 255, 0.1) !important;
|
| 793 |
+
}
|
| 794 |
+
|
| 795 |
+
.task-menu-radio label:has(input[type="radio"]:checked) {
|
| 796 |
+
background: linear-gradient(135deg, #fcf7eb 0%, #dfeaff 100%) !important;
|
| 797 |
+
color: var(--text) !important;
|
| 798 |
+
border-color: transparent !important;
|
| 799 |
+
}
|
| 800 |
+
|
| 801 |
+
.task-menu-radio input[type="radio"] {
|
| 802 |
+
display: none !important;
|
| 803 |
+
}
|
| 804 |
+
|
| 805 |
+
.task-menu-radio label span,
|
| 806 |
+
.task-menu-radio label div,
|
| 807 |
+
.task-menu-radio label p {
|
| 808 |
+
color: inherit !important;
|
| 809 |
+
}
|
| 810 |
+
|
| 811 |
+
.task-menu-radio label:has(input[type="radio"]) span,
|
| 812 |
+
.task-menu-radio label:has(input[type="radio"]) div {
|
| 813 |
+
color: #f5f7fb !important;
|
| 814 |
+
}
|
| 815 |
+
|
| 816 |
+
.task-menu-radio label:has(input[type="radio"]:checked) span,
|
| 817 |
+
.task-menu-radio label:has(input[type="radio"]:checked) div {
|
| 818 |
+
color: var(--text) !important;
|
| 819 |
+
}
|
| 820 |
+
|
| 821 |
+
.task-menu-radio .wrap,
|
| 822 |
+
.task-menu-radio label span:last-child {
|
| 823 |
+
opacity: 0.72;
|
| 824 |
+
font-size: 12px;
|
| 825 |
+
}
|
| 826 |
+
|
| 827 |
+
.task-view-shell .gradio-radio,
|
| 828 |
+
.task-view-shell .gradio-textbox {
|
| 829 |
+
margin: 0 !important;
|
| 830 |
+
min-width: 0 !important;
|
| 831 |
+
background: transparent !important;
|
| 832 |
+
border: 0 !important;
|
| 833 |
+
box-shadow: none !important;
|
| 834 |
+
}
|
| 835 |
+
|
| 836 |
+
.task-view-shell .gradio-radio label,
|
| 837 |
+
.task-view-shell .gradio-textbox label {
|
| 838 |
+
font-size: 13px !important;
|
| 839 |
+
color: var(--muted) !important;
|
| 840 |
+
}
|
| 841 |
+
|
| 842 |
+
.task-view-shell .dataset-radio {
|
| 843 |
+
background: transparent !important;
|
| 844 |
+
border: 0 !important;
|
| 845 |
+
box-shadow: none !important;
|
| 846 |
+
padding: 0 !important;
|
| 847 |
+
margin-left: 0 !important;
|
| 848 |
+
margin-bottom: 12px !important;
|
| 849 |
+
max-width: 100% !important;
|
| 850 |
+
width: 100% !important;
|
| 851 |
+
}
|
| 852 |
+
|
| 853 |
+
.task-view-shell .dataset-radio > div,
|
| 854 |
+
.task-view-shell .dataset-radio .block,
|
| 855 |
+
.task-view-shell .dataset-radio .form {
|
| 856 |
+
background: transparent !important;
|
| 857 |
+
border: 0 !important;
|
| 858 |
+
box-shadow: none !important;
|
| 859 |
+
padding: 0 !important;
|
| 860 |
+
display: grid !important;
|
| 861 |
+
grid-template-columns: repeat(2, minmax(180px, 1fr)) !important;
|
| 862 |
+
gap: 8px 10px !important;
|
| 863 |
+
align-items: start !important;
|
| 864 |
+
justify-content: start !important;
|
| 865 |
+
min-height: 86px !important;
|
| 866 |
+
align-content: start !important;
|
| 867 |
+
max-width: 100% !important;
|
| 868 |
+
width: 100% !important;
|
| 869 |
+
}
|
| 870 |
+
|
| 871 |
+
.task-view-shell .dataset-radio label:has(input[type="radio"]) {
|
| 872 |
+
margin: 0 !important;
|
| 873 |
+
border: 1px solid var(--line) !important;
|
| 874 |
+
background: rgba(255, 255, 255, 0.88) !important;
|
| 875 |
+
color: var(--text) !important;
|
| 876 |
+
border-radius: 10px !important;
|
| 877 |
+
padding: 0 12px !important;
|
| 878 |
+
height: 40px !important;
|
| 879 |
+
min-height: 40px !important;
|
| 880 |
+
width: auto !important;
|
| 881 |
+
display: flex !important;
|
| 882 |
+
align-items: center !important;
|
| 883 |
+
box-shadow: none !important;
|
| 884 |
+
justify-content: flex-start !important;
|
| 885 |
+
font-size: 14px !important;
|
| 886 |
+
line-height: 1.2 !important;
|
| 887 |
+
}
|
| 888 |
+
|
| 889 |
+
.task-view-shell .dataset-radio label:has(input[type="radio"]:checked) {
|
| 890 |
+
background: linear-gradient(135deg, #fcf7eb 0%, #dfeaff 100%) !important;
|
| 891 |
+
border-color: rgba(14, 58, 138, 0.22) !important;
|
| 892 |
+
font-weight: 700 !important;
|
| 893 |
+
}
|
| 894 |
+
|
| 895 |
+
.task-view-shell .dataset-radio input[type="radio"] {
|
| 896 |
+
display: none !important;
|
| 897 |
+
}
|
| 898 |
+
|
| 899 |
+
.task-view-shell .metric-field,
|
| 900 |
+
.task-view-shell .dataset-field {
|
| 901 |
+
align-self: flex-start !important;
|
| 902 |
+
background: transparent !important;
|
| 903 |
+
}
|
| 904 |
+
|
| 905 |
+
/* Remove any residual top spacing on metric box so it aligns with Dataset. */
|
| 906 |
+
.task-view-shell .metric-wrap .metric-field,
|
| 907 |
+
.task-view-shell .metric-wrap .gradio-textbox,
|
| 908 |
+
.task-view-shell .metric-wrap .gradio-textbox > div,
|
| 909 |
+
.task-view-shell .metric-wrap .gradio-textbox .block,
|
| 910 |
+
.task-view-shell .metric-wrap .gradio-textbox .form,
|
| 911 |
+
.task-view-shell .metric-wrap .gradio-textbox .wrap {
|
| 912 |
+
margin-top: 0 !important;
|
| 913 |
+
padding-top: 0 !important;
|
| 914 |
+
}
|
| 915 |
+
|
| 916 |
+
.task-view-shell .metric-field > div,
|
| 917 |
+
.task-view-shell .dataset-field > div {
|
| 918 |
+
background: transparent !important;
|
| 919 |
+
}
|
| 920 |
+
|
| 921 |
+
.task-view-shell .metric-field .gradio-textbox,
|
| 922 |
+
.task-view-shell .metric-field .gradio-textbox > div,
|
| 923 |
+
.task-view-shell .metric-field .gradio-textbox .block,
|
| 924 |
+
.task-view-shell .metric-field .gradio-textbox .form {
|
| 925 |
+
background: transparent !important;
|
| 926 |
+
border: 0 !important;
|
| 927 |
+
box-shadow: none !important;
|
| 928 |
+
}
|
| 929 |
+
|
| 930 |
+
.task-view-shell .metric-field,
|
| 931 |
+
.task-view-shell .metric-field .gradio-textbox,
|
| 932 |
+
.task-view-shell .metric-field .gradio-textbox .wrap {
|
| 933 |
+
width: 100% !important;
|
| 934 |
+
max-width: none !important;
|
| 935 |
+
}
|
| 936 |
+
|
| 937 |
+
.task-view-shell .metric-field .gradio-textbox .wrap,
|
| 938 |
+
.task-view-shell .metric-field .gradio-textbox textarea,
|
| 939 |
+
.task-view-shell .metric-field .gradio-textbox input {
|
| 940 |
+
height: 40px !important;
|
| 941 |
+
min-height: 40px !important;
|
| 942 |
+
width: 100% !important;
|
| 943 |
+
}
|
| 944 |
+
|
| 945 |
+
.task-view-shell input,
|
| 946 |
+
.task-view-shell textarea,
|
| 947 |
+
.task-view-shell .wrap-inner,
|
| 948 |
+
.task-view-shell button.secondary-down-arrow,
|
| 949 |
+
.task-view-shell .gradio-textbox .wrap {
|
| 950 |
+
border-radius: 12px !important;
|
| 951 |
+
}
|
| 952 |
+
|
| 953 |
+
.task-view-shell .gradio-textbox .wrap,
|
| 954 |
+
.task-view-shell .gradio-textbox textarea,
|
| 955 |
+
.task-view-shell .gradio-textbox input {
|
| 956 |
+
border: 1px solid var(--line) !important;
|
| 957 |
+
background: rgba(255, 255, 255, 0.88) !important;
|
| 958 |
+
color: var(--text) !important;
|
| 959 |
+
}
|
| 960 |
+
|
| 961 |
+
@media (max-width: 1280px) {
|
| 962 |
+
.layout-row {
|
| 963 |
+
min-width: 1120px;
|
| 964 |
+
}
|
| 965 |
+
|
| 966 |
+
.task-filters .dataset-wrap { flex-basis: 560px; min-width: 560px; }
|
| 967 |
+
.task-filters .metric-wrap { flex-basis: 320px; min-width: 320px; }
|
| 968 |
+
|
| 969 |
+
.task-view-shell .dataset-radio > div,
|
| 970 |
+
.task-view-shell .dataset-radio .block,
|
| 971 |
+
.task-view-shell .dataset-radio .form {
|
| 972 |
+
grid-template-columns: repeat(2, minmax(150px, 1fr)) !important;
|
| 973 |
+
}
|
| 974 |
+
}
|
| 975 |
+
|
| 976 |
+
@media (max-width: 980px) {
|
| 977 |
+
.layout-row {
|
| 978 |
+
min-width: 1040px;
|
| 979 |
+
}
|
| 980 |
+
|
| 981 |
+
.rank-strip-list {
|
| 982 |
+
grid-template-columns: repeat(2, minmax(0, 1fr));
|
| 983 |
+
}
|
| 984 |
+
|
| 985 |
+
.task-view-shell .dataset-radio > div,
|
| 986 |
+
.task-view-shell .dataset-radio .block,
|
| 987 |
+
.task-view-shell .dataset-radio .form {
|
| 988 |
+
grid-template-columns: repeat(2, minmax(140px, 1fr)) !important;
|
| 989 |
+
}
|
| 990 |
+
}
|
| 991 |
+
|
| 992 |
+
@media (max-width: 720px) {
|
| 993 |
+
.app-root {
|
| 994 |
+
padding: 16px 14px 40px;
|
| 995 |
+
}
|
| 996 |
+
|
| 997 |
+
.hero-topline {
|
| 998 |
+
flex-direction: column;
|
| 999 |
+
align-items: center;
|
| 1000 |
+
}
|
| 1001 |
+
|
| 1002 |
+
.hero h2 {
|
| 1003 |
+
font-size: 40px;
|
| 1004 |
+
}
|
| 1005 |
+
|
| 1006 |
+
.rank-strip-list {
|
| 1007 |
+
grid-template-columns: 1fr;
|
| 1008 |
+
}
|
| 1009 |
+
|
| 1010 |
+
.task-view-shell .dataset-radio > div,
|
| 1011 |
+
.task-view-shell .dataset-radio .block,
|
| 1012 |
+
.task-view-shell .dataset-radio .form {
|
| 1013 |
+
grid-template-columns: 1fr !important;
|
| 1014 |
+
}
|
| 1015 |
+
}
|
| 1016 |
+
"""
|
| 1017 |
+
|
| 1018 |
+
|
| 1019 |
+
def build_app() -> gr.Blocks:
    """Assemble the leaderboard UI and wire its events.

    The layout is a sidebar (title + task menu) next to a content panel
    (hero header, home view, and an initially hidden task view holding the
    dataset selector, metric label and results table).

    Returns:
        The constructed ``gr.Blocks`` app, not yet launched.
    """
    # NOTE(review): the nesting below was reconstructed from the CSS class
    # hierarchy (e.g. ``.task-view-shell .dataset-radio``) — confirm it
    # matches the intended layout.
    with gr.Blocks(title="Ko-Speech-Eval Leaderboard", fill_width=True) as demo:
        with gr.Column(elem_classes=["app-root"]):
            gr.HTML('<div class="bg-orb orb-1"></div><div class="bg-orb orb-2"></div>')
            with gr.Row(elem_classes=["layout-row"]):
                with gr.Column(scale=0, min_width=260, elem_classes=["sidebar-panel"]):
                    gr.HTML(
                        """
                        <div class="sidebar-head">
                        <p class="sidebar-kicker">KoALa-bench</p>
                        <h1>Leaderboard</h1>
                        </div>
                        """
                    )
                    menu = gr.Radio(
                        choices=menu_choices(),
                        value=HOME_VIEW,
                        show_label=False,
                        container=False,
                        elem_classes=["task-menu", "task-menu-radio"],
                    )

                with gr.Column(scale=1, elem_classes=["content-panel"]):
                    gr.HTML(
                        """
                        <header class="hero">
                        <div class="hero-topline">
                        <div>
                        <p class="kicker">Korean Audio Language benchmark</p>
                        <h2>Leaderboard for KoALa</h2>
                        </div>
                        </div>
                        </header>
                        """
                    )

                    home_view = gr.HTML(render_home(), visible=True)

                    with gr.Column(
                        visible="hidden",
                        elem_classes=["task-view-shell", "section-card", "card"],
                    ) as task_view:
                        task_title = gr.HTML()
                        with gr.Row(elem_classes=["task-filters"]):
                            with gr.Column(scale=3, min_width=420, elem_classes=["dataset-wrap"]):
                                gr.HTML('<p class="filter-title">Dataset</p>')
                                dataset_dropdown = gr.Radio(
                                    choices=dataset_choices(TASKS[0]["id"]),
                                    value="Overall",
                                    show_label=False,
                                    elem_classes=["dataset-field", "dataset-radio"],
                                )
                            with gr.Column(scale=2, min_width=280, elem_classes=["metric-wrap"]):
                                gr.HTML('<p class="filter-title">Metric</p>')
                                metric_text = gr.Textbox(
                                    show_label=False,
                                    interactive=False,
                                    elem_classes=["metric-field"],
                                )
                        task_table = gr.HTML()

        # Menu changes, dataset changes and the initial page load all refresh
        # the same six components through ``update_view``.
        view_inputs = [menu, dataset_dropdown]
        view_outputs = [home_view, task_view, dataset_dropdown, task_title, metric_text, task_table]
        for register in (menu.change, dataset_dropdown.change, demo.load):
            register(fn=update_view, inputs=view_inputs, outputs=view_outputs)

    return demo
|
| 1093 |
+
|
| 1094 |
+
|
| 1095 |
+
if __name__ == "__main__":
    # Script entry point: build the UI and serve it. The port comes from
    # GRADIO_SERVER_PORT and falls back to 7860 when it is unset.
    app = build_app()
    app.launch(
        server_port=int(os.getenv("GRADIO_SERVER_PORT", "7860")),
        css=CUSTOM_CSS,
    )
|
build_leaderboard_data.py
ADDED
|
@@ -0,0 +1,292 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
from datetime import datetime, timezone
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from typing import Any
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
# All paths are resolved relative to the directory containing this script.
ROOT = Path(__file__).parent
# Directory tree that holds the per-dataset ``*_summary.json`` result files.
RESULTS_ROOT = ROOT / "data" / "results_real"
# Aggregated leaderboard file; also read back to reuse existing entry metadata.
LEADERBOARD_JSON = ROOT / "data" / "leaderboard-data.json"
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
# Task catalogue. Each task carries its id, display label, metric labels,
# whether a lower metric value is better, and its datasets (id + label).
CANONICAL_TASKS = [
    {
        "id": "K-disentQA",
        "label": "SCA-QA",
        "metricLabel": "Speech Context Faithfulness",
        "shortMetric": "Faithfulness",
        "lowerBetter": False,
        "datasets": [
            {"id": "history_after_chosun", "label": "History_after_chosun"},
            {"id": "history_after_chosun_other", "label": "History_after_chosun Other"},
            {"id": "history_before_chosun", "label": "History_before_chosun"},
            {"id": "history_before_chosun_other", "label": "History_before_chosun Other"},
            {"id": "k-sports", "label": "K-sports"},
            {"id": "k-sports_other", "label": "K-sports Other"},
            {"id": "kpop", "label": "K-pop"},
            {"id": "kpop_other", "label": "K-pop Other"},
        ],
    },
    {
        "id": "SQA",
        "label": "Speech QA",
        "metricLabel": "Accuracy (%)",
        "shortMetric": "Acc(%)",
        "lowerBetter": False,
        "datasets": [
            {"id": "click", "label": "CLICk"},
            {"id": "click_other", "label": "CLICk Other"},
            {"id": "kobest_boolq", "label": "KoBest BoolQ"},
            {"id": "kobest_boolq_other", "label": "KoBest BoolQ Other"},
        ],
    },
    {
        "id": "Instruct",
        "label": "Speech Instruction",
        "metricLabel": "Score (GPT-4o as Judge)",
        "shortMetric": "Score (GPT-4o as Judge)",
        "lowerBetter": False,
        "datasets": [
            {"id": "alpaca", "label": "Alpaca"},
            {"id": "alpaca_other", "label": "Alpaca Other"},
            {"id": "kudge", "label": "KUDGE"},
            {"id": "kudge_other", "label": "KUDGE Other"},
            {"id": "openhermes", "label": "OpenHermes"},
            {"id": "openhermes_other", "label": "OpenHermes Other"},
            {"id": "vicuna", "label": "Vicuna"},
            {"id": "vicuna_other", "label": "Vicuna Other"},
        ],
    },
    {
        "id": "ASR",
        "label": "ASR",
        "metricLabel": "CER (%)",
        "shortMetric": "CER",
        # Error rate: the only task where a smaller value ranks higher.
        "lowerBetter": True,
        "datasets": [
            {"id": "common_voice_korea", "label": "CommonVoice-KO"},
            {"id": "common_voice_korea_other", "label": "CommonVoice-KO Other"},
            {"id": "ksponspeech_eval_clean", "label": "KsponSpeech Clean"},
            {"id": "ksponspeech_eval_other", "label": "KsponSpeech Other"},
            {"id": "zeroth_korean_test", "label": "Zeroth-Korean"},
            {"id": "zeroth_korean_test_other", "label": "Zeroth-Korean Other"},
        ],
    },
    {
        "id": "Translation",
        "label": "Translation",
        "metricLabel": "BLEU / METEOR",
        "shortMetric": "BLEU / METEOR",
        "lowerBetter": False,
        "datasets": [
            {"id": "etri_tst-COMMON", "label": "ETRI-TST-Common"},
            {"id": "etri_tst-HE", "label": "ETRI-TST-HE"},
        ],
    },
    {
        "id": "LSQA",
        "label": "Long Speech Understanding",
        "metricLabel": "Accuracy (%)",
        "shortMetric": "Acc(%)",
        "lowerBetter": False,
        "datasets": [
            {"id": "mctest", "label": "MCTest"},
            {"id": "mctest_other", "label": "MCTest Other"},
        ],
    },
]
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
# Per-task lookup from a key to a dataset id. Every key currently maps to
# itself. NOTE(review): presumably the keys are result folder names under
# RESULTS_ROOT/<task>/<model>/ — confirm against the consumer of this table.
FOLDER_TO_DATASET_ID = {
    "K-disentQA": {
        "history_after_chosun": "history_after_chosun",
        "history_after_chosun_other": "history_after_chosun_other",
        "history_before_chosun": "history_before_chosun",
        "history_before_chosun_other": "history_before_chosun_other",
        "k-sports": "k-sports",
        "k-sports_other": "k-sports_other",
        "kpop": "kpop",
        "kpop_other": "kpop_other",
    },
    "SQA": {
        "click": "click",
        "click_other": "click_other",
        "kobest_boolq": "kobest_boolq",
        "kobest_boolq_other": "kobest_boolq_other",
    },
    "Instruct": {
        "alpaca": "alpaca",
        "alpaca_other": "alpaca_other",
        "kudge": "kudge",
        "kudge_other": "kudge_other",
        "openhermes": "openhermes",
        "openhermes_other": "openhermes_other",
        "vicuna": "vicuna",
        "vicuna_other": "vicuna_other",
    },
    "ASR": {
        "common_voice_korea": "common_voice_korea",
        "common_voice_korea_other": "common_voice_korea_other",
        "ksponspeech_eval_clean": "ksponspeech_eval_clean",
        "ksponspeech_eval_other": "ksponspeech_eval_other",
        "zeroth_korean_test": "zeroth_korean_test",
        "zeroth_korean_test_other": "zeroth_korean_test_other",
    },
    "Translation": {
        "etri_tst-COMMON": "etri_tst-COMMON",
        "etri_tst-HE": "etri_tst-HE",
    },
    "LSQA": {
        "mctest": "mctest",
        "mctest_other": "mctest_other",
    },
}
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
def load_existing_entry_meta() -> dict[str, dict[str, str]]:
    """Read per-entry display metadata from the existing leaderboard JSON.

    Returns:
        A mapping from entry id to ``{"rank_name", "model", "url"}``.
        ``rank_name`` falls back to the entry id; ``model`` and ``url`` fall
        back to an empty string. Empty when the leaderboard file does not
        exist or has no entries.

    Raises:
        KeyError: If an entry has no ``"id"`` field.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    if not LEADERBOARD_JSON.exists():
        return {}

    payload = json.loads(LEADERBOARD_JSON.read_text(encoding="utf-8"))

    meta: dict[str, dict[str, str]] = {}
    # ``or []`` also covers an explicit ``"entries": null`` in the file.
    for entry in payload.get("entries") or []:
        entry_id = entry["id"]
        meta[entry_id] = {
            "rank_name": entry.get("rank_name", entry_id),
            "model": entry.get("model", ""),
            "url": entry.get("url", ""),
        }
    return meta
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
def pick_summary(dataset_dir: Path) -> Path | None:
|
| 164 |
+
direct = sorted(path for path in dataset_dir.glob("*_summary.json") if path.is_file())
|
| 165 |
+
if direct:
|
| 166 |
+
return direct[0]
|
| 167 |
+
|
| 168 |
+
recursive = sorted(
|
| 169 |
+
dataset_dir.rglob("*_summary.json"),
|
| 170 |
+
key=lambda path: (len(path.relative_to(dataset_dir).parts), str(path)),
|
| 171 |
+
)
|
| 172 |
+
return recursive[0] if recursive else None
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
def extract_metric(task_name: str, payload: dict[str, Any]) -> dict[str, Any] | None:
|
| 176 |
+
if task_name == "K-disentQA":
|
| 177 |
+
value = payload.get("accuracy_speech")
|
| 178 |
+
if value is None:
|
| 179 |
+
return None
|
| 180 |
+
value *= 100
|
| 181 |
+
return {"value": value, "display": f"{value:.2f}"}
|
| 182 |
+
|
| 183 |
+
if task_name in {"SQA", "LSQA"}:
|
| 184 |
+
value = payload.get("accuracy_logit")
|
| 185 |
+
if value is None:
|
| 186 |
+
value = payload.get("accuracy_generation")
|
| 187 |
+
if value is None:
|
| 188 |
+
return None
|
| 189 |
+
value *= 100
|
| 190 |
+
return {"value": value, "display": f"{value:.2f}"}
|
| 191 |
+
|
| 192 |
+
if task_name == "Instruct":
|
| 193 |
+
value = payload.get("avg_gpt_score")
|
| 194 |
+
if value is None:
|
| 195 |
+
return None
|
| 196 |
+
value *= 100
|
| 197 |
+
return {"value": value, "display": f"{value:.2f}"}
|
| 198 |
+
|
| 199 |
+
if task_name == "ASR":
|
| 200 |
+
value = payload.get("total_cer")
|
| 201 |
+
if value is None:
|
| 202 |
+
return None
|
| 203 |
+
value *= 100
|
| 204 |
+
return {"value": value, "display": f"{value:.2f}"}
|
| 205 |
+
|
| 206 |
+
if task_name == "Translation":
|
| 207 |
+
bleu = payload.get("avg_bleu")
|
| 208 |
+
if bleu is None:
|
| 209 |
+
bleu = payload.get("corpus_bleu")
|
| 210 |
+
meteor = payload.get("avg_meteor")
|
| 211 |
+
if bleu is None:
|
| 212 |
+
return None
|
| 213 |
+
if meteor is None:
|
| 214 |
+
return {"value": bleu, "display": f"{bleu:.2f}"}
|
| 215 |
+
return {"value": bleu, "display": f"{bleu:.2f} / {meteor:.2f}"}
|
| 216 |
+
|
| 217 |
+
return None
|
| 218 |
+
|
| 219 |
+
|
| 220 |
+
def build_leaderboard_payload() -> dict[str, Any]:
    """Assemble the leaderboard document from the result summaries on disk.

    The tree ``RESULTS_ROOT/<task id>/<model dir>/<dataset dir>`` is walked for
    every task in ``CANONICAL_TASKS``. Dataset folders are translated through
    ``FOLDER_TO_DATASET_ID``; folders without a mapping, without a summary
    file, or whose summary yields no metric are skipped.

    Returns:
        A dict with ``generatedAt`` (UTC ISO timestamp), ``sourceRoot``,
        ``tasks`` (``CANONICAL_TASKS``) and ``entries`` (one per model
        directory name, ordered by id, each carrying every canonical task key).

    Raises:
        SystemExit: If ``RESULTS_ROOT`` does not exist.
    """
    if not RESULTS_ROOT.exists():
        raise SystemExit(f"Missing results directory: {RESULTS_ROOT}")

    known_meta = load_existing_entry_meta()
    by_model: dict[str, dict[str, Any]] = {}

    for task_spec in CANONICAL_TASKS:
        task_key = task_spec["id"]
        task_root = RESULTS_ROOT / task_key
        if not task_root.exists():
            continue

        dataset_ids = FOLDER_TO_DATASET_ID[task_key]
        model_roots = [child for child in task_root.iterdir() if child.is_dir()]
        model_roots.sort()

        for model_root in model_roots:
            name = model_root.name
            if name not in by_model:
                # First sighting of this model: seed it from the metadata
                # kept in the previously generated file, if any.
                saved = known_meta.get(name, {})
                by_model[name] = {
                    "id": name,
                    "rank_name": saved.get("rank_name", name),
                    "model": saved.get("model", ""),
                    "url": saved.get("url", ""),
                    "tasks": {},
                }
            record = by_model[name]
            if task_key not in record["tasks"]:
                record["tasks"][task_key] = {}
            task_metrics = record["tasks"][task_key]

            dataset_roots = [child for child in model_root.iterdir() if child.is_dir()]
            dataset_roots.sort()

            for dataset_root in dataset_roots:
                dataset_key = dataset_ids.get(dataset_root.name)
                if not dataset_key:
                    continue

                summary_file = pick_summary(dataset_root)
                if summary_file is None:
                    continue

                summary = json.loads(summary_file.read_text(encoding="utf-8"))
                cell = extract_metric(task_key, summary)
                if cell is None:
                    continue

                # Borrow the model name from the summary only while the entry
                # still has none.
                if summary.get("model") and not record["model"]:
                    record["model"] = summary["model"]
                task_metrics[dataset_key] = cell

    for record in by_model.values():
        if not record["model"]:
            record["model"] = record["id"]
        # Give every entry a (possibly empty) slot for each canonical task.
        for task_spec in CANONICAL_TASKS:
            record["tasks"].setdefault(task_spec["id"], {})

    ordered_entries = [by_model[name] for name in sorted(by_model)]
    return {
        "generatedAt": datetime.now(timezone.utc).isoformat(),
        "sourceRoot": "data/results_real",
        "tasks": CANONICAL_TASKS,
        "entries": ordered_entries,
    }
|
| 279 |
+
|
| 280 |
+
|
| 281 |
+
def main() -> None:
    """Rebuild the leaderboard payload, write it to ``LEADERBOARD_JSON`` and report it.

    The payload is serialised as indented UTF-8 JSON with non-ASCII characters
    kept as-is and a trailing newline. The output path and the number of
    entries are then printed.
    """
    leaderboard = build_leaderboard_payload()
    serialized = json.dumps(leaderboard, ensure_ascii=False, indent=2)
    LEADERBOARD_JSON.write_text(serialized + "\n", encoding="utf-8")

    print(f"Wrote {LEADERBOARD_JSON}")
    print(f"Entries: {len(leaderboard['entries'])}")
|
| 289 |
+
|
| 290 |
+
|
| 291 |
+
if __name__ == "__main__":
|
| 292 |
+
main()
|
data/leaderboard-data.json
ADDED
|
@@ -0,0 +1,1022 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"generatedAt": "2026-03-30T07:45:11.033924+00:00",
|
| 3 |
+
"sourceRoot": "data/results_real",
|
| 4 |
+
"tasks": [
|
| 5 |
+
{
|
| 6 |
+
"id": "K-disentQA",
|
| 7 |
+
"label": "SCA-QA",
|
| 8 |
+
"metricLabel": "Speech Context Faithfulness",
|
| 9 |
+
"shortMetric": "Faithfulness",
|
| 10 |
+
"lowerBetter": false,
|
| 11 |
+
"datasets": [
|
| 12 |
+
{
|
| 13 |
+
"id": "history_after_chosun",
|
| 14 |
+
"label": "History_after_chosun"
|
| 15 |
+
},
|
| 16 |
+
{
|
| 17 |
+
"id": "history_after_chosun_other",
|
| 18 |
+
"label": "History_after_chosun Other"
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"id": "history_before_chosun",
|
| 22 |
+
"label": "History_before_chosun"
|
| 23 |
+
},
|
| 24 |
+
{
|
| 25 |
+
"id": "history_before_chosun_other",
|
| 26 |
+
"label": "History_before_chosun Other"
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"id": "k-sports",
|
| 30 |
+
"label": "K-sports"
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"id": "k-sports_other",
|
| 34 |
+
"label": "K-sports Other"
|
| 35 |
+
},
|
| 36 |
+
{
|
| 37 |
+
"id": "kpop",
|
| 38 |
+
"label": "K-pop"
|
| 39 |
+
},
|
| 40 |
+
{
|
| 41 |
+
"id": "kpop_other",
|
| 42 |
+
"label": "K-pop Other"
|
| 43 |
+
}
|
| 44 |
+
]
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"id": "SQA",
|
| 48 |
+
"label": "Speech QA",
|
| 49 |
+
"metricLabel": "Accuracy (%)",
|
| 50 |
+
"shortMetric": "Acc(%)",
|
| 51 |
+
"lowerBetter": false,
|
| 52 |
+
"datasets": [
|
| 53 |
+
{
|
| 54 |
+
"id": "click",
|
| 55 |
+
"label": "CLICk"
|
| 56 |
+
},
|
| 57 |
+
{
|
| 58 |
+
"id": "click_other",
|
| 59 |
+
"label": "CLICk Other"
|
| 60 |
+
},
|
| 61 |
+
{
|
| 62 |
+
"id": "kobest_boolq",
|
| 63 |
+
"label": "KoBest BoolQ"
|
| 64 |
+
},
|
| 65 |
+
{
|
| 66 |
+
"id": "kobest_boolq_other",
|
| 67 |
+
"label": "KoBest BoolQ Other"
|
| 68 |
+
}
|
| 69 |
+
]
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"id": "Instruct",
|
| 73 |
+
"label": "Speech Instruction",
|
| 74 |
+
"metricLabel": "Score (GPT-4o as Judge)",
|
| 75 |
+
"shortMetric": "Score (GPT-4o as Judge)",
|
| 76 |
+
"lowerBetter": false,
|
| 77 |
+
"datasets": [
|
| 78 |
+
{
|
| 79 |
+
"id": "alpaca",
|
| 80 |
+
"label": "Alpaca"
|
| 81 |
+
},
|
| 82 |
+
{
|
| 83 |
+
"id": "alpaca_other",
|
| 84 |
+
"label": "Alpaca Other"
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"id": "kudge",
|
| 88 |
+
"label": "KUDGE"
|
| 89 |
+
},
|
| 90 |
+
{
|
| 91 |
+
"id": "kudge_other",
|
| 92 |
+
"label": "KUDGE Other"
|
| 93 |
+
},
|
| 94 |
+
{
|
| 95 |
+
"id": "openhermes",
|
| 96 |
+
"label": "OpenHermes"
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"id": "openhermes_other",
|
| 100 |
+
"label": "OpenHermes Other"
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"id": "vicuna",
|
| 104 |
+
"label": "Vicuna"
|
| 105 |
+
},
|
| 106 |
+
{
|
| 107 |
+
"id": "vicuna_other",
|
| 108 |
+
"label": "Vicuna Other"
|
| 109 |
+
}
|
| 110 |
+
]
|
| 111 |
+
},
|
| 112 |
+
{
|
| 113 |
+
"id": "ASR",
|
| 114 |
+
"label": "ASR",
|
| 115 |
+
"metricLabel": "CER (%)",
|
| 116 |
+
"shortMetric": "CER",
|
| 117 |
+
"lowerBetter": true,
|
| 118 |
+
"datasets": [
|
| 119 |
+
{
|
| 120 |
+
"id": "common_voice_korea",
|
| 121 |
+
"label": "CommonVoice-KO"
|
| 122 |
+
},
|
| 123 |
+
{
|
| 124 |
+
"id": "common_voice_korea_other",
|
| 125 |
+
"label": "CommonVoice-KO Other"
|
| 126 |
+
},
|
| 127 |
+
{
|
| 128 |
+
"id": "ksponspeech_eval_clean",
|
| 129 |
+
"label": "KsponSpeech Clean"
|
| 130 |
+
},
|
| 131 |
+
{
|
| 132 |
+
"id": "ksponspeech_eval_other",
|
| 133 |
+
"label": "KsponSpeech Other"
|
| 134 |
+
},
|
| 135 |
+
{
|
| 136 |
+
"id": "zeroth_korean_test",
|
| 137 |
+
"label": "Zeroth-Korean"
|
| 138 |
+
},
|
| 139 |
+
{
|
| 140 |
+
"id": "zeroth_korean_test_other",
|
| 141 |
+
"label": "Zeroth-Korean Other"
|
| 142 |
+
}
|
| 143 |
+
]
|
| 144 |
+
},
|
| 145 |
+
{
|
| 146 |
+
"id": "Translation",
|
| 147 |
+
"label": "Translation",
|
| 148 |
+
"metricLabel": "BLEU / METEOR",
|
| 149 |
+
"shortMetric": "BLEU / METEOR",
|
| 150 |
+
"lowerBetter": false,
|
| 151 |
+
"datasets": [
|
| 152 |
+
{
|
| 153 |
+
"id": "etri_tst-COMMON",
|
| 154 |
+
"label": "ETRI-TST-Common"
|
| 155 |
+
},
|
| 156 |
+
{
|
| 157 |
+
"id": "etri_tst-HE",
|
| 158 |
+
"label": "ETRI-TST-HE"
|
| 159 |
+
}
|
| 160 |
+
]
|
| 161 |
+
},
|
| 162 |
+
{
|
| 163 |
+
"id": "LSQA",
|
| 164 |
+
"label": "Long Speech Understanding",
|
| 165 |
+
"metricLabel": "Accuracy (%)",
|
| 166 |
+
"shortMetric": "Acc(%)",
|
| 167 |
+
"lowerBetter": false,
|
| 168 |
+
"datasets": [
|
| 169 |
+
{
|
| 170 |
+
"id": "mctest",
|
| 171 |
+
"label": "MCTest"
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"id": "mctest_other",
|
| 175 |
+
"label": "MCTest Other"
|
| 176 |
+
}
|
| 177 |
+
]
|
| 178 |
+
}
|
| 179 |
+
],
|
| 180 |
+
"entries": [
|
| 181 |
+
{
|
| 182 |
+
"id": "gemini_flash",
|
| 183 |
+
"rank_name": "gemini_flash",
|
| 184 |
+
"model": "gemini-2.5-flash-lite",
|
| 185 |
+
"url": "",
|
| 186 |
+
"tasks": {
|
| 187 |
+
"K-disentQA": {
|
| 188 |
+
"history_after_chosun": {
|
| 189 |
+
"value": 64.63414634146342,
|
| 190 |
+
"display": "64.63"
|
| 191 |
+
},
|
| 192 |
+
"history_after_chosun_other": {
|
| 193 |
+
"value": 58.536585365853654,
|
| 194 |
+
"display": "58.54"
|
| 195 |
+
},
|
| 196 |
+
"history_before_chosun": {
|
| 197 |
+
"value": 66.33663366336634,
|
| 198 |
+
"display": "66.34"
|
| 199 |
+
},
|
| 200 |
+
"history_before_chosun_other": {
|
| 201 |
+
"value": 65.34653465346535,
|
| 202 |
+
"display": "65.35"
|
| 203 |
+
},
|
| 204 |
+
"k-sports": {
|
| 205 |
+
"value": 86.36363636363636,
|
| 206 |
+
"display": "86.36"
|
| 207 |
+
},
|
| 208 |
+
"k-sports_other": {
|
| 209 |
+
"value": 84.0909090909091,
|
| 210 |
+
"display": "84.09"
|
| 211 |
+
},
|
| 212 |
+
"kpop": {
|
| 213 |
+
"value": 69.90291262135922,
|
| 214 |
+
"display": "69.90"
|
| 215 |
+
},
|
| 216 |
+
"kpop_other": {
|
| 217 |
+
"value": 73.7864077669903,
|
| 218 |
+
"display": "73.79"
|
| 219 |
+
}
|
| 220 |
+
},
|
| 221 |
+
"SQA": {
|
| 222 |
+
"click": {
|
| 223 |
+
"value": 60.48053024026512,
|
| 224 |
+
"display": "60.48"
|
| 225 |
+
},
|
| 226 |
+
"click_other": {
|
| 227 |
+
"value": 60.56338028169014,
|
| 228 |
+
"display": "60.56"
|
| 229 |
+
},
|
| 230 |
+
"kobest_boolq": {
|
| 231 |
+
"value": 51.967799642218246,
|
| 232 |
+
"display": "51.97"
|
| 233 |
+
},
|
| 234 |
+
"kobest_boolq_other": {
|
| 235 |
+
"value": 50.44722719141323,
|
| 236 |
+
"display": "50.45"
|
| 237 |
+
}
|
| 238 |
+
},
|
| 239 |
+
"Instruct": {
|
| 240 |
+
"alpaca": {
|
| 241 |
+
"value": 85.94202898550726,
|
| 242 |
+
"display": "85.94"
|
| 243 |
+
},
|
| 244 |
+
"alpaca_other": {
|
| 245 |
+
"value": 86.8840579710145,
|
| 246 |
+
"display": "86.88"
|
| 247 |
+
},
|
| 248 |
+
"kudge": {
|
| 249 |
+
"value": 70.28725314183116,
|
| 250 |
+
"display": "70.29"
|
| 251 |
+
},
|
| 252 |
+
"kudge_other": {
|
| 253 |
+
"value": 70.3859964093356,
|
| 254 |
+
"display": "70.39"
|
| 255 |
+
},
|
| 256 |
+
"openhermes": {
|
| 257 |
+
"value": 82.69230769230771,
|
| 258 |
+
"display": "82.69"
|
| 259 |
+
},
|
| 260 |
+
"openhermes_other": {
|
| 261 |
+
"value": 80.70512820512825,
|
| 262 |
+
"display": "80.71"
|
| 263 |
+
},
|
| 264 |
+
"vicuna": {
|
| 265 |
+
"value": 76.42857142857144,
|
| 266 |
+
"display": "76.43"
|
| 267 |
+
},
|
| 268 |
+
"vicuna_other": {
|
| 269 |
+
"value": 73.28571428571429,
|
| 270 |
+
"display": "73.29"
|
| 271 |
+
}
|
| 272 |
+
},
|
| 273 |
+
"ASR": {
|
| 274 |
+
"common_voice_korea": {
|
| 275 |
+
"value": 13.738049713193117,
|
| 276 |
+
"display": "13.74"
|
| 277 |
+
},
|
| 278 |
+
"common_voice_korea_other": {
|
| 279 |
+
"value": 26.73996175908222,
|
| 280 |
+
"display": "26.74"
|
| 281 |
+
},
|
| 282 |
+
"ksponspeech_eval_clean": {
|
| 283 |
+
"value": 83.18526725056962,
|
| 284 |
+
"display": "83.19"
|
| 285 |
+
},
|
| 286 |
+
"ksponspeech_eval_other": {
|
| 287 |
+
"value": 45.13922315086276,
|
| 288 |
+
"display": "45.14"
|
| 289 |
+
},
|
| 290 |
+
"zeroth_korean_test": {
|
| 291 |
+
"value": 13.599128992119452,
|
| 292 |
+
"display": "13.60"
|
| 293 |
+
},
|
| 294 |
+
"zeroth_korean_test_other": {
|
| 295 |
+
"value": 14.558274574865202,
|
| 296 |
+
"display": "14.56"
|
| 297 |
+
}
|
| 298 |
+
},
|
| 299 |
+
"Translation": {
|
| 300 |
+
"etri_tst-COMMON": {
|
| 301 |
+
"value": 22.879423047941344,
|
| 302 |
+
"display": "22.88 / 47.15"
|
| 303 |
+
},
|
| 304 |
+
"etri_tst-HE": {
|
| 305 |
+
"value": 26.953160916836307,
|
| 306 |
+
"display": "26.95 / 52.59"
|
| 307 |
+
}
|
| 308 |
+
},
|
| 309 |
+
"LSQA": {
|
| 310 |
+
"mctest": {
|
| 311 |
+
"value": 93.88379204892966,
|
| 312 |
+
"display": "93.88"
|
| 313 |
+
},
|
| 314 |
+
"mctest_other": {
|
| 315 |
+
"value": 94.18960244648318,
|
| 316 |
+
"display": "94.19"
|
| 317 |
+
}
|
| 318 |
+
}
|
| 319 |
+
}
|
| 320 |
+
},
|
| 321 |
+
{
|
| 322 |
+
"id": "gemma3n_vllm",
|
| 323 |
+
"rank_name": "gemma3n_vllm",
|
| 324 |
+
"model": "google/gemma-3n-E4B-it",
|
| 325 |
+
"url": "",
|
| 326 |
+
"tasks": {
|
| 327 |
+
"K-disentQA": {
|
| 328 |
+
"history_after_chosun": {
|
| 329 |
+
"value": 50.0,
|
| 330 |
+
"display": "50.00"
|
| 331 |
+
},
|
| 332 |
+
"history_after_chosun_other": {
|
| 333 |
+
"value": 48.78048780487805,
|
| 334 |
+
"display": "48.78"
|
| 335 |
+
},
|
| 336 |
+
"history_before_chosun": {
|
| 337 |
+
"value": 69.3069306930693,
|
| 338 |
+
"display": "69.31"
|
| 339 |
+
},
|
| 340 |
+
"history_before_chosun_other": {
|
| 341 |
+
"value": 68.31683168316832,
|
| 342 |
+
"display": "68.32"
|
| 343 |
+
},
|
| 344 |
+
"k-sports": {
|
| 345 |
+
"value": 76.13636363636364,
|
| 346 |
+
"display": "76.14"
|
| 347 |
+
},
|
| 348 |
+
"k-sports_other": {
|
| 349 |
+
"value": 73.86363636363636,
|
| 350 |
+
"display": "73.86"
|
| 351 |
+
},
|
| 352 |
+
"kpop": {
|
| 353 |
+
"value": 65.0485436893204,
|
| 354 |
+
"display": "65.05"
|
| 355 |
+
},
|
| 356 |
+
"kpop_other": {
|
| 357 |
+
"value": 64.07766990291263,
|
| 358 |
+
"display": "64.08"
|
| 359 |
+
}
|
| 360 |
+
},
|
| 361 |
+
"SQA": {
|
| 362 |
+
"click": {
|
| 363 |
+
"value": 35.294117647058826,
|
| 364 |
+
"display": "35.29"
|
| 365 |
+
},
|
| 366 |
+
"click_other": {
|
| 367 |
+
"value": 35.708367854183926,
|
| 368 |
+
"display": "35.71"
|
| 369 |
+
},
|
| 370 |
+
"kobest_boolq": {
|
| 371 |
+
"value": 50.715563506261184,
|
| 372 |
+
"display": "50.72"
|
| 373 |
+
},
|
| 374 |
+
"kobest_boolq_other": {
|
| 375 |
+
"value": 51.25223613595706,
|
| 376 |
+
"display": "51.25"
|
| 377 |
+
}
|
| 378 |
+
},
|
| 379 |
+
"Instruct": {
|
| 380 |
+
"alpaca": {
|
| 381 |
+
"value": 82.97101449275362,
|
| 382 |
+
"display": "82.97"
|
| 383 |
+
},
|
| 384 |
+
"alpaca_other": {
|
| 385 |
+
"value": 83.36231884057973,
|
| 386 |
+
"display": "83.36"
|
| 387 |
+
},
|
| 388 |
+
"kudge": {
|
| 389 |
+
"value": 71.38240574506275,
|
| 390 |
+
"display": "71.38"
|
| 391 |
+
},
|
| 392 |
+
"kudge_other": {
|
| 393 |
+
"value": 70.69120287253139,
|
| 394 |
+
"display": "70.69"
|
| 395 |
+
},
|
| 396 |
+
"openhermes": {
|
| 397 |
+
"value": 84.61538461538464,
|
| 398 |
+
"display": "84.62"
|
| 399 |
+
},
|
| 400 |
+
"openhermes_other": {
|
| 401 |
+
"value": 85.96153846153848,
|
| 402 |
+
"display": "85.96"
|
| 403 |
+
},
|
| 404 |
+
"vicuna": {
|
| 405 |
+
"value": 80.21428571428574,
|
| 406 |
+
"display": "80.21"
|
| 407 |
+
},
|
| 408 |
+
"vicuna_other": {
|
| 409 |
+
"value": 80.00000000000003,
|
| 410 |
+
"display": "80.00"
|
| 411 |
+
}
|
| 412 |
+
},
|
| 413 |
+
"ASR": {
|
| 414 |
+
"common_voice_korea": {
|
| 415 |
+
"value": 144.5793499043977,
|
| 416 |
+
"display": "144.58"
|
| 417 |
+
},
|
| 418 |
+
"common_voice_korea_other": {
|
| 419 |
+
"value": 178.34163155878971,
|
| 420 |
+
"display": "178.34"
|
| 421 |
+
},
|
| 422 |
+
"ksponspeech_eval_clean": {
|
| 423 |
+
"value": 142.99420974518696,
|
| 424 |
+
"display": "142.99"
|
| 425 |
+
},
|
| 426 |
+
"ksponspeech_eval_other": {
|
| 427 |
+
"value": 130.76182558088652,
|
| 428 |
+
"display": "130.76"
|
| 429 |
+
},
|
| 430 |
+
"zeroth_korean_test": {
|
| 431 |
+
"value": 107.88054749066778,
|
| 432 |
+
"display": "107.88"
|
| 433 |
+
},
|
| 434 |
+
"zeroth_korean_test_other": {
|
| 435 |
+
"value": 118.62297801742017,
|
| 436 |
+
"display": "118.62"
|
| 437 |
+
}
|
| 438 |
+
},
|
| 439 |
+
"Translation": {
|
| 440 |
+
"etri_tst-COMMON": {
|
| 441 |
+
"value": 5.1704245727847375,
|
| 442 |
+
"display": "5.17 / 16.97"
|
| 443 |
+
},
|
| 444 |
+
"etri_tst-HE": {
|
| 445 |
+
"value": 5.235146756138342,
|
| 446 |
+
"display": "5.24 / 17.50"
|
| 447 |
+
}
|
| 448 |
+
},
|
| 449 |
+
"LSQA": {
|
| 450 |
+
"mctest": {
|
| 451 |
+
"value": 47.706422018348626,
|
| 452 |
+
"display": "47.71"
|
| 453 |
+
},
|
| 454 |
+
"mctest_other": {
|
| 455 |
+
"value": 48.62385321100918,
|
| 456 |
+
"display": "48.62"
|
| 457 |
+
}
|
| 458 |
+
}
|
| 459 |
+
}
|
| 460 |
+
},
|
| 461 |
+
{
|
| 462 |
+
"id": "gpt_realtime_mini",
|
| 463 |
+
"rank_name": "gpt_realtime_mini",
|
| 464 |
+
"model": "gpt-audio-mini",
|
| 465 |
+
"url": "",
|
| 466 |
+
"tasks": {
|
| 467 |
+
"K-disentQA": {
|
| 468 |
+
"history_after_chosun": {
|
| 469 |
+
"value": 45.1219512195122,
|
| 470 |
+
"display": "45.12"
|
| 471 |
+
},
|
| 472 |
+
"history_after_chosun_other": {
|
| 473 |
+
"value": 42.68292682926829,
|
| 474 |
+
"display": "42.68"
|
| 475 |
+
},
|
| 476 |
+
"history_before_chosun": {
|
| 477 |
+
"value": 64.35643564356435,
|
| 478 |
+
"display": "64.36"
|
| 479 |
+
},
|
| 480 |
+
"history_before_chosun_other": {
|
| 481 |
+
"value": 62.37623762376238,
|
| 482 |
+
"display": "62.38"
|
| 483 |
+
},
|
| 484 |
+
"k-sports": {
|
| 485 |
+
"value": 45.45454545454545,
|
| 486 |
+
"display": "45.45"
|
| 487 |
+
},
|
| 488 |
+
"k-sports_other": {
|
| 489 |
+
"value": 46.590909090909086,
|
| 490 |
+
"display": "46.59"
|
| 491 |
+
},
|
| 492 |
+
"kpop": {
|
| 493 |
+
"value": 41.74757281553398,
|
| 494 |
+
"display": "41.75"
|
| 495 |
+
},
|
| 496 |
+
"kpop_other": {
|
| 497 |
+
"value": 40.77669902912621,
|
| 498 |
+
"display": "40.78"
|
| 499 |
+
}
|
| 500 |
+
},
|
| 501 |
+
"SQA": {
|
| 502 |
+
"click": {
|
| 503 |
+
"value": 61.64043082021541,
|
| 504 |
+
"display": "61.64"
|
| 505 |
+
},
|
| 506 |
+
"click_other": {
|
| 507 |
+
"value": 60.06628003314002,
|
| 508 |
+
"display": "60.07"
|
| 509 |
+
},
|
| 510 |
+
"kobest_boolq": {
|
| 511 |
+
"value": 51.878354203935594,
|
| 512 |
+
"display": "51.88"
|
| 513 |
+
},
|
| 514 |
+
"kobest_boolq_other": {
|
| 515 |
+
"value": 50.44722719141323,
|
| 516 |
+
"display": "50.45"
|
| 517 |
+
}
|
| 518 |
+
},
|
| 519 |
+
"Instruct": {
|
| 520 |
+
"alpaca": {
|
| 521 |
+
"value": 90.57971014492755,
|
| 522 |
+
"display": "90.58"
|
| 523 |
+
},
|
| 524 |
+
"alpaca_other": {
|
| 525 |
+
"value": 90.57971014492755,
|
| 526 |
+
"display": "90.58"
|
| 527 |
+
},
|
| 528 |
+
"kudge": {
|
| 529 |
+
"value": 74.06642728904846,
|
| 530 |
+
"display": "74.07"
|
| 531 |
+
},
|
| 532 |
+
"kudge_other": {
|
| 533 |
+
"value": 73.98563734290842,
|
| 534 |
+
"display": "73.99"
|
| 535 |
+
},
|
| 536 |
+
"openhermes": {
|
| 537 |
+
"value": 89.42307692307693,
|
| 538 |
+
"display": "89.42"
|
| 539 |
+
},
|
| 540 |
+
"openhermes_other": {
|
| 541 |
+
"value": 89.61538461538464,
|
| 542 |
+
"display": "89.62"
|
| 543 |
+
},
|
| 544 |
+
"vicuna": {
|
| 545 |
+
"value": 82.14285714285717,
|
| 546 |
+
"display": "82.14"
|
| 547 |
+
},
|
| 548 |
+
"vicuna_other": {
|
| 549 |
+
"value": 81.78571428571429,
|
| 550 |
+
"display": "81.79"
|
| 551 |
+
}
|
| 552 |
+
},
|
| 553 |
+
"ASR": {
|
| 554 |
+
"common_voice_korea": {
|
| 555 |
+
"value": 33.04971319311664,
|
| 556 |
+
"display": "33.05"
|
| 557 |
+
},
|
| 558 |
+
"common_voice_korea_other": {
|
| 559 |
+
"value": 36.21414913957935,
|
| 560 |
+
"display": "36.21"
|
| 561 |
+
},
|
| 562 |
+
"ksponspeech_eval_clean": {
|
| 563 |
+
"value": 134.18967787788205,
|
| 564 |
+
"display": "134.19"
|
| 565 |
+
},
|
| 566 |
+
"ksponspeech_eval_other": {
|
| 567 |
+
"value": 63.6444522236322,
|
| 568 |
+
"display": "63.64"
|
| 569 |
+
},
|
| 570 |
+
"zeroth_korean_test": {
|
| 571 |
+
"value": 100.0,
|
| 572 |
+
"display": "100.00"
|
| 573 |
+
},
|
| 574 |
+
"zeroth_korean_test_other": {
|
| 575 |
+
"value": 100.0,
|
| 576 |
+
"display": "100.00"
|
| 577 |
+
}
|
| 578 |
+
},
|
| 579 |
+
"Translation": {
|
| 580 |
+
"etri_tst-COMMON": {
|
| 581 |
+
"value": 26.06340955434567,
|
| 582 |
+
"display": "26.06 / 49.37"
|
| 583 |
+
},
|
| 584 |
+
"etri_tst-HE": {
|
| 585 |
+
"value": 28.984282512215568,
|
| 586 |
+
"display": "28.98 / 52.96"
|
| 587 |
+
}
|
| 588 |
+
},
|
| 589 |
+
"LSQA": {
|
| 590 |
+
"mctest": {
|
| 591 |
+
"value": 84.09785932721712,
|
| 592 |
+
"display": "84.10"
|
| 593 |
+
},
|
| 594 |
+
"mctest_other": {
|
| 595 |
+
"value": 82.87461773700305,
|
| 596 |
+
"display": "82.87"
|
| 597 |
+
}
|
| 598 |
+
}
|
| 599 |
+
}
|
| 600 |
+
},
|
| 601 |
+
{
|
| 602 |
+
"id": "qwen",
|
| 603 |
+
"rank_name": "qwen",
|
| 604 |
+
"model": "qwen",
|
| 605 |
+
"url": "",
|
| 606 |
+
"tasks": {
|
| 607 |
+
"K-disentQA": {
|
| 608 |
+
"history_after_chosun": {
|
| 609 |
+
"value": 67.07317073170732,
|
| 610 |
+
"display": "67.07"
|
| 611 |
+
},
|
| 612 |
+
"history_after_chosun_other": {
|
| 613 |
+
"value": 63.41463414634146,
|
| 614 |
+
"display": "63.41"
|
| 615 |
+
},
|
| 616 |
+
"history_before_chosun": {
|
| 617 |
+
"value": 72.27722772277228,
|
| 618 |
+
"display": "72.28"
|
| 619 |
+
},
|
| 620 |
+
"history_before_chosun_other": {
|
| 621 |
+
"value": 66.33663366336634,
|
| 622 |
+
"display": "66.34"
|
| 623 |
+
},
|
| 624 |
+
"k-sports": {
|
| 625 |
+
"value": 88.63636363636364,
|
| 626 |
+
"display": "88.64"
|
| 627 |
+
},
|
| 628 |
+
"k-sports_other": {
|
| 629 |
+
"value": 87.5,
|
| 630 |
+
"display": "87.50"
|
| 631 |
+
},
|
| 632 |
+
"kpop": {
|
| 633 |
+
"value": 81.55339805825243,
|
| 634 |
+
"display": "81.55"
|
| 635 |
+
},
|
| 636 |
+
"kpop_other": {
|
| 637 |
+
"value": 81.55339805825243,
|
| 638 |
+
"display": "81.55"
|
| 639 |
+
}
|
| 640 |
+
},
|
| 641 |
+
"SQA": {
|
| 642 |
+
"click": {
|
| 643 |
+
"value": 33.471416735708374,
|
| 644 |
+
"display": "33.47"
|
| 645 |
+
},
|
| 646 |
+
"click_other": {
|
| 647 |
+
"value": 32.64291632145816,
|
| 648 |
+
"display": "32.64"
|
| 649 |
+
},
|
| 650 |
+
"kobest_boolq": {
|
| 651 |
+
"value": 50.62611806797853,
|
| 652 |
+
"display": "50.63"
|
| 653 |
+
},
|
| 654 |
+
"kobest_boolq_other": {
|
| 655 |
+
"value": 50.62611806797853,
|
| 656 |
+
"display": "50.63"
|
| 657 |
+
}
|
| 658 |
+
},
|
| 659 |
+
"Instruct": {
|
| 660 |
+
"alpaca": {
|
| 661 |
+
"value": 55.79710144927535,
|
| 662 |
+
"display": "55.80"
|
| 663 |
+
},
|
| 664 |
+
"alpaca_other": {
|
| 665 |
+
"value": 55.43478260869564,
|
| 666 |
+
"display": "55.43"
|
| 667 |
+
},
|
| 668 |
+
"kudge": {
|
| 669 |
+
"value": 58.88888888888886,
|
| 670 |
+
"display": "58.89"
|
| 671 |
+
},
|
| 672 |
+
"kudge_other": {
|
| 673 |
+
"value": 55.25134649910226,
|
| 674 |
+
"display": "55.25"
|
| 675 |
+
},
|
| 676 |
+
"openhermes": {
|
| 677 |
+
"value": 59.679487179487175,
|
| 678 |
+
"display": "59.68"
|
| 679 |
+
},
|
| 680 |
+
"openhermes_other": {
|
| 681 |
+
"value": 59.294871794871796,
|
| 682 |
+
"display": "59.29"
|
| 683 |
+
},
|
| 684 |
+
"vicuna": {
|
| 685 |
+
"value": 63.28571428571424,
|
| 686 |
+
"display": "63.29"
|
| 687 |
+
},
|
| 688 |
+
"vicuna_other": {
|
| 689 |
+
"value": 61.49999999999998,
|
| 690 |
+
"display": "61.50"
|
| 691 |
+
}
|
| 692 |
+
},
|
| 693 |
+
"ASR": {
|
| 694 |
+
"common_voice_korea": {
|
| 695 |
+
"value": 33.55640535372849,
|
| 696 |
+
"display": "33.56"
|
| 697 |
+
},
|
| 698 |
+
"common_voice_korea_other": {
|
| 699 |
+
"value": 38.14630409804673,
|
| 700 |
+
"display": "38.15"
|
| 701 |
+
},
|
| 702 |
+
"ksponspeech_eval_clean": {
|
| 703 |
+
"value": 39.99665544848345,
|
| 704 |
+
"display": "40.00"
|
| 705 |
+
},
|
| 706 |
+
"ksponspeech_eval_other": {
|
| 707 |
+
"value": 35.66417246459254,
|
| 708 |
+
"display": "35.66"
|
| 709 |
+
},
|
| 710 |
+
"zeroth_korean_test": {
|
| 711 |
+
"value": 22.169224388220655,
|
| 712 |
+
"display": "22.17"
|
| 713 |
+
},
|
| 714 |
+
"zeroth_korean_test_other": {
|
| 715 |
+
"value": 25.036291995022815,
|
| 716 |
+
"display": "25.04"
|
| 717 |
+
}
|
| 718 |
+
},
|
| 719 |
+
"Translation": {
|
| 720 |
+
"etri_tst-COMMON": {
|
| 721 |
+
"value": 15.98267809185928,
|
| 722 |
+
"display": "15.98 / 36.78"
|
| 723 |
+
},
|
| 724 |
+
"etri_tst-HE": {
|
| 725 |
+
"value": 19.09056943080292,
|
| 726 |
+
"display": "19.09 / 40.77"
|
| 727 |
+
}
|
| 728 |
+
},
|
| 729 |
+
"LSQA": {
|
| 730 |
+
"mctest": {
|
| 731 |
+
"value": 51.07033639143731,
|
| 732 |
+
"display": "51.07"
|
| 733 |
+
},
|
| 734 |
+
"mctest_other": {
|
| 735 |
+
"value": 50.45871559633027,
|
| 736 |
+
"display": "50.46"
|
| 737 |
+
}
|
| 738 |
+
}
|
| 739 |
+
}
|
| 740 |
+
},
|
| 741 |
+
{
|
| 742 |
+
"id": "qwen3_vllm",
|
| 743 |
+
"rank_name": "qwen3_vllm",
|
| 744 |
+
"model": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
|
| 745 |
+
"url": "",
|
| 746 |
+
"tasks": {
|
| 747 |
+
"K-disentQA": {
|
| 748 |
+
"history_after_chosun": {
|
| 749 |
+
"value": 84.14634146341463,
|
| 750 |
+
"display": "84.15"
|
| 751 |
+
},
|
| 752 |
+
"history_after_chosun_other": {
|
| 753 |
+
"value": 82.92682926829268,
|
| 754 |
+
"display": "82.93"
|
| 755 |
+
},
|
| 756 |
+
"history_before_chosun": {
|
| 757 |
+
"value": 94.05940594059405,
|
| 758 |
+
"display": "94.06"
|
| 759 |
+
},
|
| 760 |
+
"history_before_chosun_other": {
|
| 761 |
+
"value": 94.05940594059405,
|
| 762 |
+
"display": "94.06"
|
| 763 |
+
},
|
| 764 |
+
"k-sports": {
|
| 765 |
+
"value": 88.63636363636364,
|
| 766 |
+
"display": "88.64"
|
| 767 |
+
},
|
| 768 |
+
"k-sports_other": {
|
| 769 |
+
"value": 89.77272727272727,
|
| 770 |
+
"display": "89.77"
|
| 771 |
+
},
|
| 772 |
+
"kpop": {
|
| 773 |
+
"value": 88.3495145631068,
|
| 774 |
+
"display": "88.35"
|
| 775 |
+
},
|
| 776 |
+
"kpop_other": {
|
| 777 |
+
"value": 85.43689320388349,
|
| 778 |
+
"display": "85.44"
|
| 779 |
+
}
|
| 780 |
+
},
|
| 781 |
+
"SQA": {
|
| 782 |
+
"click": {
|
| 783 |
+
"value": 63.96023198011599,
|
| 784 |
+
"display": "63.96"
|
| 785 |
+
},
|
| 786 |
+
"click_other": {
|
| 787 |
+
"value": 61.55758077879039,
|
| 788 |
+
"display": "61.56"
|
| 789 |
+
},
|
| 790 |
+
"kobest_boolq": {
|
| 791 |
+
"value": 51.341681574239715,
|
| 792 |
+
"display": "51.34"
|
| 793 |
+
},
|
| 794 |
+
"kobest_boolq_other": {
|
| 795 |
+
"value": 51.43112701252236,
|
| 796 |
+
"display": "51.43"
|
| 797 |
+
}
|
| 798 |
+
},
|
| 799 |
+
"Instruct": {
|
| 800 |
+
"alpaca": {
|
| 801 |
+
"value": 84.05797101449278,
|
| 802 |
+
"display": "84.06"
|
| 803 |
+
},
|
| 804 |
+
"alpaca_other": {
|
| 805 |
+
"value": 83.04347826086959,
|
| 806 |
+
"display": "83.04"
|
| 807 |
+
},
|
| 808 |
+
"kudge": {
|
| 809 |
+
"value": 71.86714542190298,
|
| 810 |
+
"display": "71.87"
|
| 811 |
+
},
|
| 812 |
+
"kudge_other": {
|
| 813 |
+
"value": 71.82226211849184,
|
| 814 |
+
"display": "71.82"
|
| 815 |
+
},
|
| 816 |
+
"openhermes": {
|
| 817 |
+
"value": 86.5384615384616,
|
| 818 |
+
"display": "86.54"
|
| 819 |
+
},
|
| 820 |
+
"openhermes_other": {
|
| 821 |
+
"value": 85.19230769230771,
|
| 822 |
+
"display": "85.19"
|
| 823 |
+
},
|
| 824 |
+
"vicuna": {
|
| 825 |
+
"value": 79.64285714285715,
|
| 826 |
+
"display": "79.64"
|
| 827 |
+
},
|
| 828 |
+
"vicuna_other": {
|
| 829 |
+
"value": 78.42857142857143,
|
| 830 |
+
"display": "78.43"
|
| 831 |
+
}
|
| 832 |
+
},
|
| 833 |
+
"ASR": {
|
| 834 |
+
"common_voice_korea": {
|
| 835 |
+
"value": 4.961759082217973,
|
| 836 |
+
"display": "4.96"
|
| 837 |
+
},
|
| 838 |
+
"common_voice_korea_other": {
|
| 839 |
+
"value": 6.779011872845652,
|
| 840 |
+
"display": "6.78"
|
| 841 |
+
},
|
| 842 |
+
"ksponspeech_eval_clean": {
|
| 843 |
+
"value": 8.459624992161208,
|
| 844 |
+
"display": "8.46"
|
| 845 |
+
},
|
| 846 |
+
"ksponspeech_eval_other": {
|
| 847 |
+
"value": 7.907058154290465,
|
| 848 |
+
"display": "7.91"
|
| 849 |
+
},
|
| 850 |
+
"zeroth_korean_test": {
|
| 851 |
+
"value": 3.3336789713811696,
|
| 852 |
+
"display": "3.33"
|
| 853 |
+
},
|
| 854 |
+
"zeroth_korean_test_other": {
|
| 855 |
+
"value": 3.9143508917461634,
|
| 856 |
+
"display": "3.91"
|
| 857 |
+
}
|
| 858 |
+
},
|
| 859 |
+
"Translation": {
|
| 860 |
+
"etri_tst-COMMON": {
|
| 861 |
+
"value": 28.528409006302443,
|
| 862 |
+
"display": "28.53 / 52.02"
|
| 863 |
+
},
|
| 864 |
+
"etri_tst-HE": {
|
| 865 |
+
"value": 31.70803814468474,
|
| 866 |
+
"display": "31.71 / 55.79"
|
| 867 |
+
}
|
| 868 |
+
},
|
| 869 |
+
"LSQA": {
|
| 870 |
+
"mctest": {
|
| 871 |
+
"value": 92.04892966360856,
|
| 872 |
+
"display": "92.05"
|
| 873 |
+
},
|
| 874 |
+
"mctest_other": {
|
| 875 |
+
"value": 95.71865443425077,
|
| 876 |
+
"display": "95.72"
|
| 877 |
+
}
|
| 878 |
+
}
|
| 879 |
+
}
|
| 880 |
+
},
|
| 881 |
+
{
|
| 882 |
+
"id": "voxtral3b",
|
| 883 |
+
"rank_name": "voxtral3b",
|
| 884 |
+
"model": "mistralai/Voxtral-Mini-3B-2507",
|
| 885 |
+
"url": "",
|
| 886 |
+
"tasks": {
|
| 887 |
+
"K-disentQA": {
|
| 888 |
+
"history_after_chosun": {
|
| 889 |
+
"value": 81.70731707317073,
|
| 890 |
+
"display": "81.71"
|
| 891 |
+
},
|
| 892 |
+
"history_after_chosun_other": {
|
| 893 |
+
"value": 78.04878048780488,
|
| 894 |
+
"display": "78.05"
|
| 895 |
+
},
|
| 896 |
+
"history_before_chosun": {
|
| 897 |
+
"value": 86.13861386138613,
|
| 898 |
+
"display": "86.14"
|
| 899 |
+
},
|
| 900 |
+
"history_before_chosun_other": {
|
| 901 |
+
"value": 86.13861386138613,
|
| 902 |
+
"display": "86.14"
|
| 903 |
+
},
|
| 904 |
+
"k-sports": {
|
| 905 |
+
"value": 94.31818181818183,
|
| 906 |
+
"display": "94.32"
|
| 907 |
+
},
|
| 908 |
+
"k-sports_other": {
|
| 909 |
+
"value": 94.31818181818183,
|
| 910 |
+
"display": "94.32"
|
| 911 |
+
},
|
| 912 |
+
"kpop": {
|
| 913 |
+
"value": 89.32038834951457,
|
| 914 |
+
"display": "89.32"
|
| 915 |
+
},
|
| 916 |
+
"kpop_other": {
|
| 917 |
+
"value": 90.29126213592234,
|
| 918 |
+
"display": "90.29"
|
| 919 |
+
}
|
| 920 |
+
},
|
| 921 |
+
"SQA": {
|
| 922 |
+
"click": {
|
| 923 |
+
"value": 41.25932062966031,
|
| 924 |
+
"display": "41.26"
|
| 925 |
+
},
|
| 926 |
+
"click_other": {
|
| 927 |
+
"value": 41.01077050538525,
|
| 928 |
+
"display": "41.01"
|
| 929 |
+
},
|
| 930 |
+
"kobest_boolq": {
|
| 931 |
+
"value": 51.788908765652955,
|
| 932 |
+
"display": "51.79"
|
| 933 |
+
},
|
| 934 |
+
"kobest_boolq_other": {
|
| 935 |
+
"value": 51.073345259391765,
|
| 936 |
+
"display": "51.07"
|
| 937 |
+
}
|
| 938 |
+
},
|
| 939 |
+
"Instruct": {
|
| 940 |
+
"alpaca": {
|
| 941 |
+
"value": 72.89855072463769,
|
| 942 |
+
"display": "72.90"
|
| 943 |
+
},
|
| 944 |
+
"alpaca_other": {
|
| 945 |
+
"value": 72.46376811594205,
|
| 946 |
+
"display": "72.46"
|
| 947 |
+
},
|
| 948 |
+
"kudge": {
|
| 949 |
+
"value": 61.9658886894074,
|
| 950 |
+
"display": "61.97"
|
| 951 |
+
},
|
| 952 |
+
"kudge_other": {
|
| 953 |
+
"value": 61.69658886894065,
|
| 954 |
+
"display": "61.70"
|
| 955 |
+
},
|
| 956 |
+
"openhermes": {
|
| 957 |
+
"value": 69.10256410256412,
|
| 958 |
+
"display": "69.10"
|
| 959 |
+
},
|
| 960 |
+
"openhermes_other": {
|
| 961 |
+
"value": 69.61538461538463,
|
| 962 |
+
"display": "69.62"
|
| 963 |
+
},
|
| 964 |
+
"vicuna": {
|
| 965 |
+
"value": 67.78571428571428,
|
| 966 |
+
"display": "67.79"
|
| 967 |
+
},
|
| 968 |
+
"vicuna_other": {
|
| 969 |
+
"value": 69.49999999999997,
|
| 970 |
+
"display": "69.50"
|
| 971 |
+
}
|
| 972 |
+
},
|
| 973 |
+
"ASR": {
|
| 974 |
+
"common_voice_korea": {
|
| 975 |
+
"value": 60.09560229445506,
|
| 976 |
+
"display": "60.10"
|
| 977 |
+
},
|
| 978 |
+
"common_voice_korea_other": {
|
| 979 |
+
"value": 58.98123324396782,
|
| 980 |
+
"display": "58.98"
|
| 981 |
+
},
|
| 982 |
+
"ksponspeech_eval_clean": {
|
| 983 |
+
"value": 62.62463680260875,
|
| 984 |
+
"display": "62.62"
|
| 985 |
+
},
|
| 986 |
+
"ksponspeech_eval_other": {
|
| 987 |
+
"value": 56.042240989512685,
|
| 988 |
+
"display": "56.04"
|
| 989 |
+
},
|
| 990 |
+
"zeroth_korean_test": {
|
| 991 |
+
"value": 40.92181667357943,
|
| 992 |
+
"display": "40.92"
|
| 993 |
+
},
|
| 994 |
+
"zeroth_korean_test_other": {
|
| 995 |
+
"value": 39.06574035669846,
|
| 996 |
+
"display": "39.07"
|
| 997 |
+
}
|
| 998 |
+
},
|
| 999 |
+
"Translation": {
|
| 1000 |
+
"etri_tst-COMMON": {
|
| 1001 |
+
"value": 23.06821405492274,
|
| 1002 |
+
"display": "23.07 / 45.95"
|
| 1003 |
+
},
|
| 1004 |
+
"etri_tst-HE": {
|
| 1005 |
+
"value": 26.078611466879202,
|
| 1006 |
+
"display": "26.08 / 50.16"
|
| 1007 |
+
}
|
| 1008 |
+
},
|
| 1009 |
+
"LSQA": {
|
| 1010 |
+
"mctest": {
|
| 1011 |
+
"value": 88.37920489296636,
|
| 1012 |
+
"display": "88.38"
|
| 1013 |
+
},
|
| 1014 |
+
"mctest_other": {
|
| 1015 |
+
"value": 89.29663608562691,
|
| 1016 |
+
"display": "89.30"
|
| 1017 |
+
}
|
| 1018 |
+
}
|
| 1019 |
+
}
|
| 1020 |
+
}
|
| 1021 |
+
]
|
| 1022 |
+
}
|
data/results_real/ASR/gemini_flash/common_voice_korea/common_voice_korea_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "common_voice_korea.jsonl",
|
| 3 |
+
"model": "gemini-2.5-flash-lite",
|
| 4 |
+
"total_samples": 523,
|
| 5 |
+
"total_cer": 0.13738049713193118,
|
| 6 |
+
"total_edit_distance": 1437,
|
| 7 |
+
"total_ref_length": 10460,
|
| 8 |
+
"normalization": "๊ตฌ๋์ ์ ๊ฑฐ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "๋ค์ ์์ฑ์ ํ๊ตญ์ด๋ก ์ ์ฌํด ์ฃผ์๊ณ ์ค๋ช
์ด๋ ์ถ๊ฐ ๋ฌธ์ฅ ์์ด ์ ์ฌ ๊ฒฐ๊ณผ๋ง ์ ๊ณตํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 1678.7555549144745,
|
| 11 |
+
"timestamp": "2026-03-12T02:49:36.480317"
|
| 12 |
+
}
|
data/results_real/ASR/gemini_flash/common_voice_korea/prompt_v2/common_voice_korea_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "common_voice_korea.jsonl",
|
| 3 |
+
"model": "gemini-2.5-flash-lite",
|
| 4 |
+
"total_samples": 523,
|
| 5 |
+
"total_cer": 0.13738049713193118,
|
| 6 |
+
"total_edit_distance": 1437,
|
| 7 |
+
"total_ref_length": 10460,
|
| 8 |
+
"normalization": "๊ตฌ๋์ ์ ๊ฑฐ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "๋ค์ ์์ฑ์ ํ๊ตญ์ด๋ก ์ ์ฌํด ์ฃผ์๊ณ ์ค๋ช
์ด๋ ์ถ๊ฐ ๋ฌธ์ฅ ์์ด ์ ์ฌ ๊ฒฐ๊ณผ๋ง ์ ๊ณตํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 1678.7555549144745,
|
| 11 |
+
"timestamp": "2026-03-12T02:49:36.480317"
|
| 12 |
+
}
|
data/results_real/ASR/gemini_flash/common_voice_korea_other/common_voice_korea_noisy_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "common_voice_korea_noisy.jsonl",
|
| 3 |
+
"model": "gemini-2.5-flash-lite",
|
| 4 |
+
"total_samples": 523,
|
| 5 |
+
"total_cer": 0.2673996175908222,
|
| 6 |
+
"total_edit_distance": 2797,
|
| 7 |
+
"total_ref_length": 10460,
|
| 8 |
+
"normalization": "๊ตฌ๋์ ์ ๊ฑฐ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "๋ค์ ์์ฑ์ ํ๊ตญ์ด๋ก ์ ์ฌํด ์ฃผ์๊ณ ์ค๋ช
์ด๋ ์ถ๊ฐ ๋ฌธ์ฅ ์์ด ์ ์ฌ ๊ฒฐ๊ณผ๋ง ์ ๊ณตํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 2389.414547920227,
|
| 11 |
+
"timestamp": "2026-03-12T03:29:28.746633"
|
| 12 |
+
}
|
data/results_real/ASR/gemini_flash/common_voice_korea_other/prompt_v2/common_voice_korea_noisy_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "common_voice_korea_noisy.jsonl",
|
| 3 |
+
"model": "gemini-2.5-flash-lite",
|
| 4 |
+
"total_samples": 523,
|
| 5 |
+
"total_cer": 0.2673996175908222,
|
| 6 |
+
"total_edit_distance": 2797,
|
| 7 |
+
"total_ref_length": 10460,
|
| 8 |
+
"normalization": "๊ตฌ๋์ ์ ๊ฑฐ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "๋ค์ ์์ฑ์ ํ๊ตญ์ด๋ก ์ ์ฌํด ์ฃผ์๊ณ ์ค๋ช
์ด๋ ์ถ๊ฐ ๋ฌธ์ฅ ์์ด ์ ์ฌ ๊ฒฐ๊ณผ๋ง ์ ๊ณตํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 2389.414547920227,
|
| 11 |
+
"timestamp": "2026-03-12T03:29:28.746633"
|
| 12 |
+
}
|
data/results_real/ASR/gemini_flash/ksponspeech_eval_clean/ksponspeech_eval_clean_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "ksponspeech_eval_clean.jsonl",
|
| 3 |
+
"model": "gemini-2.5-flash-lite",
|
| 4 |
+
"total_samples": 3000,
|
| 5 |
+
"total_cer": 0.8318526725056962,
|
| 6 |
+
"total_edit_distance": 39795,
|
| 7 |
+
"total_ref_length": 47839,
|
| 8 |
+
"normalization": "๊ตฌ๋์ ์ ๊ฑฐ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "๋ค์ ์์ฑ์ ํ๊ตญ์ด๋ก ์ ์ฌํด ์ฃผ์๊ณ ์ค๋ช
์ด๋ ์ถ๊ฐ ๋ฌธ์ฅ ์์ด ์ ์ฌ ๊ฒฐ๊ณผ๋ง ์ ๊ณตํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 14890.70679473877,
|
| 11 |
+
"timestamp": "2026-03-12T07:37:42.309543"
|
| 12 |
+
}
|
data/results_real/ASR/gemini_flash/ksponspeech_eval_clean/prompt_v2/ksponspeech_eval_clean_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "ksponspeech_eval_clean.jsonl",
|
| 3 |
+
"model": "gemini-2.5-flash-lite",
|
| 4 |
+
"total_samples": 3000,
|
| 5 |
+
"total_cer": 0.8318526725056962,
|
| 6 |
+
"total_edit_distance": 39795,
|
| 7 |
+
"total_ref_length": 47839,
|
| 8 |
+
"normalization": "๊ตฌ๋์ ์ ๊ฑฐ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "๋ค์ ์์ฑ์ ํ๊ตญ์ด๋ก ์ ์ฌํด ์ฃผ์๊ณ ์ค๋ช
์ด๋ ์ถ๊ฐ ๋ฌธ์ฅ ์์ด ์ ์ฌ ๊ฒฐ๊ณผ๋ง ์ ๊ณตํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 14890.70679473877,
|
| 11 |
+
"timestamp": "2026-03-12T07:37:42.309543"
|
| 12 |
+
}
|
data/results_real/ASR/gemini_flash/ksponspeech_eval_other/ksponspeech_eval_other_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "ksponspeech_eval_other.jsonl",
|
| 3 |
+
"model": "gemini-2.5-flash-lite",
|
| 4 |
+
"total_samples": 3000,
|
| 5 |
+
"total_cer": 0.4513922315086276,
|
| 6 |
+
"total_edit_distance": 30947,
|
| 7 |
+
"total_ref_length": 68559,
|
| 8 |
+
"normalization": "๊ตฌ๋์ ์ ๊ฑฐ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "๋ค์ ์์ฑ์ ํ๊ตญ์ด๋ก ์ ์ฌํด ์ฃผ์๊ณ ์ค๋ช
์ด๋ ์ถ๊ฐ ๋ฌธ์ฅ ์์ด ์ ์ฌ ๊ฒฐ๊ณผ๋ง ์ ๊ณตํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 14233.098718166351,
|
| 11 |
+
"timestamp": "2026-03-12T11:34:58.652305"
|
| 12 |
+
}
|
data/results_real/ASR/gemini_flash/ksponspeech_eval_other/prompt_v2/ksponspeech_eval_other_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "ksponspeech_eval_other.jsonl",
|
| 3 |
+
"model": "gemini-2.5-flash-lite",
|
| 4 |
+
"total_samples": 3000,
|
| 5 |
+
"total_cer": 0.4513922315086276,
|
| 6 |
+
"total_edit_distance": 30947,
|
| 7 |
+
"total_ref_length": 68559,
|
| 8 |
+
"normalization": "๊ตฌ๋์ ์ ๊ฑฐ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "๋ค์ ์์ฑ์ ํ๊ตญ์ด๋ก ์ ์ฌํด ์ฃผ์๊ณ ์ค๋ช
์ด๋ ์ถ๊ฐ ๋ฌธ์ฅ ์์ด ์ ์ฌ ๊ฒฐ๊ณผ๋ง ์ ๊ณตํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 14233.098718166351,
|
| 11 |
+
"timestamp": "2026-03-12T11:34:58.652305"
|
| 12 |
+
}
|
data/results_real/ASR/gemini_flash/zeroth_korean_test/prompt_v2/zeroth_korean_test_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "zeroth_korean_test.jsonl",
|
| 3 |
+
"model": "gemini-2.5-flash-lite",
|
| 4 |
+
"total_samples": 457,
|
| 5 |
+
"total_cer": 0.13599128992119452,
|
| 6 |
+
"total_edit_distance": 2623,
|
| 7 |
+
"total_ref_length": 19288,
|
| 8 |
+
"normalization": "๊ตฌ๋์ ์ ๊ฑฐ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "๋ค์ ์์ฑ์ ํ๊ตญ์ด๋ก ์ ์ฌํด ์ฃผ์๊ณ ์ค๋ช
์ด๋ ์ถ๊ฐ ๋ฌธ์ฅ ์์ด ์ ์ฌ ๊ฒฐ๊ณผ๋ง ์ ๊ณตํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 2212.4609863758087,
|
| 11 |
+
"timestamp": "2026-03-12T12:11:54.141376"
|
| 12 |
+
}
|
data/results_real/ASR/gemini_flash/zeroth_korean_test/zeroth_korean_test_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "zeroth_korean_test.jsonl",
|
| 3 |
+
"model": "gemini-2.5-flash-lite",
|
| 4 |
+
"total_samples": 457,
|
| 5 |
+
"total_cer": 0.13599128992119452,
|
| 6 |
+
"total_edit_distance": 2623,
|
| 7 |
+
"total_ref_length": 19288,
|
| 8 |
+
"normalization": "๊ตฌ๋์ ์ ๊ฑฐ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "๋ค์ ์์ฑ์ ํ๊ตญ์ด๋ก ์ ์ฌํด ์ฃผ์๊ณ ์ค๋ช
์ด๋ ์ถ๊ฐ ๋ฌธ์ฅ ์์ด ์ ์ฌ ๊ฒฐ๊ณผ๋ง ์ ๊ณตํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 2212.4609863758087,
|
| 11 |
+
"timestamp": "2026-03-12T12:11:54.141376"
|
| 12 |
+
}
|
data/results_real/ASR/gemini_flash/zeroth_korean_test_other/prompt_v2/zeroth_korean_test_noisy_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "zeroth_korean_test_noisy.jsonl",
|
| 3 |
+
"model": "gemini-2.5-flash-lite",
|
| 4 |
+
"total_samples": 457,
|
| 5 |
+
"total_cer": 0.14558274574865201,
|
| 6 |
+
"total_edit_distance": 2808,
|
| 7 |
+
"total_ref_length": 19288,
|
| 8 |
+
"normalization": "๊ตฌ๋์ ์ ๊ฑฐ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "๋ค์ ์์ฑ์ ํ๊ตญ์ด๋ก ์ ์ฌํด ์ฃผ์๊ณ ์ค๋ช
์ด๋ ์ถ๊ฐ ๋ฌธ์ฅ ์์ด ์ ์ฌ ๊ฒฐ๊ณผ๋ง ์ ๊ณตํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 2302.8534002304077,
|
| 11 |
+
"timestamp": "2026-03-12T12:50:19.843093"
|
| 12 |
+
}
|
data/results_real/ASR/gemini_flash/zeroth_korean_test_other/zeroth_korean_test_noisy_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "zeroth_korean_test_noisy.jsonl",
|
| 3 |
+
"model": "gemini-2.5-flash-lite",
|
| 4 |
+
"total_samples": 457,
|
| 5 |
+
"total_cer": 0.14558274574865201,
|
| 6 |
+
"total_edit_distance": 2808,
|
| 7 |
+
"total_ref_length": 19288,
|
| 8 |
+
"normalization": "๊ตฌ๋์ ์ ๊ฑฐ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "๋ค์ ์์ฑ์ ํ๊ตญ์ด๋ก ์ ์ฌํด ์ฃผ์๊ณ ์ค๋ช
์ด๋ ์ถ๊ฐ ๋ฌธ์ฅ ์์ด ์ ์ฌ ๊ฒฐ๊ณผ๋ง ์ ๊ณตํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 2302.8534002304077,
|
| 11 |
+
"timestamp": "2026-03-12T12:50:19.843093"
|
| 12 |
+
}
|
data/results_real/ASR/gemma3n_vllm/common_voice_korea/common_voice_korea_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "common_voice_korea.jsonl",
|
| 3 |
+
"model": "google/gemma-3n-E4B-it",
|
| 4 |
+
"total_samples": 523,
|
| 5 |
+
"total_cer": 1.4457934990439771,
|
| 6 |
+
"total_edit_distance": 15123,
|
| 7 |
+
"total_ref_length": 10460,
|
| 8 |
+
"normalization": "์ซ์ ๋ณํ ํ์ฉ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "์ ๊ณต๋ ์์ฑ์ ๋ด์ฉ์ ํ๊ตญ์ด๋ก ์ ํํ๊ฒ ์ ์ฌํด ์ฃผ์๊ณ ์ ์ฌ๋ ๋ฌธ์ฅ๋ง ์ถ๋ ฅํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 110.93934440612793,
|
| 11 |
+
"timestamp": "2026-03-06T09:23:46.623712"
|
| 12 |
+
}
|
data/results_real/ASR/gemma3n_vllm/common_voice_korea/prompt_v1/common_voice_korea_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "common_voice_korea.jsonl",
|
| 3 |
+
"model": "google/gemma-3n-E4B-it",
|
| 4 |
+
"total_samples": 523,
|
| 5 |
+
"total_cer": 2.8693116634799236,
|
| 6 |
+
"total_edit_distance": 30013,
|
| 7 |
+
"total_ref_length": 10460,
|
| 8 |
+
"normalization": "๊ตฌ๋์ ์ ๊ฑฐ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "์ฃผ์ด์ง ์์ฑ์ ํ๊ตญ์ด๋ก ์ ์ฌํ๊ณ ์ ์ฌ๋ ํ๊ตญ์ด ํ
์คํธ๋ง ์ถ๋ ฅํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 307.95989894866943,
|
| 11 |
+
"timestamp": "2026-03-10T19:22:21.797930"
|
| 12 |
+
}
|
data/results_real/ASR/gemma3n_vllm/common_voice_korea/prompt_v2/common_voice_korea_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "common_voice_korea.jsonl",
|
| 3 |
+
"model": "google/gemma-3n-E4B-it",
|
| 4 |
+
"total_samples": 523,
|
| 5 |
+
"total_cer": 1.6489483747609943,
|
| 6 |
+
"total_edit_distance": 17248,
|
| 7 |
+
"total_ref_length": 10460,
|
| 8 |
+
"normalization": "์ซ์ ๋ณํ ํ์ฉ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "๋ค์ ์์ฑ์ ํ๊ตญ์ด๋ก ์ ์ฌํด ์ฃผ์๊ณ ์ค๋ช
์ด๋ ์ถ๊ฐ ๋ฌธ์ฅ ์์ด ์ ์ฌ ๊ฒฐ๊ณผ๋ง ์ ๊ณตํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 134.68821096420288,
|
| 11 |
+
"timestamp": "2026-03-06T09:21:55.660171"
|
| 12 |
+
}
|
data/results_real/ASR/gemma3n_vllm/common_voice_korea/prompt_v3/common_voice_korea_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "common_voice_korea.jsonl",
|
| 3 |
+
"model": "google/gemma-3n-E4B-it",
|
| 4 |
+
"total_samples": 523,
|
| 5 |
+
"total_cer": 1.4457934990439771,
|
| 6 |
+
"total_edit_distance": 15123,
|
| 7 |
+
"total_ref_length": 10460,
|
| 8 |
+
"normalization": "์ซ์ ๋ณํ ํ์ฉ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "์ ๊ณต๋ ์์ฑ์ ๋ด์ฉ์ ํ๊ตญ์ด๋ก ์ ํํ๊ฒ ์ ์ฌํด ์ฃผ์๊ณ ์ ์ฌ๋ ๋ฌธ์ฅ๋ง ์ถ๋ ฅํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 110.93934440612793,
|
| 11 |
+
"timestamp": "2026-03-06T09:23:46.623712"
|
| 12 |
+
}
|
data/results_real/ASR/gemma3n_vllm/common_voice_korea/prompt_v4/common_voice_korea_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "common_voice_korea.jsonl",
|
| 3 |
+
"model": "google/gemma-3n-E4B-it",
|
| 4 |
+
"total_samples": 523,
|
| 5 |
+
"total_cer": 1.8219885277246655,
|
| 6 |
+
"total_edit_distance": 19058,
|
| 7 |
+
"total_ref_length": 10460,
|
| 8 |
+
"normalization": "์ซ์ ๋ณํ ํ์ฉ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "๋งฅ๋ฝ์ผ๋ก ์ฃผ์ด์ง ์์ฑ์ ํ๊ตญ์ด๋ก ์ ์ฌํด ์ฃผ์๋ ๋ค๋ฅธ ์ค๋ช
์์ด ์ ์ฌ๋ ํ๊ตญ์ด ํ
์คํธ๋ง ์๋ตํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 147.14897418022156,
|
| 11 |
+
"timestamp": "2026-03-06T09:26:13.796740"
|
| 12 |
+
}
|
data/results_real/ASR/gemma3n_vllm/common_voice_korea_other/common_voice_korea_noisy_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "common_voice_korea_noisy.jsonl",
|
| 3 |
+
"model": "google/gemma-3n-E4B-it",
|
| 4 |
+
"total_samples": 522,
|
| 5 |
+
"total_cer": 1.7834163155878973,
|
| 6 |
+
"total_edit_distance": 18626,
|
| 7 |
+
"total_ref_length": 10444,
|
| 8 |
+
"normalization": "์ซ์ ๋ณํ ํ์ฉ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "์ ๊ณต๋ ์์ฑ์ ๋ด์ฉ์ ํ๊ตญ์ด๋ก ์ ํํ๊ฒ ์ ์ฌํด ์ฃผ์๊ณ ์ ์ฌ๋ ๋ฌธ์ฅ๋ง ์ถ๋ ฅํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 147.34291195869446,
|
| 11 |
+
"timestamp": "2026-03-06T09:36:14.543997"
|
| 12 |
+
}
|
data/results_real/ASR/gemma3n_vllm/common_voice_korea_other/prompt_v1/common_voice_korea_noisy_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "common_voice_korea_noisy.jsonl",
|
| 3 |
+
"model": "google/gemma-3n-E4B-it",
|
| 4 |
+
"total_samples": 522,
|
| 5 |
+
"total_cer": 1.945710455764075,
|
| 6 |
+
"total_edit_distance": 20321,
|
| 7 |
+
"total_ref_length": 10444,
|
| 8 |
+
"normalization": "์ซ์ ๋ณํ ํ์ฉ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "์ฃผ์ด์ง ์์ฑ์ ํ๊ตญ์ด๋ก ์ ์ฌํ๊ณ ์ ์ฌ๋ ํ๊ตญ์ด ํ
์คํธ๋ง ์ถ๋ ฅํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 185.2561948299408,
|
| 11 |
+
"timestamp": "2026-03-06T09:30:59.463344"
|
| 12 |
+
}
|
data/results_real/ASR/gemma3n_vllm/common_voice_korea_other/prompt_v2/common_voice_korea_noisy_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "common_voice_korea_noisy.jsonl",
|
| 3 |
+
"model": "google/gemma-3n-E4B-it",
|
| 4 |
+
"total_samples": 522,
|
| 5 |
+
"total_cer": 2.036480275756415,
|
| 6 |
+
"total_edit_distance": 21269,
|
| 7 |
+
"total_ref_length": 10444,
|
| 8 |
+
"normalization": "์ซ์ ๋ณํ ํ์ฉ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "๋ค์ ์์ฑ์ ํ๊ตญ์ด๋ก ์ ์ฌํด ์ฃผ์๊ณ ์ค๋ช
์ด๋ ์ถ๊ฐ ๋ฌธ์ฅ ์์ด ์ ์ฌ ๊ฒฐ๊ณผ๋ง ์ ๊ณตํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 167.68726301193237,
|
| 11 |
+
"timestamp": "2026-03-06T09:33:47.176678"
|
| 12 |
+
}
|
data/results_real/ASR/gemma3n_vllm/common_voice_korea_other/prompt_v3/common_voice_korea_noisy_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "common_voice_korea_noisy.jsonl",
|
| 3 |
+
"model": "google/gemma-3n-E4B-it",
|
| 4 |
+
"total_samples": 522,
|
| 5 |
+
"total_cer": 1.7834163155878973,
|
| 6 |
+
"total_edit_distance": 18626,
|
| 7 |
+
"total_ref_length": 10444,
|
| 8 |
+
"normalization": "์ซ์ ๋ณํ ํ์ฉ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "์ ๊ณต๋ ์์ฑ์ ๋ด์ฉ์ ํ๊ตญ์ด๋ก ์ ํํ๊ฒ ์ ์ฌํด ์ฃผ์๊ณ ์ ์ฌ๋ ๋ฌธ์ฅ๋ง ์ถ๋ ฅํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 147.34291195869446,
|
| 11 |
+
"timestamp": "2026-03-06T09:36:14.543997"
|
| 12 |
+
}
|
data/results_real/ASR/gemma3n_vllm/common_voice_korea_other/prompt_v4/common_voice_korea_noisy_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "common_voice_korea_noisy.jsonl",
|
| 3 |
+
"model": "google/gemma-3n-E4B-it",
|
| 4 |
+
"total_samples": 522,
|
| 5 |
+
"total_cer": 2.239946380697051,
|
| 6 |
+
"total_edit_distance": 23394,
|
| 7 |
+
"total_ref_length": 10444,
|
| 8 |
+
"normalization": "์ซ์ ๋ณํ ํ์ฉ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "๋งฅ๋ฝ์ผ๋ก ์ฃผ์ด์ง ์์ฑ์ ํ๊ตญ์ด๋ก ์ ์ฌํด ์ฃผ์๋ ๋ค๋ฅธ ์ค๋ช
์์ด ์ ์ฌ๋ ํ๊ตญ์ด ํ
์คํธ๋ง ์๋ตํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 191.64880776405334,
|
| 11 |
+
"timestamp": "2026-03-06T09:39:26.216829"
|
| 12 |
+
}
|
data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_clean/ksponspeech_eval_clean_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "ksponspeech_eval_clean.jsonl",
|
| 3 |
+
"model": "google/gemma-3n-E4B-it",
|
| 4 |
+
"total_samples": 3000,
|
| 5 |
+
"total_cer": 1.4299420974518697,
|
| 6 |
+
"total_edit_distance": 68407,
|
| 7 |
+
"total_ref_length": 47839,
|
| 8 |
+
"normalization": "์ซ์ ๋ณํ ํ์ฉ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "์ ๊ณต๋ ์์ฑ์ ๋ด์ฉ์ ํ๊ตญ์ด๋ก ์ ํํ๊ฒ ์ ์ฌํด ์ฃผ์๊ณ ์ ์ฌ๋ ๋ฌธ์ฅ๋ง ์ถ๋ ฅํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 570.3824157714844,
|
| 11 |
+
"timestamp": "2026-03-06T10:10:14.044395"
|
| 12 |
+
}
|
data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_clean/prompt_v1/ksponspeech_eval_clean_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "ksponspeech_eval_clean.jsonl",
|
| 3 |
+
"model": "google/gemma-3n-E4B-it",
|
| 4 |
+
"total_samples": 3000,
|
| 5 |
+
"total_cer": 1.4493613996948096,
|
| 6 |
+
"total_edit_distance": 69336,
|
| 7 |
+
"total_ref_length": 47839,
|
| 8 |
+
"normalization": "์ซ์ ๋ณํ ํ์ฉ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "์ฃผ์ด์ง ์์ฑ์ ํ๊ตญ์ด๋ก ์ ์ฌํ๊ณ ์ ์ฌ๋ ํ๊ตญ์ด ํ
์คํธ๋ง ์ถ๋ ฅํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 603.2837119102478,
|
| 11 |
+
"timestamp": "2026-03-06T09:51:00.969666"
|
| 12 |
+
}
|
data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_clean/prompt_v2/ksponspeech_eval_clean_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "ksponspeech_eval_clean.jsonl",
|
| 3 |
+
"model": "google/gemma-3n-E4B-it",
|
| 4 |
+
"total_samples": 3000,
|
| 5 |
+
"total_cer": 1.5473985660235372,
|
| 6 |
+
"total_edit_distance": 74026,
|
| 7 |
+
"total_ref_length": 47839,
|
| 8 |
+
"normalization": "์ซ์ ๋ณํ ํ์ฉ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "๋ค์ ์์ฑ์ ํ๊ตญ์ด๋ก ์ ์ฌํด ์ฃผ์๊ณ ์ค๋ช
์ด๋ ์ถ๊ฐ ๋ฌธ์ฅ ์์ด ์ ์ฌ ๊ฒฐ๊ณผ๋ง ์ ๊ณตํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 582.5036385059357,
|
| 11 |
+
"timestamp": "2026-03-06T10:00:43.569889"
|
| 12 |
+
}
|
data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_clean/prompt_v3/ksponspeech_eval_clean_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "ksponspeech_eval_clean.jsonl",
|
| 3 |
+
"model": "google/gemma-3n-E4B-it",
|
| 4 |
+
"total_samples": 3000,
|
| 5 |
+
"total_cer": 1.4299420974518697,
|
| 6 |
+
"total_edit_distance": 68407,
|
| 7 |
+
"total_ref_length": 47839,
|
| 8 |
+
"normalization": "์ซ์ ๋ณํ ํ์ฉ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "์ ๊ณต๋ ์์ฑ์ ๋ด์ฉ์ ํ๊ตญ์ด๋ก ์ ํํ๊ฒ ์ ์ฌํด ์ฃผ์๊ณ ์ ์ฌ๋ ๋ฌธ์ฅ๋ง ์ถ๋ ฅํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 570.3824157714844,
|
| 11 |
+
"timestamp": "2026-03-06T10:10:14.044395"
|
| 12 |
+
}
|
data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_clean/prompt_v4/ksponspeech_eval_clean_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "ksponspeech_eval_clean.jsonl",
|
| 3 |
+
"model": "google/gemma-3n-E4B-it",
|
| 4 |
+
"total_samples": 3000,
|
| 5 |
+
"total_cer": 1.804092894918372,
|
| 6 |
+
"total_edit_distance": 86306,
|
| 7 |
+
"total_ref_length": 47839,
|
| 8 |
+
"normalization": "์ซ์ ๋ณํ ํ์ฉ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "๋งฅ๋ฝ์ผ๋ก ์ฃผ์ด์ง ์์ฑ์ ํ๊ตญ์ด๋ก ์ ์ฌํด ์ฃผ์๋ ๋ค๋ฅธ ์ค๋ช
์์ด ์ ์ฌ๋ ํ๊ตญ์ด ํ
์คํธ๋ง ์๋ตํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 712.8236339092255,
|
| 11 |
+
"timestamp": "2026-03-06T10:22:06.959856"
|
| 12 |
+
}
|
data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_other/ksponspeech_eval_other_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "ksponspeech_eval_other.jsonl",
|
| 3 |
+
"model": "google/gemma-3n-E4B-it",
|
| 4 |
+
"total_samples": 3000,
|
| 5 |
+
"total_cer": 1.3076182558088654,
|
| 6 |
+
"total_edit_distance": 89649,
|
| 7 |
+
"total_ref_length": 68559,
|
| 8 |
+
"normalization": "์ซ์ ๋ณํ ํ์ฉ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "์ ๊ณต๋ ์์ฑ์ ๋ด์ฉ์ ํ๊ตญ์ด๋ก ์ ํํ๊ฒ ์ ์ฌํด ์ฃผ์๊ณ ์ ์ฌ๋ ๋ฌธ์ฅ๋ง ์ถ๋ ฅํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 584.8936712741852,
|
| 11 |
+
"timestamp": "2026-03-06T10:55:11.069525"
|
| 12 |
+
}
|
data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_other/prompt_v1/ksponspeech_eval_other_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "ksponspeech_eval_other.jsonl",
|
| 3 |
+
"model": "google/gemma-3n-E4B-it",
|
| 4 |
+
"total_samples": 3000,
|
| 5 |
+
"total_cer": 1.3692585947869718,
|
| 6 |
+
"total_edit_distance": 93875,
|
| 7 |
+
"total_ref_length": 68559,
|
| 8 |
+
"normalization": "์ซ์ ๋ณํ ํ์ฉ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "์ฃผ์ด์ง ์์ฑ์ ํ๊ตญ์ด๋ก ์ ์ฌํ๊ณ ์ ์ฌ๋ ํ๊ตญ์ด ํ
์คํธ๋ง ์ถ๋ ฅํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 658.2554221153259,
|
| 11 |
+
"timestamp": "2026-03-06T10:34:39.983630"
|
| 12 |
+
}
|
data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_other/prompt_v2/ksponspeech_eval_other_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "ksponspeech_eval_other.jsonl",
|
| 3 |
+
"model": "google/gemma-3n-E4B-it",
|
| 4 |
+
"total_samples": 3000,
|
| 5 |
+
"total_cer": 1.463206872912382,
|
| 6 |
+
"total_edit_distance": 100316,
|
| 7 |
+
"total_ref_length": 68559,
|
| 8 |
+
"normalization": "์ซ์ ๋ณํ ํ์ฉ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "๋ค์ ์์ฑ์ ํ๊ตญ์ด๋ก ์ ์ฌํด ์ฃผ์๊ณ ์ค๋ช
์ด๋ ์ถ๊ฐ ๋ฌธ์ฅ ์์ด ์ ์ฌ ๊ฒฐ๊ณผ๋ง ์ ๊ณตํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 646.0058970451355,
|
| 11 |
+
"timestamp": "2026-03-06T10:45:26.085344"
|
| 12 |
+
}
|
data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_other/prompt_v3/ksponspeech_eval_other_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "ksponspeech_eval_other.jsonl",
|
| 3 |
+
"model": "google/gemma-3n-E4B-it",
|
| 4 |
+
"total_samples": 3000,
|
| 5 |
+
"total_cer": 1.3076182558088654,
|
| 6 |
+
"total_edit_distance": 89649,
|
| 7 |
+
"total_ref_length": 68559,
|
| 8 |
+
"normalization": "์ซ์ ๋ณํ ํ์ฉ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "์ ๊ณต๋ ์์ฑ์ ๋ด์ฉ์ ํ๊ตญ์ด๋ก ์ ํํ๊ฒ ์ ์ฌํด ์ฃผ์๊ณ ์ ์ฌ๋ ๋ฌธ์ฅ๋ง ์ถ๋ ฅํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 584.8936712741852,
|
| 11 |
+
"timestamp": "2026-03-06T10:55:11.069525"
|
| 12 |
+
}
|
data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_other/prompt_v4/ksponspeech_eval_other_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "ksponspeech_eval_other.jsonl",
|
| 3 |
+
"model": "google/gemma-3n-E4B-it",
|
| 4 |
+
"total_samples": 3000,
|
| 5 |
+
"total_cer": 1.6952114237372191,
|
| 6 |
+
"total_edit_distance": 116222,
|
| 7 |
+
"total_ref_length": 68559,
|
| 8 |
+
"normalization": "์ซ์ ๋ณํ ํ์ฉ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "๋งฅ๋ฝ์ผ๋ก ์ฃผ์ด์ง ์์ฑ์ ํ๊ตญ์ด๋ก ์ ์ฌํด ์ฃผ์๋ ๋ค๋ฅธ ์ค๋ช
์์ด ์ ์ฌ๋ ํ๊ตญ์ด ํ
์คํธ๋ง ์๋ตํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 814.8545458316803,
|
| 11 |
+
"timestamp": "2026-03-06T11:08:46.021211"
|
| 12 |
+
}
|
data/results_real/ASR/gemma3n_vllm/zeroth_korean_test/prompt_v1/zeroth_korean_test_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "zeroth_korean_test.jsonl",
|
| 3 |
+
"model": "google/gemma-3n-E4B-it",
|
| 4 |
+
"total_samples": 457,
|
| 5 |
+
"total_cer": 1.2059829946080465,
|
| 6 |
+
"total_edit_distance": 23261,
|
| 7 |
+
"total_ref_length": 19288,
|
| 8 |
+
"normalization": "์ซ์ ๋ณํ ํ์ฉ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "์ฃผ์ด์ง ์์ฑ์ ํ๊ตญ์ด๋ก ์ ์ฌํ๊ณ ์ ์ฌ๋ ํ๊ตญ์ด ํ
์คํธ๋ง ์ถ๋ ฅํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 121.40821027755737,
|
| 11 |
+
"timestamp": "2026-03-06T11:12:15.746632"
|
| 12 |
+
}
|
data/results_real/ASR/gemma3n_vllm/zeroth_korean_test/prompt_v2/zeroth_korean_test_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "zeroth_korean_test.jsonl",
|
| 3 |
+
"model": "google/gemma-3n-E4B-it",
|
| 4 |
+
"total_samples": 457,
|
| 5 |
+
"total_cer": 1.150819162173372,
|
| 6 |
+
"total_edit_distance": 22197,
|
| 7 |
+
"total_ref_length": 19288,
|
| 8 |
+
"normalization": "์ซ์ ๋ณํ ํ์ฉ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "๋ค์ ์์ฑ์ ํ๊ตญ์ด๋ก ์ ์ฌํด ์ฃผ์๊ณ ์ค๋ช
์ด๋ ์ถ๊ฐ ๋ฌธ์ฅ ์์ด ์ ์ฌ ๊ฒฐ๊ณผ๋ง ์ ๊ณตํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 85.42956852912903,
|
| 11 |
+
"timestamp": "2026-03-06T11:13:41.191670"
|
| 12 |
+
}
|
data/results_real/ASR/gemma3n_vllm/zeroth_korean_test/prompt_v3/zeroth_korean_test_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "zeroth_korean_test.jsonl",
|
| 3 |
+
"model": "google/gemma-3n-E4B-it",
|
| 4 |
+
"total_samples": 457,
|
| 5 |
+
"total_cer": 1.0788054749066778,
|
| 6 |
+
"total_edit_distance": 20808,
|
| 7 |
+
"total_ref_length": 19288,
|
| 8 |
+
"normalization": "์ซ์ ๋ณํ ํ์ฉ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "์ ๊ณต๋ ์์ฑ์ ๋ด์ฉ์ ํ๊ตญ์ด๋ก ์ ํํ๊ฒ ์ ์ฌํด ์ฃผ์๊ณ ์ ์ฌ๋ ๋ฌธ์ฅ๋ง ์ถ๋ ฅํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 90.69133472442627,
|
| 11 |
+
"timestamp": "2026-03-06T11:15:11.905767"
|
| 12 |
+
}
|
data/results_real/ASR/gemma3n_vllm/zeroth_korean_test/prompt_v4/zeroth_korean_test_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "zeroth_korean_test.jsonl",
|
| 3 |
+
"model": "google/gemma-3n-E4B-it",
|
| 4 |
+
"total_samples": 457,
|
| 5 |
+
"total_cer": 1.4709664039817503,
|
| 6 |
+
"total_edit_distance": 28372,
|
| 7 |
+
"total_ref_length": 19288,
|
| 8 |
+
"normalization": "์ซ์ ๋ณํ ํ์ฉ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "๋งฅ๋ฝ์ผ๋ก ์ฃผ์ด์ง ์์ฑ์ ํ๊ตญ์ด๋ก ์ ์ฌํด ์ฃผ์๋ ๋ค๋ฅธ ์ค๋ช
์์ด ์ ์ฌ๋ ํ๊ตญ์ด ํ
์คํธ๋ง ์๋ตํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 155.58400297164917,
|
| 11 |
+
"timestamp": "2026-03-06T11:17:47.512263"
|
| 12 |
+
}
|
data/results_real/ASR/gemma3n_vllm/zeroth_korean_test/zeroth_korean_test_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "zeroth_korean_test.jsonl",
|
| 3 |
+
"model": "google/gemma-3n-E4B-it",
|
| 4 |
+
"total_samples": 457,
|
| 5 |
+
"total_cer": 1.0788054749066778,
|
| 6 |
+
"total_edit_distance": 20808,
|
| 7 |
+
"total_ref_length": 19288,
|
| 8 |
+
"normalization": "์ซ์ ๋ณํ ํ์ฉ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "์ ๊ณต๋ ์์ฑ์ ๋ด์ฉ์ ํ๊ตญ์ด๋ก ์ ํํ๊ฒ ์ ์ฌํด ์ฃผ์๊ณ ์ ์ฌ๋ ๋ฌธ์ฅ๋ง ์ถ๋ ฅํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 90.69133472442627,
|
| 11 |
+
"timestamp": "2026-03-06T11:15:11.905767"
|
| 12 |
+
}
|
data/results_real/ASR/gemma3n_vllm/zeroth_korean_test_other/prompt_v1/zeroth_korean_test_noisy_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "zeroth_korean_test_noisy.jsonl",
|
| 3 |
+
"model": "google/gemma-3n-E4B-it",
|
| 4 |
+
"total_samples": 457,
|
| 5 |
+
"total_cer": 1.227447117378681,
|
| 6 |
+
"total_edit_distance": 23675,
|
| 7 |
+
"total_ref_length": 19288,
|
| 8 |
+
"normalization": "์ซ์ ๋ณํ ํ์ฉ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "์ฃผ์ด์ง ์์ฑ์ ํ๊ตญ์ด๋ก ์ ์ฌํ๊ณ ์ ์ฌ๋ ํ๊ตญ์ด ํ
์คํธ๋ง ์ถ๋ ฅํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 128.4206943511963,
|
| 11 |
+
"timestamp": "2026-03-06T11:21:24.146182"
|
| 12 |
+
}
|
data/results_real/ASR/gemma3n_vllm/zeroth_korean_test_other/prompt_v2/zeroth_korean_test_noisy_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "zeroth_korean_test_noisy.jsonl",
|
| 3 |
+
"model": "google/gemma-3n-E4B-it",
|
| 4 |
+
"total_samples": 457,
|
| 5 |
+
"total_cer": 1.294068851099129,
|
| 6 |
+
"total_edit_distance": 24960,
|
| 7 |
+
"total_ref_length": 19288,
|
| 8 |
+
"normalization": "์ซ์ ๋ณํ ํ์ฉ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "๋ค์ ์์ฑ์ ํ๊ตญ์ด๋ก ์ ์ฌํด ์ฃผ์๊ณ ์ค๋ช
์ด๋ ์ถ๊ฐ ๋ฌธ์ฅ ์์ด ์ ์ฌ ๊ฒฐ๊ณผ๋ง ์ ๊ณตํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 118.1246964931488,
|
| 11 |
+
"timestamp": "2026-03-06T11:23:22.295773"
|
| 12 |
+
}
|
data/results_real/ASR/gemma3n_vllm/zeroth_korean_test_other/prompt_v3/zeroth_korean_test_noisy_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "zeroth_korean_test_noisy.jsonl",
|
| 3 |
+
"model": "google/gemma-3n-E4B-it",
|
| 4 |
+
"total_samples": 457,
|
| 5 |
+
"total_cer": 1.1862297801742017,
|
| 6 |
+
"total_edit_distance": 22880,
|
| 7 |
+
"total_ref_length": 19288,
|
| 8 |
+
"normalization": "์ซ์ ๋ณํ ํ์ฉ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "์ ๊ณต๋ ์์ฑ์ ๋ด์ฉ์ ํ๊ตญ์ด๋ก ์ ํํ๊ฒ ์ ์ฌํด ์ฃผ์๊ณ ์ ์ฌ๋ ๋ฌธ์ฅ๋ง ์ถ๋ ฅํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 110.6101644039154,
|
| 11 |
+
"timestamp": "2026-03-06T11:25:12.929083"
|
| 12 |
+
}
|
data/results_real/ASR/gemma3n_vllm/zeroth_korean_test_other/prompt_v4/zeroth_korean_test_noisy_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "zeroth_korean_test_noisy.jsonl",
|
| 3 |
+
"model": "google/gemma-3n-E4B-it",
|
| 4 |
+
"total_samples": 457,
|
| 5 |
+
"total_cer": 1.6815636665284115,
|
| 6 |
+
"total_edit_distance": 32434,
|
| 7 |
+
"total_ref_length": 19288,
|
| 8 |
+
"normalization": "์ซ์ ๋ณํ ํ์ฉ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "๋งฅ๋ฝ์ผ๋ก ์ฃผ์ด์ง ์์ฑ์ ํ๊ตญ์ด๋ก ์ ์ฌํด ์ฃผ์๋ ๋ค๋ฅธ ์ค๋ช
์์ด ์ ์ฌ๋ ํ๊ตญ์ด ํ
์คํธ๋ง ์๋ตํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 198.1166172027588,
|
| 11 |
+
"timestamp": "2026-03-06T11:28:31.068015"
|
| 12 |
+
}
|
data/results_real/ASR/gemma3n_vllm/zeroth_korean_test_other/zeroth_korean_test_noisy_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "zeroth_korean_test_noisy.jsonl",
|
| 3 |
+
"model": "google/gemma-3n-E4B-it",
|
| 4 |
+
"total_samples": 457,
|
| 5 |
+
"total_cer": 1.1862297801742017,
|
| 6 |
+
"total_edit_distance": 22880,
|
| 7 |
+
"total_ref_length": 19288,
|
| 8 |
+
"normalization": "์ซ์ ๋ณํ ํ์ฉ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "์ ๊ณต๋ ์์ฑ์ ๋ด์ฉ์ ํ๊ตญ์ด๋ก ์ ํํ๊ฒ ์ ์ฌํด ์ฃผ์๊ณ ์ ์ฌ๋ ๋ฌธ์ฅ๋ง ์ถ๋ ฅํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 110.6101644039154,
|
| 11 |
+
"timestamp": "2026-03-06T11:25:12.929083"
|
| 12 |
+
}
|
data/results_real/ASR/gpt_realtime_mini/common_voice_korea/common_voice_korea_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "common_voice_korea.jsonl",
|
| 3 |
+
"model": "gpt-audio-mini",
|
| 4 |
+
"total_samples": 523,
|
| 5 |
+
"total_cer": 0.33049713193116637,
|
| 6 |
+
"total_edit_distance": 3457,
|
| 7 |
+
"total_ref_length": 10460,
|
| 8 |
+
"normalization": "๊ตฌ๋์ ์ ๊ฑฐ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "๋ค์ ์์ฑ์ ํ๊ตญ์ด๋ก ์ ์ฌํด ์ฃผ์๊ณ ์ค๋ช
์ด๋ ์ถ๊ฐ ๋ฌธ์ฅ ์์ด ์ ์ฌ ๊ฒฐ๊ณผ๋ง ์ ๊ณตํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 480.61955213546753,
|
| 11 |
+
"timestamp": "2026-03-11T18:28:40.586049"
|
| 12 |
+
}
|
data/results_real/ASR/gpt_realtime_mini/common_voice_korea/prompt_v2/common_voice_korea_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "common_voice_korea.jsonl",
|
| 3 |
+
"model": "gpt-audio-mini",
|
| 4 |
+
"total_samples": 523,
|
| 5 |
+
"total_cer": 0.33049713193116637,
|
| 6 |
+
"total_edit_distance": 3457,
|
| 7 |
+
"total_ref_length": 10460,
|
| 8 |
+
"normalization": "๊ตฌ๋์ ์ ๊ฑฐ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "๋ค์ ์์ฑ์ ํ๊ตญ์ด๋ก ์ ์ฌํด ์ฃผ์๊ณ ์ค๋ช
์ด๋ ์ถ๊ฐ ๋ฌธ์ฅ ์์ด ์ ์ฌ ๊ฒฐ๊ณผ๋ง ์ ๊ณตํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 480.61955213546753,
|
| 11 |
+
"timestamp": "2026-03-11T18:28:40.586049"
|
| 12 |
+
}
|
data/results_real/ASR/gpt_realtime_mini/common_voice_korea_other/common_voice_korea_noisy_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "common_voice_korea_noisy.jsonl",
|
| 3 |
+
"model": "gpt-audio-mini",
|
| 4 |
+
"total_samples": 523,
|
| 5 |
+
"total_cer": 0.3621414913957935,
|
| 6 |
+
"total_edit_distance": 3788,
|
| 7 |
+
"total_ref_length": 10460,
|
| 8 |
+
"normalization": "๊ตฌ๋์ ์ ๊ฑฐ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "๋ค์ ์์ฑ์ ํ๊ตญ์ด๋ก ์ ์ฌํด ์ฃผ์๊ณ ์ค๋ช
์ด๋ ์ถ๊ฐ ๋ฌธ์ฅ ์์ด ์ ์ฌ ๊ฒฐ๊ณผ๋ง ์ ๊ณตํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 486.3854024410248,
|
| 11 |
+
"timestamp": "2026-03-11T18:36:47.342577"
|
| 12 |
+
}
|
data/results_real/ASR/gpt_realtime_mini/common_voice_korea_other/prompt_v2/common_voice_korea_noisy_summary.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": "common_voice_korea_noisy.jsonl",
|
| 3 |
+
"model": "gpt-audio-mini",
|
| 4 |
+
"total_samples": 523,
|
| 5 |
+
"total_cer": 0.3621414913957935,
|
| 6 |
+
"total_edit_distance": 3788,
|
| 7 |
+
"total_ref_length": 10460,
|
| 8 |
+
"normalization": "๊ตฌ๋์ ์ ๊ฑฐ + ๊ณต๋ฐฑ ์ ๊ฑฐ",
|
| 9 |
+
"prompt": "๋ค์ ์์ฑ์ ํ๊ตญ์ด๋ก ์ ์ฌํด ์ฃผ์๊ณ ์ค๋ช
์ด๋ ์ถ๊ฐ ๋ฌธ์ฅ ์์ด ์ ์ฌ ๊ฒฐ๊ณผ๋ง ์ ๊ณตํด ์ฃผ์ธ์.",
|
| 10 |
+
"elapsed_time_seconds": 486.3854024410248,
|
| 11 |
+
"timestamp": "2026-03-11T18:36:47.342577"
|
| 12 |
+
}
|