tjdmstj committed on
Commit
11907d4
·
1 Parent(s): 53c4627

leaderboard

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. README.md +34 -12
  2. app.py +1100 -0
  3. build_leaderboard_data.py +292 -0
  4. data/leaderboard-data.json +1022 -0
  5. data/results_real/ASR/gemini_flash/common_voice_korea/common_voice_korea_summary.json +12 -0
  6. data/results_real/ASR/gemini_flash/common_voice_korea/prompt_v2/common_voice_korea_summary.json +12 -0
  7. data/results_real/ASR/gemini_flash/common_voice_korea_other/common_voice_korea_noisy_summary.json +12 -0
  8. data/results_real/ASR/gemini_flash/common_voice_korea_other/prompt_v2/common_voice_korea_noisy_summary.json +12 -0
  9. data/results_real/ASR/gemini_flash/ksponspeech_eval_clean/ksponspeech_eval_clean_summary.json +12 -0
  10. data/results_real/ASR/gemini_flash/ksponspeech_eval_clean/prompt_v2/ksponspeech_eval_clean_summary.json +12 -0
  11. data/results_real/ASR/gemini_flash/ksponspeech_eval_other/ksponspeech_eval_other_summary.json +12 -0
  12. data/results_real/ASR/gemini_flash/ksponspeech_eval_other/prompt_v2/ksponspeech_eval_other_summary.json +12 -0
  13. data/results_real/ASR/gemini_flash/zeroth_korean_test/prompt_v2/zeroth_korean_test_summary.json +12 -0
  14. data/results_real/ASR/gemini_flash/zeroth_korean_test/zeroth_korean_test_summary.json +12 -0
  15. data/results_real/ASR/gemini_flash/zeroth_korean_test_other/prompt_v2/zeroth_korean_test_noisy_summary.json +12 -0
  16. data/results_real/ASR/gemini_flash/zeroth_korean_test_other/zeroth_korean_test_noisy_summary.json +12 -0
  17. data/results_real/ASR/gemma3n_vllm/common_voice_korea/common_voice_korea_summary.json +12 -0
  18. data/results_real/ASR/gemma3n_vllm/common_voice_korea/prompt_v1/common_voice_korea_summary.json +12 -0
  19. data/results_real/ASR/gemma3n_vllm/common_voice_korea/prompt_v2/common_voice_korea_summary.json +12 -0
  20. data/results_real/ASR/gemma3n_vllm/common_voice_korea/prompt_v3/common_voice_korea_summary.json +12 -0
  21. data/results_real/ASR/gemma3n_vllm/common_voice_korea/prompt_v4/common_voice_korea_summary.json +12 -0
  22. data/results_real/ASR/gemma3n_vllm/common_voice_korea_other/common_voice_korea_noisy_summary.json +12 -0
  23. data/results_real/ASR/gemma3n_vllm/common_voice_korea_other/prompt_v1/common_voice_korea_noisy_summary.json +12 -0
  24. data/results_real/ASR/gemma3n_vllm/common_voice_korea_other/prompt_v2/common_voice_korea_noisy_summary.json +12 -0
  25. data/results_real/ASR/gemma3n_vllm/common_voice_korea_other/prompt_v3/common_voice_korea_noisy_summary.json +12 -0
  26. data/results_real/ASR/gemma3n_vllm/common_voice_korea_other/prompt_v4/common_voice_korea_noisy_summary.json +12 -0
  27. data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_clean/ksponspeech_eval_clean_summary.json +12 -0
  28. data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_clean/prompt_v1/ksponspeech_eval_clean_summary.json +12 -0
  29. data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_clean/prompt_v2/ksponspeech_eval_clean_summary.json +12 -0
  30. data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_clean/prompt_v3/ksponspeech_eval_clean_summary.json +12 -0
  31. data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_clean/prompt_v4/ksponspeech_eval_clean_summary.json +12 -0
  32. data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_other/ksponspeech_eval_other_summary.json +12 -0
  33. data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_other/prompt_v1/ksponspeech_eval_other_summary.json +12 -0
  34. data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_other/prompt_v2/ksponspeech_eval_other_summary.json +12 -0
  35. data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_other/prompt_v3/ksponspeech_eval_other_summary.json +12 -0
  36. data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_other/prompt_v4/ksponspeech_eval_other_summary.json +12 -0
  37. data/results_real/ASR/gemma3n_vllm/zeroth_korean_test/prompt_v1/zeroth_korean_test_summary.json +12 -0
  38. data/results_real/ASR/gemma3n_vllm/zeroth_korean_test/prompt_v2/zeroth_korean_test_summary.json +12 -0
  39. data/results_real/ASR/gemma3n_vllm/zeroth_korean_test/prompt_v3/zeroth_korean_test_summary.json +12 -0
  40. data/results_real/ASR/gemma3n_vllm/zeroth_korean_test/prompt_v4/zeroth_korean_test_summary.json +12 -0
  41. data/results_real/ASR/gemma3n_vllm/zeroth_korean_test/zeroth_korean_test_summary.json +12 -0
  42. data/results_real/ASR/gemma3n_vllm/zeroth_korean_test_other/prompt_v1/zeroth_korean_test_noisy_summary.json +12 -0
  43. data/results_real/ASR/gemma3n_vllm/zeroth_korean_test_other/prompt_v2/zeroth_korean_test_noisy_summary.json +12 -0
  44. data/results_real/ASR/gemma3n_vllm/zeroth_korean_test_other/prompt_v3/zeroth_korean_test_noisy_summary.json +12 -0
  45. data/results_real/ASR/gemma3n_vllm/zeroth_korean_test_other/prompt_v4/zeroth_korean_test_noisy_summary.json +12 -0
  46. data/results_real/ASR/gemma3n_vllm/zeroth_korean_test_other/zeroth_korean_test_noisy_summary.json +12 -0
  47. data/results_real/ASR/gpt_realtime_mini/common_voice_korea/common_voice_korea_summary.json +12 -0
  48. data/results_real/ASR/gpt_realtime_mini/common_voice_korea/prompt_v2/common_voice_korea_summary.json +12 -0
  49. data/results_real/ASR/gpt_realtime_mini/common_voice_korea_other/common_voice_korea_noisy_summary.json +12 -0
  50. data/results_real/ASR/gpt_realtime_mini/common_voice_korea_other/prompt_v2/common_voice_korea_noisy_summary.json +12 -0
README.md CHANGED
@@ -1,12 +1,34 @@
1
- ---
2
- title: KoALa Bench
3
- emoji: 😻
4
- colorFrom: blue
5
- colorTo: indigo
6
- sdk: gradio
7
- sdk_version: 6.10.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # KoALa-Bench Gradio
2
+
3
+ `KoALa-Bench`์˜ ์ •์  leaderboard ํŽ˜์ด์ง€๋ฅผ `gradio` ๊ธฐ๋ฐ˜ UI๋กœ ์˜ฎ๊ธด ๋ฒ„์ „์ž…๋‹ˆ๋‹ค.
4
+ 원본 `data/`와 `images/`도 함께 복사해 둔 상태입니다.
5
+
6
+ ## Run
7
+
8
+ ```bash
9
+ cd KoALa-Bench-gradio  # run from the repository root
10
+ pip install -r requirements.txt
11
+ python app.py
12
+ ```
13
+
14
+ If port `7860` is already in use:
15
+
16
+ ```bash
17
+ GRADIO_SERVER_PORT=7861 python app.py
18
+ ```
19
+
20
+ ## Rebuild leaderboard data
21
+
22
+ If `data/results_real` changes, rebuild `data/leaderboard-data.json` with:
23
+
24
+ ```bash
25
+ cd KoALa-Bench-gradio  # run from the repository root
26
+ python build_leaderboard_data.py
27
+ ```
28
+
29
+ ## Files
30
+
31
+ - `app.py`: leaderboard 계산 로직 + Gradio UI
32
+ - `build_leaderboard_data.py`: `results_real`를 순회해 `leaderboard-data.json` 재생성
33
+ - `data/`: 원본 leaderboard 및 평가 결과 데이터 전체
34
+ - `images/`: 원본 뱃지/아이콘 이미지
app.py ADDED
@@ -0,0 +1,1100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import base64
4
+ import json
5
+ import os
6
+ from functools import cmp_to_key
7
+ from html import escape
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+ import gradio as gr
12
+
13
+
14
# View id used for the landing page, as opposed to a task id.
HOME_VIEW = "HOME"

# Preferred display order of task ids. ordered_tasks() places any task whose
# id is not listed here after all listed ones.
TASK_ORDER = ["K-disentQA", "SQA", "Instruct", "ASR", "Translation", "LSQA"]

# Filesystem locations, all derived from the directory containing this file.
ROOT = Path(__file__).parent
DATA_PATH = ROOT.joinpath("data", "leaderboard-data.json")
IMAGES_DIR = ROOT.joinpath("images")
27
+
28
+
29
def to_num(value: Any) -> float | None:
    """Convert ``value`` to a finite float, or return ``None`` if impossible.

    Args:
        value: Anything ``float()`` might accept (number, numeric string, ...).

    Returns:
        The finite float value, or ``None`` when ``value`` cannot be converted
        or converts to NaN / +inf / -inf.
    """
    try:
        num = float(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: ints too large for a float, e.g. float(10**400).
        return None
    # json.loads accepts the non-standard literals NaN / Infinity. Letting them
    # through would poison min()/max() ranges and make the score comparator
    # inconsistent (NaN is neither <, > nor == anything), so treat them as
    # missing values instead.
    if num != num or abs(num) == float("inf"):
        return None
    return num
35
+
36
+
37
def average(values: list[float | None]) -> float | None:
    """Return the arithmetic mean of the non-``None`` items in ``values``.

    ``None`` items are ignored; if nothing remains, ``None`` is returned.
    """
    present = [item for item in values if item is not None]
    return sum(present) / len(present) if present else None
42
+
43
+
44
def compare_scores(left: float | None, right: float | None, lower_better: bool) -> int:
    """Three-way comparator for scores, suitable for ``functools.cmp_to_key``.

    Returns -1 when ``left`` should be ordered before ``right``, 1 when after,
    and 0 when they tie. ``None`` always sorts after any real score, and two
    ``None`` values tie. ``lower_better`` selects whether the smaller or the
    larger score comes first.
    """
    if left is None or right is None:
        if left is right:  # both are None
            return 0
        return 1 if left is None else -1
    if left == right:
        return 0
    left_first = (left < right) if lower_better else (left > right)
    return -1 if left_first else 1
56
+
57
+
58
def ordered_tasks(tasks: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Return ``tasks`` sorted by their position in ``TASK_ORDER``.

    Tasks whose ``id`` is not in ``TASK_ORDER`` go after all known ones; ties
    are broken by the task's ``label``. The input list is not modified.
    """
    unknown_position = 10**6

    def position_then_label(task: dict[str, Any]) -> tuple[int, str]:
        task_id = task["id"]
        position = TASK_ORDER.index(task_id) if task_id in TASK_ORDER else unknown_position
        return position, task["label"]

    return sorted(tasks, key=position_then_label)
67
+
68
+
69
def dataset_ids(task: dict[str, Any]) -> list[str]:
    """List the ``id`` of every dataset in ``task``; empty if it has none."""
    ids = []
    for dataset in task.get("datasets", []):
        ids.append(dataset["id"])
    return ids
71
+
72
+
73
def metric_value(entry: dict[str, Any], task_id: str, dataset_id: str) -> float | None:
    """Look up ``entry["tasks"][task_id][dataset_id]["value"]`` as a number.

    Returns ``None`` when any level of the lookup is missing or empty;
    otherwise returns whatever ``to_num`` makes of the stored ``value``.
    """
    per_task = entry.get("tasks", {}).get(task_id, {})
    cell = per_task.get(dataset_id)
    return to_num(cell.get("value")) if cell else None
78
+
79
+
80
def metric_display(entry: dict[str, Any], task_id: str, dataset_id: str) -> str:
    """Return the text to show for one dataset result of ``entry``.

    Prefers the stored ``display`` field, falls back to ``value``, and yields
    ``"-"`` when the result is missing or both fields are ``None``.
    """
    cell = entry.get("tasks", {}).get(task_id, {}).get(dataset_id)
    if not cell:
        return "-"
    for field in ("display", "value"):
        raw = cell.get(field)
        if raw is not None:
            return str(raw)
    return "-"
89
+
90
+
91
def compute_task_overall(entry: dict[str, Any], task: dict[str, Any]) -> float | None:
    """Average ``entry``'s numeric results over all datasets of ``task``.

    Datasets without a usable value are skipped by ``average``; the result is
    ``None`` when no dataset has one.
    """
    scores = [metric_value(entry, task["id"], ds_id) for ds_id in dataset_ids(task)]
    return average(scores)
93
+
94
+
95
def normalize_task_scores(entries: list[dict[str, Any]], tasks: list[dict[str, Any]]) -> dict[str, dict[str, float] | None]:
    """Compute the observed min/max of ``task_overall`` for each task.

    Despite the name, this returns ranges rather than normalized scores: a
    mapping from task id to ``{"min": ..., "max": ...}``, or to ``None`` when
    no entry has a score for that task.
    """
    ranges: dict[str, dict[str, float] | None] = {}
    for task in tasks:
        task_id = task["id"]
        observed = [
            score
            for score in (entry["task_overall"][task_id] for entry in entries)
            if score is not None
        ]
        ranges[task_id] = {"min": min(observed), "max": max(observed)} if observed else None
    return ranges
104
+
105
+
106
def normalized_score(value: float | None, score_range: dict[str, float] | None, lower_better: bool) -> float | None:
    """Min-max scale ``value`` into 0..100 so that 100 is always the best.

    Returns ``None`` if either ``value`` or ``score_range`` is ``None``, and
    100.0 when the range is degenerate (min == max). For ``lower_better``
    metrics the scale is flipped so the smallest value maps to 100.
    """
    if value is None or score_range is None:
        return None
    low, high = score_range["min"], score_range["max"]
    if low == high:
        return 100.0
    distance_from_worst = (high - value) if lower_better else (value - low)
    return (distance_from_worst / (high - low)) * 100
114
+
115
+
116
def enrich_entries(entries: list[dict[str, Any]], tasks: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Return copies of ``entries`` augmented with derived score fields.

    Each returned dict carries the original keys plus:
      * ``task_overall``: task id -> raw per-task average (or ``None``)
      * ``normalized_task_scores``: task id -> 0..100 score (or ``None``)
      * ``overall``: mean of the normalized task scores (or ``None``)
    The input dicts are not mutated.
    """
    # Pass 1: raw per-task averages for every entry.
    with_raw = [
        {**entry, "task_overall": {task["id"]: compute_task_overall(entry, task) for task in tasks}}
        for entry in entries
    ]

    # The min/max ranges need every entry's averages, so normalization has to
    # happen in a second pass.
    ranges = normalize_task_scores(with_raw, tasks)

    result = []
    for entry in with_raw:
        scaled = {
            task["id"]: normalized_score(
                entry["task_overall"][task["id"]],
                ranges[task["id"]],
                task["lowerBetter"],
            )
            for task in tasks
        }
        overall = average([scaled[task["id"]] for task in tasks])
        result.append({**entry, "normalized_task_scores": scaled, "overall": overall})
    return result
142
+
143
+
144
def sort_overall(entries: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Order entries by ``overall`` (higher first) and attach a 1-based ``rank``.

    Entries are copied; ranks are plain positions, so tied entries still get
    distinct consecutive ranks.
    """

    def by_overall(left: dict[str, Any], right: dict[str, Any]) -> int:
        return compare_scores(left["overall"], right["overall"], False)

    ordered = sorted(entries, key=cmp_to_key(by_overall))
    ranked = []
    for position, entry in enumerate(ordered, start=1):
        ranked.append({**entry, "rank": position})
    return ranked
150
+
151
+
152
def sort_task(entries: list[dict[str, Any]], task: dict[str, Any], dataset_id: str) -> list[dict[str, Any]]:
    """Order entries for one task and attach a 1-based ``rank`` to each copy.

    With ``dataset_id == "Overall"`` the per-task average (``task_overall``) is
    the sort key; otherwise the numeric value of that single dataset is used.
    Sort direction follows ``task["lowerBetter"]``.
    """

    def score_of(entry: dict[str, Any]) -> float | None:
        if dataset_id == "Overall":
            return entry["task_overall"][task["id"]]
        return metric_value(entry, task["id"], dataset_id)

    def by_score(left: dict[str, Any], right: dict[str, Any]) -> int:
        return compare_scores(score_of(left), score_of(right), task["lowerBetter"])

    ordered = sorted(entries, key=cmp_to_key(by_score))
    ranked = []
    for position, entry in enumerate(ordered, start=1):
        ranked.append({**entry, "rank": position})
    return ranked
160
+
161
+
162
def metric_class(lower_better: bool, value: float | None) -> str:
    """Pick the CSS class for a score cell.

    Missing values get ``"muted"``. Otherwise the class depends only on the
    metric's direction (``lower_better``), not on how large ``value`` is.
    """
    if value is None:
        return "muted"
    if lower_better:
        return "metric-bad"
    return "metric-good"
166
+
167
+
168
def fmt_score(value: float | None) -> str:
    """Format a score with two decimals, or ``"-"`` when it is ``None``."""
    if value is None:
        return "-"
    return format(value, ".2f")
170
+
171
+
172
def image_data_uri(path: Path) -> str:
    """Read an image file and return it as a base64 ``data:`` URI.

    Args:
        path: Location of the image file.

    Returns:
        ``data:image/<subtype>;base64,<payload>``. The subtype is derived from
        the file extension; a file without extension is treated as PNG.

    Raises:
        OSError: If the file cannot be read.
    """
    encoded = base64.b64encode(path.read_bytes()).decode("ascii")
    suffix = path.suffix.lower().lstrip(".") or "png"
    # The extension is not always the registered media subtype: "image/jpg"
    # and "image/svg" are not valid types, so map the common exceptions.
    subtype = {"jpg": "jpeg", "jpe": "jpeg", "svg": "svg+xml"}.get(suffix, suffix)
    return f"data:image/{subtype};base64,{encoded}"
177
+
178
+
179
def load_payload() -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
    """Load the leaderboard JSON at ``DATA_PATH``.

    Returns:
        ``(tasks, entries)``: the tasks in display order and the entries
        enriched with derived score fields. Missing ``tasks`` / ``entries``
        keys in the file are treated as empty lists.
    """
    with DATA_PATH.open(encoding="utf-8") as handle:
        payload = json.load(handle)
    tasks = ordered_tasks(payload.get("tasks", []))
    return tasks, enrich_entries(payload.get("entries", []), tasks)
184
+
185
+
186
# Everything below is computed once, when the module is imported.
TASKS, ENTRIES = load_payload()

# Task lookup by id.
TASK_MAP = dict((task["id"], task) for task in TASKS)

# Entries sorted by overall score, each carrying its "rank".
RANKED_OVERALL = sort_overall(ENTRIES)

# Badge images for ranks 1-3, embedded as data URIs so the generated HTML does
# not need separate image requests.
BADGE_IMAGES = {
    rank: image_data_uri(IMAGES_DIR / filename)
    for rank, filename in ((1, "1st.png"), (2, "2nd.png"), (3, "3rd.png"))
}
EXTERNAL_LINK_IMAGE = image_data_uri(IMAGES_DIR / "external-link.png")
195
+
196
+
197
def menu_choices() -> list[tuple[str, str]]:
    """Build the ``(label, value)`` pairs for the view selector.

    The first choice is the home view; one choice per task in ``TASKS``
    follows. Each label has two lines separated by ``"\\n"``: a title and a
    subtitle (for tasks, the dataset count).
    """
    choices = [("Home\nOverall ranking", HOME_VIEW)]
    for task in TASKS:
        # Use .get() like dataset_ids()/dataset_choices() do, so a task that
        # has no "datasets" key shows "0 datasets" instead of raising KeyError.
        dataset_count = len(task.get("datasets", []))
        choices.append((f"{task['label']}\n{dataset_count} datasets", task["id"]))
    return choices
202
+
203
+
204
def dataset_choices(task_id: str) -> list[str]:
    """Return the dataset filter options for a task: ``"Overall"`` first,
    then the id of each of the task's datasets.

    Raises:
        KeyError: If ``task_id`` is not a known task.
    """
    options = ["Overall"]
    options.extend(dataset["id"] for dataset in TASK_MAP[task_id].get("datasets", []))
    return options
207
+
208
+
209
def render_rank_strip(entries: list[dict[str, Any]]) -> str:
    """Render the "Top Ranking" card strip as an HTML string.

    Only the first 12 entries are shown. Entries whose ``rank`` has an image
    in ``BADGE_IMAGES`` get that image as badge; the rest get a ``#<rank>``
    text badge. ``rank_name`` is HTML-escaped.
    """

    def badge_html(rank: Any) -> str:
        if rank in BADGE_IMAGES:
            return f'<img class="rank-badge-image" src="{BADGE_IMAGES[rank]}" alt="{rank} place" />'
        return f'<span class="rank-badge">#{rank}</span>'

    cards = []
    for entry in entries[:12]:
        badge = badge_html(entry["rank"])
        name = escape(entry["rank_name"])
        cards.append(
            f"""
            <article class="rank-pill">
              <div class="rank-badge-wrap">{badge}</div>
              <span class="rank-name">{name}</span>
            </article>
            """
        )
    cards_html = "".join(cards)
    return f"""
    <section class="section-card card">
      <div class="section-head">
        <h3>Top Ranking</h3>
      </div>
      <div class="rank-strip-list">
        {cards_html}
      </div>
    </section>
    """
234
+
235
+
236
def render_home_table(entries: list[dict[str, Any]]) -> str:
    """Render the "Overall Leaderboard" table as an HTML string.

    Columns: rank, rank name, model, URL icon, overall score, then one column
    per task in ``TASKS`` showing that task's raw average. The header has two
    rows: task labels on top and each task's ``shortMetric`` below. Text taken
    from the data is HTML-escaped.
    """
    task_headers = "".join(
        f'<th class="grouped" colspan="1">{escape(task["label"])}</th>' for task in TASKS
    )
    header_top = (
        '<th rowspan="2" class="rank-col col-rank">Rank</th>'
        '<th rowspan="2" class="col-rankname">RankName</th>'
        '<th rowspan="2" class="col-model">Model</th>'
        '<th rowspan="2">URL</th>'
        '<th rowspan="2">Overall</th>'
        + task_headers
    )
    header_bottom = "".join(f'<th>{escape(task["shortMetric"])}</th>' for task in TASKS)
    task_cols = "".join("<col />" for _ in TASKS)

    body_rows = []
    for entry in entries:
        link = entry.get("url") or ""
        if not link:
            url_cell = "-"
        else:
            url_cell = (
                f'<a class="url-link" href="{escape(link)}" target="_blank" rel="noopener noreferrer" '
                f'aria-label="External link"><img src="{EXTERNAL_LINK_IMAGE}" alt="" /></a>'
            )

        score_cells = []
        for task in TASKS:
            score = entry["task_overall"][task["id"]]
            css_class = metric_class(task["lowerBetter"], score)
            score_cells.append(f'<td><span class="{css_class}">{fmt_score(score)}</span></td>')

        # Fall back to the rank name when the entry has no model name.
        model_name = entry.get("model") or entry["rank_name"]
        body_rows.append(
            "<tr>"
            f'<td class="rank-col col-rank">{entry["rank"]}</td>'
            f'<td class="col-rankname">{escape(entry["rank_name"])}</td>'
            f'<td class="col-model">{escape(model_name)}</td>'
            f"<td>{url_cell}</td>"
            f"<td>{fmt_score(entry['overall'])}</td>"
            + "".join(score_cells)
            + "</tr>"
        )
    body_html = "".join(body_rows)

    return f"""
    <section class="section-card card">
      <div class="section-head">
        <h3>Overall Leaderboard</h3>
      </div>
      <div class="table-scroll">
        <table>
          <colgroup>
            <col class="col-rank" />
            <col class="col-rankname" />
            <col class="col-model" />
            <col />
            <col />
            {task_cols}
          </colgroup>
          <thead>
            <tr>{header_top}</tr>
            <tr>{header_bottom}</tr>
          </thead>
          <tbody>{body_html}</tbody>
        </table>
      </div>
    </section>
    """
298
+
299
+
300
def render_home() -> str:
    """Render the home view: the top-ranking strip followed by the overall table."""
    sections = (render_rank_strip(RANKED_OVERALL), render_home_table(RANKED_OVERALL))
    return "".join(sections)
302
+
303
+
304
def render_task_title(task: dict[str, Any]) -> str:
    """Render the heading block for a task view; the label is HTML-escaped."""
    label = escape(task["label"])
    return f"""
    <div class="section-head">
      <div>
        <h3 class="task-title">Task : {label}</h3>
      </div>
    </div>
    """
312
+
313
+
314
def render_task_table(task: dict[str, Any], dataset_id: str) -> str:
    """Render the per-task ranking table as an HTML string.

    Args:
        task: Task definition (needs ``id``, ``datasets``, ``lowerBetter``).
        dataset_id: ``"Overall"`` to rank by the task average, or a dataset id
            to rank by that dataset alone.

    The score column is titled with the dataset's ``label`` (falling back to
    ``dataset_id`` itself when no dataset matches). In "Overall" mode the cell
    shows the two-decimal average; otherwise the dataset's display text.
    """
    overall_mode = dataset_id == "Overall"

    if overall_mode:
        column_title = "Overall"
    else:
        column_title = dataset_id
        for dataset in task["datasets"]:
            if dataset["id"] == dataset_id:
                column_title = dataset["label"]
                break

    body_rows = []
    for entry in sort_task(ENTRIES, task, dataset_id):
        if overall_mode:
            score = entry["task_overall"][task["id"]]
            shown = fmt_score(score)
        else:
            score = metric_value(entry, task["id"], dataset_id)
            shown = metric_display(entry, task["id"], dataset_id)
        css_class = metric_class(task["lowerBetter"], score)
        # Fall back to the rank name when the entry has no model name.
        model_name = entry.get("model") or entry["rank_name"]
        body_rows.append(
            "<tr>"
            f'<td class="rank-col col-rank">{entry["rank"]}</td>'
            f'<td class="col-rankname">{escape(entry["rank_name"])}</td>'
            f'<td class="col-model">{escape(model_name)}</td>'
            f'<td><span class="{css_class}">{escape(shown)}</span></td>'
            "</tr>"
        )
    body_html = "".join(body_rows)
    title_html = escape(column_title)

    return f"""
    <div id="taskTableMount">
      <div class="table-scroll">
        <table class="task-performance-table">
          <colgroup>
            <col class="col-rank" />
            <col class="col-rankname" />
            <col class="col-model" />
            <col />
          </colgroup>
          <thead>
            <tr>
              <th class="rank-col col-rank">Rank</th>
              <th class="col-rankname">RankName</th>
              <th class="col-model">Model</th>
              <th>{title_html}</th>
            </tr>
          </thead>
          <tbody>{body_html}</tbody>
        </table>
      </div>
    </div>
    """
359
+
360
+
361
def update_view(active_view: str, current_dataset: str | None) -> tuple[Any, Any, Any, Any, Any, Any]:
    """Compute the component updates for switching between views.

    Args:
        active_view: ``HOME_VIEW`` or a task id present in ``TASK_MAP``.
        current_dataset: Dataset currently selected in the task filter; kept
            only if it is a valid choice for the target task.

    Returns:
        A 6-tuple, in this order:
          1. visibility update for the home container
          2. visibility update for the task container
          3. update with the dataset choices and selected value
          4. task title HTML
          5. the task's ``metricLabel``
          6. task table HTML
        NOTE(review): the components these map to are wired up outside this
        function; confirm the order against the event handler's ``outputs``.

    Raises:
        KeyError: If ``active_view`` is neither ``HOME_VIEW`` nor a known task.
    """
    show_home = active_view == HOME_VIEW
    # The inactive container is set to visible="hidden" rather than False,
    # as in the original code.
    home_update = gr.update(visible=True if show_home else "hidden")
    task_update = gr.update(visible="hidden" if show_home else True)

    if show_home:
        if not TASKS:
            # Nothing to pre-render for the hidden task view; leave its
            # components untouched instead of failing on TASKS[0].
            return (home_update, task_update, gr.update(), gr.update(), gr.update(), gr.update())
        # While home is shown, keep the hidden task view populated with the
        # first task, reset to its "Overall" ranking.
        task = TASKS[0]
        choices = dataset_choices(task["id"])
        dataset_id = "Overall"
    else:
        task = TASK_MAP[active_view]
        choices = dataset_choices(active_view)
        # A dataset selected for a previous task may not exist for this one.
        dataset_id = current_dataset if current_dataset in choices else "Overall"

    return (
        home_update,
        task_update,
        gr.update(choices=choices, value=dataset_id),
        render_task_title(task),
        task["metricLabel"],
        render_task_table(task, dataset_id),
    )
384
+
385
+
386
+ CUSTOM_CSS = """
387
+ @import url('https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@400;500;700&family=Noto+Sans+KR:wght@400;500;700&display=swap');
388
+
389
+ :root {
390
+ --bg: #f2f4f8;
391
+ --bg-strong: #dde5f2;
392
+ --panel: rgba(255, 255, 255, 0.84);
393
+ --panel-strong: #ffffff;
394
+ --text: #0c1730;
395
+ --muted: #66748b;
396
+ --line: rgba(12, 23, 48, 0.14);
397
+ --primary: #0e3a8a;
398
+ --accent: #c56b12;
399
+ --pending: #74674a;
400
+ --pending-bg: #f4ecd8;
401
+ --success: #0c8f61;
402
+ --danger: #b8612f;
403
+ --shadow: 0 14px 38px rgba(12, 23, 48, 0.08);
404
+ }
405
+
406
+ html, body, .gradio-container {
407
+ margin: 0 !important;
408
+ min-height: 100vh;
409
+ font-family: "Noto Sans KR", "Space Grotesk", sans-serif !important;
410
+ color: var(--text);
411
+ background:
412
+ radial-gradient(circle at 15% 0%, #dce8ff 0%, transparent 32%),
413
+ radial-gradient(circle at 95% 5%, #ffe4cf 0%, transparent 24%),
414
+ linear-gradient(180deg, #f8fbff 0%, #eef2f7 100%) !important;
415
+ }
416
+
417
+ .gradio-container {
418
+ max-width: 100% !important;
419
+ }
420
+
421
+ .app-root {
422
+ position: relative;
423
+ max-width: 1440px;
424
+ margin: 0 auto;
425
+ padding: 24px 20px 56px;
426
+ overflow-x: auto;
427
+ -webkit-overflow-scrolling: touch;
428
+ }
429
+
430
+ .bg-orb {
431
+ position: fixed;
432
+ border-radius: 999px;
433
+ filter: blur(70px);
434
+ opacity: 0.45;
435
+ pointer-events: none;
436
+ z-index: 0;
437
+ }
438
+
439
+ .orb-1 {
440
+ width: 280px;
441
+ height: 280px;
442
+ background: #a7c5ff;
443
+ top: -110px;
444
+ left: -80px;
445
+ }
446
+
447
+ .orb-2 {
448
+ width: 240px;
449
+ height: 240px;
450
+ background: #ffd2a4;
451
+ top: -70px;
452
+ right: -60px;
453
+ }
454
+
455
+ .layout-row {
456
+ position: relative;
457
+ z-index: 1;
458
+ flex-wrap: nowrap !important;
459
+ gap: 24px;
460
+ align-items: flex-start;
461
+ min-width: 1180px;
462
+ }
463
+
464
+ .sidebar-panel {
465
+ flex: 0 0 260px !important;
466
+ width: 260px !important;
467
+ min-width: 260px !important;
468
+ max-width: 260px !important;
469
+ position: sticky;
470
+ top: 20px;
471
+ align-self: start;
472
+ border: 1px solid var(--line);
473
+ border-radius: 26px;
474
+ background: rgba(7, 17, 40, 0.95);
475
+ color: #f5f7fb;
476
+ padding: 22px 18px;
477
+ box-shadow: var(--shadow);
478
+ }
479
+
480
+ .sidebar-head {
481
+ margin-bottom: 26px;
482
+ }
483
+
484
+ .sidebar-kicker, .kicker {
485
+ margin: 0;
486
+ letter-spacing: 0.12em;
487
+ text-transform: uppercase;
488
+ font-size: 12px;
489
+ }
490
+
491
+ .sidebar-head h1 {
492
+ margin: 8px 0 0;
493
+ font-size: 34px;
494
+ line-height: 0.95;
495
+ color: #f5f7fb;
496
+ }
497
+
498
+ .content-panel {
499
+ flex: 1 1 auto !important;
500
+ min-width: 0;
501
+ }
502
+
503
+ .hero {
504
+ margin-bottom: 20px;
505
+ }
506
+
507
+ .hero-topline {
508
+ display: flex;
509
+ align-items: flex-start;
510
+ justify-content: space-between;
511
+ gap: 20px;
512
+ text-align: center;
513
+ }
514
+
515
+ .hero-topline > div {
516
+ flex: 1;
517
+ }
518
+
519
+ .hero h2 {
520
+ margin: 8px 0 6px;
521
+ font-family: "Space Grotesk", sans-serif;
522
+ font-size: clamp(42px, 7vw, 76px);
523
+ line-height: 0.95;
524
+ color: var(--text);
525
+ }
526
+
527
+ .desc {
528
+ margin: 0 auto;
529
+ max-width: 760px;
530
+ color: var(--muted);
531
+ font-size: 17px;
532
+ }
533
+
534
+ .card {
535
+ background: var(--panel);
536
+ border: 1px solid var(--line);
537
+ border-radius: 24px;
538
+ box-shadow: var(--shadow);
539
+ backdrop-filter: blur(8px);
540
+ }
541
+
542
+ .section-card {
543
+ padding: 18px;
544
+ }
545
+
546
+ .section-head {
547
+ display: flex;
548
+ align-items: baseline;
549
+ justify-content: space-between;
550
+ gap: 12px;
551
+ margin-bottom: 12px;
552
+ }
553
+
554
+ .section-head h3,
555
+ .task-title {
556
+ margin: 0;
557
+ font-size: 28px;
558
+ font-family: "Space Grotesk", sans-serif;
559
+ }
560
+
561
+ .rank-strip-list {
562
+ display: grid;
563
+ grid-template-columns: repeat(6, minmax(120px, 1fr));
564
+ gap: 10px;
565
+ }
566
+
567
+ .rank-pill {
568
+ border: 1px solid var(--line);
569
+ border-radius: 16px;
570
+ padding: 12px;
571
+ background: linear-gradient(135deg, #ffffff 0%, var(--bg-strong) 100%);
572
+ }
573
+
574
+ .rank-badge-wrap {
575
+ min-height: 28px;
576
+ }
577
+
578
+ .rank-badge {
579
+ display: inline-block;
580
+ padding: 3px 8px;
581
+ border-radius: 999px;
582
+ background: var(--primary);
583
+ color: #fff;
584
+ font-size: 12px;
585
+ }
586
+
587
+ .rank-badge-image {
588
+ display: block;
589
+ width: auto;
590
+ height: 28px;
591
+ }
592
+
593
+ .rank-name {
594
+ display: block;
595
+ margin-top: 8px;
596
+ font-weight: 700;
597
+ }
598
+
599
+ .table-scroll {
600
+ overflow-x: auto;
601
+ overflow-y: visible;
602
+ border-radius: 0;
603
+ border: 1px solid #ffffff !important;
604
+ background: var(--panel-strong);
605
+ }
606
+
607
+ table {
608
+ width: 100%;
609
+ min-width: 1080px;
610
+ border-collapse: collapse;
611
+ border: 1px solid #ffffff !important;
612
+ }
613
+
614
+ .task-performance-table {
615
+ table-layout: fixed;
616
+ }
617
+
618
+ thead th {
619
+ background: #e8edf6;
620
+ border-bottom: 1px solid #ffffff !important;
621
+ white-space: nowrap;
622
+ }
623
+
624
+ thead tr:first-child th.grouped {
625
+ text-align: center;
626
+ }
627
+
628
+ th, td {
629
+ padding: 12px 14px;
630
+ text-align: left;
631
+ border-bottom: 1px solid #ffffff !important;
632
+ border-right: 1px solid #ffffff !important;
633
+ font-size: 14px;
634
+ }
635
+
636
+ th:first-child,
637
+ td:first-child {
638
+ border-left: 1px solid #ffffff !important;
639
+ }
640
+
641
+ th:last-child,
642
+ td:last-child {
643
+ border-right: 0;
644
+ }
645
+
646
+ .table-scroll table,
647
+ .table-scroll thead,
648
+ .table-scroll tbody,
649
+ .table-scroll tr,
650
+ .table-scroll th,
651
+ .table-scroll td {
652
+ border-color: #ffffff !important;
653
+ }
654
+
655
+ tbody tr:hover {
656
+ background: #f8fbff;
657
+ }
658
+
659
+ .rank-col {
660
+ font-family: "Space Grotesk", sans-serif;
661
+ font-weight: 700;
662
+ width: 72px;
663
+ }
664
+
665
+ .col-rank {
666
+ width: 96px;
667
+ }
668
+
669
+ .col-rankname {
670
+ width: 240px;
671
+ }
672
+
673
+ .col-model {
674
+ width: 320px;
675
+ }
676
+
677
+ .url-link {
678
+ display: inline-flex;
679
+ align-items: center;
680
+ justify-content: center;
681
+ width: 24px;
682
+ height: 24px;
683
+ }
684
+
685
+ .url-link img {
686
+ display: block;
687
+ width: 18px;
688
+ height: 18px;
689
+ }
690
+
691
+ .metric-good {
692
+ color: var(--success);
693
+ font-weight: 700;
694
+ }
695
+
696
+ .metric-bad {
697
+ color: var(--danger);
698
+ font-weight: 700;
699
+ }
700
+
701
+ .muted {
702
+ color: var(--muted);
703
+ }
704
+
705
+ .task-filters {
706
+ display: flex;
707
+ flex-wrap: nowrap;
708
+ align-items: flex-start;
709
+ gap: 12px;
710
+ margin-top: 12px;
711
+ width: 100%;
712
+ overflow-x: auto;
713
+ -webkit-overflow-scrolling: touch;
714
+ padding-bottom: 6px;
715
+ background: transparent !important;
716
+ border: 0 !important;
717
+ box-shadow: none !important;
718
+ }
719
+
720
+ .task-filters .dataset-wrap { flex: 0 0 620px; min-width: 620px; }
721
+ .task-filters .metric-wrap { flex: 0 0 340px; min-width: 340px; }
722
+
723
+ .task-filters .dataset-wrap,
724
+ .task-filters .metric-wrap,
725
+ .task-filters .dataset-wrap > div,
726
+ .task-filters .metric-wrap > div {
727
+ background: transparent !important;
728
+ border: 0 !important;
729
+ box-shadow: none !important;
730
+ }
731
+
732
+ /* Keep Dataset / Metric columns pinned to the same top baseline. */
733
+ .task-filters .dataset-wrap,
734
+ .task-filters .metric-wrap {
735
+ align-self: flex-start !important;
736
+ justify-self: flex-start !important;
737
+ margin-top: 0 !important;
738
+ padding-top: 0 !important;
739
+ }
740
+
741
+ .task-view-shell .filter-title {
742
+ margin: 0 0 6px 0 !important;
743
+ font-size: 13px !important;
744
+ color: var(--muted) !important;
745
+ line-height: 1.2 !important;
746
+ }
747
+
748
+ #taskTableMount {
749
+ margin-top: 18px;
750
+ }
751
+
752
+ .task-menu-radio {
753
+ gap: 8px;
754
+ background: transparent !important;
755
+ border: 0 !important;
756
+ box-shadow: none !important;
757
+ padding: 0 !important;
758
+ }
759
+
760
+ .task-menu-radio > div,
761
+ .task-menu-radio .block,
762
+ .task-menu-radio .gradio-radio,
763
+ .task-menu-radio .form {
764
+ background: transparent !important;
765
+ border: 0 !important;
766
+ box-shadow: none !important;
767
+ padding: 0 !important;
768
+ }
769
+
770
+ .task-menu-radio label > span,
771
+ .task-menu-radio label > div,
772
+ .task-menu-radio label .wrap {
773
+ white-space: pre-line !important;
774
+ }
775
+
776
+ .task-menu-radio label:has(input[type="radio"]) {
777
+ width: 100%;
778
+ margin: 0 !important;
779
+ border: 1px solid rgba(255, 255, 255, 0.1) !important;
780
+ background: rgba(255, 255, 255, 0.05) !important;
781
+ color: #f5f7fb !important;
782
+ border-radius: 14px !important;
783
+ padding: 12px 14px !important;
784
+ cursor: pointer !important;
785
+ min-height: 72px;
786
+ align-content: center;
787
+ box-shadow: none !important;
788
+ transition: background 0.18s ease, color 0.18s ease, border-color 0.18s ease;
789
+ }
790
+
791
+ .task-menu-radio label:has(input[type="radio"]):hover {
792
+ background: rgba(255, 255, 255, 0.1) !important;
793
+ }
794
+
795
+ .task-menu-radio label:has(input[type="radio"]:checked) {
796
+ background: linear-gradient(135deg, #fcf7eb 0%, #dfeaff 100%) !important;
797
+ color: var(--text) !important;
798
+ border-color: transparent !important;
799
+ }
800
+
801
+ .task-menu-radio input[type="radio"] {
802
+ display: none !important;
803
+ }
804
+
805
+ .task-menu-radio label span,
806
+ .task-menu-radio label div,
807
+ .task-menu-radio label p {
808
+ color: inherit !important;
809
+ }
810
+
811
+ .task-menu-radio label:has(input[type="radio"]) span,
812
+ .task-menu-radio label:has(input[type="radio"]) div {
813
+ color: #f5f7fb !important;
814
+ }
815
+
816
+ .task-menu-radio label:has(input[type="radio"]:checked) span,
817
+ .task-menu-radio label:has(input[type="radio"]:checked) div {
818
+ color: var(--text) !important;
819
+ }
820
+
821
+ .task-menu-radio .wrap,
822
+ .task-menu-radio label span:last-child {
823
+ opacity: 0.72;
824
+ font-size: 12px;
825
+ }
826
+
827
+ .task-view-shell .gradio-radio,
828
+ .task-view-shell .gradio-textbox {
829
+ margin: 0 !important;
830
+ min-width: 0 !important;
831
+ background: transparent !important;
832
+ border: 0 !important;
833
+ box-shadow: none !important;
834
+ }
835
+
836
+ .task-view-shell .gradio-radio label,
837
+ .task-view-shell .gradio-textbox label {
838
+ font-size: 13px !important;
839
+ color: var(--muted) !important;
840
+ }
841
+
842
+ .task-view-shell .dataset-radio {
843
+ background: transparent !important;
844
+ border: 0 !important;
845
+ box-shadow: none !important;
846
+ padding: 0 !important;
847
+ margin-left: 0 !important;
848
+ margin-bottom: 12px !important;
849
+ max-width: 100% !important;
850
+ width: 100% !important;
851
+ }
852
+
853
+ .task-view-shell .dataset-radio > div,
854
+ .task-view-shell .dataset-radio .block,
855
+ .task-view-shell .dataset-radio .form {
856
+ background: transparent !important;
857
+ border: 0 !important;
858
+ box-shadow: none !important;
859
+ padding: 0 !important;
860
+ display: grid !important;
861
+ grid-template-columns: repeat(2, minmax(180px, 1fr)) !important;
862
+ gap: 8px 10px !important;
863
+ align-items: start !important;
864
+ justify-content: start !important;
865
+ min-height: 86px !important;
866
+ align-content: start !important;
867
+ max-width: 100% !important;
868
+ width: 100% !important;
869
+ }
870
+
871
+ .task-view-shell .dataset-radio label:has(input[type="radio"]) {
872
+ margin: 0 !important;
873
+ border: 1px solid var(--line) !important;
874
+ background: rgba(255, 255, 255, 0.88) !important;
875
+ color: var(--text) !important;
876
+ border-radius: 10px !important;
877
+ padding: 0 12px !important;
878
+ height: 40px !important;
879
+ min-height: 40px !important;
880
+ width: auto !important;
881
+ display: flex !important;
882
+ align-items: center !important;
883
+ box-shadow: none !important;
884
+ justify-content: flex-start !important;
885
+ font-size: 14px !important;
886
+ line-height: 1.2 !important;
887
+ }
888
+
889
+ .task-view-shell .dataset-radio label:has(input[type="radio"]:checked) {
890
+ background: linear-gradient(135deg, #fcf7eb 0%, #dfeaff 100%) !important;
891
+ border-color: rgba(14, 58, 138, 0.22) !important;
892
+ font-weight: 700 !important;
893
+ }
894
+
895
+ .task-view-shell .dataset-radio input[type="radio"] {
896
+ display: none !important;
897
+ }
898
+
899
+ .task-view-shell .metric-field,
900
+ .task-view-shell .dataset-field {
901
+ align-self: flex-start !important;
902
+ background: transparent !important;
903
+ }
904
+
905
+ /* Remove any residual top spacing on metric box so it aligns with Dataset. */
906
+ .task-view-shell .metric-wrap .metric-field,
907
+ .task-view-shell .metric-wrap .gradio-textbox,
908
+ .task-view-shell .metric-wrap .gradio-textbox > div,
909
+ .task-view-shell .metric-wrap .gradio-textbox .block,
910
+ .task-view-shell .metric-wrap .gradio-textbox .form,
911
+ .task-view-shell .metric-wrap .gradio-textbox .wrap {
912
+ margin-top: 0 !important;
913
+ padding-top: 0 !important;
914
+ }
915
+
916
+ .task-view-shell .metric-field > div,
917
+ .task-view-shell .dataset-field > div {
918
+ background: transparent !important;
919
+ }
920
+
921
+ .task-view-shell .metric-field .gradio-textbox,
922
+ .task-view-shell .metric-field .gradio-textbox > div,
923
+ .task-view-shell .metric-field .gradio-textbox .block,
924
+ .task-view-shell .metric-field .gradio-textbox .form {
925
+ background: transparent !important;
926
+ border: 0 !important;
927
+ box-shadow: none !important;
928
+ }
929
+
930
+ .task-view-shell .metric-field,
931
+ .task-view-shell .metric-field .gradio-textbox,
932
+ .task-view-shell .metric-field .gradio-textbox .wrap {
933
+ width: 100% !important;
934
+ max-width: none !important;
935
+ }
936
+
937
+ .task-view-shell .metric-field .gradio-textbox .wrap,
938
+ .task-view-shell .metric-field .gradio-textbox textarea,
939
+ .task-view-shell .metric-field .gradio-textbox input {
940
+ height: 40px !important;
941
+ min-height: 40px !important;
942
+ width: 100% !important;
943
+ }
944
+
945
+ .task-view-shell input,
946
+ .task-view-shell textarea,
947
+ .task-view-shell .wrap-inner,
948
+ .task-view-shell button.secondary-down-arrow,
949
+ .task-view-shell .gradio-textbox .wrap {
950
+ border-radius: 12px !important;
951
+ }
952
+
953
+ .task-view-shell .gradio-textbox .wrap,
954
+ .task-view-shell .gradio-textbox textarea,
955
+ .task-view-shell .gradio-textbox input {
956
+ border: 1px solid var(--line) !important;
957
+ background: rgba(255, 255, 255, 0.88) !important;
958
+ color: var(--text) !important;
959
+ }
960
+
961
+ @media (max-width: 1280px) {
962
+ .layout-row {
963
+ min-width: 1120px;
964
+ }
965
+
966
+ .task-filters .dataset-wrap { flex-basis: 560px; min-width: 560px; }
967
+ .task-filters .metric-wrap { flex-basis: 320px; min-width: 320px; }
968
+
969
+ .task-view-shell .dataset-radio > div,
970
+ .task-view-shell .dataset-radio .block,
971
+ .task-view-shell .dataset-radio .form {
972
+ grid-template-columns: repeat(2, minmax(150px, 1fr)) !important;
973
+ }
974
+ }
975
+
976
+ @media (max-width: 980px) {
977
+ .layout-row {
978
+ min-width: 1040px;
979
+ }
980
+
981
+ .rank-strip-list {
982
+ grid-template-columns: repeat(2, minmax(0, 1fr));
983
+ }
984
+
985
+ .task-view-shell .dataset-radio > div,
986
+ .task-view-shell .dataset-radio .block,
987
+ .task-view-shell .dataset-radio .form {
988
+ grid-template-columns: repeat(2, minmax(140px, 1fr)) !important;
989
+ }
990
+ }
991
+
992
+ @media (max-width: 720px) {
993
+ .app-root {
994
+ padding: 16px 14px 40px;
995
+ }
996
+
997
+ .hero-topline {
998
+ flex-direction: column;
999
+ align-items: center;
1000
+ }
1001
+
1002
+ .hero h2 {
1003
+ font-size: 40px;
1004
+ }
1005
+
1006
+ .rank-strip-list {
1007
+ grid-template-columns: 1fr;
1008
+ }
1009
+
1010
+ .task-view-shell .dataset-radio > div,
1011
+ .task-view-shell .dataset-radio .block,
1012
+ .task-view-shell .dataset-radio .form {
1013
+ grid-template-columns: 1fr !important;
1014
+ }
1015
+ }
1016
+ """
1017
+
1018
+
1019
def build_app() -> gr.Blocks:
    """Assemble the leaderboard UI and return the Gradio Blocks object.

    Layout: a sidebar column holding the task menu radio, and a content
    column holding the page header, the home view, and an initially hidden
    task view (dataset radio, read-only metric textbox, table HTML).

    ``update_view`` is registered for three triggers -- menu change, dataset
    change and page load -- always with the same inputs and outputs.
    """
    orbs_html = '<div class="bg-orb orb-1"></div><div class="bg-orb orb-2"></div>'
    sidebar_html = """
                        <div class="sidebar-head">
                        <p class="sidebar-kicker">KoALa-bench</p>
                        <h1>Leaderboard</h1>
                        </div>
                        """
    hero_html = """
                        <header class="hero">
                        <div class="hero-topline">
                        <div>
                        <p class="kicker">Korean Audio Language benchmark</p>
                        <h2>Leaderboard for KoALa</h2>
                        </div>
                        </div>
                        </header>
                        """

    with gr.Blocks(title="Ko-Speech-Eval Leaderboard", fill_width=True) as demo:
        with gr.Column(elem_classes=["app-root"]):
            gr.HTML(orbs_html)
            with gr.Row(elem_classes=["layout-row"]):
                # Left: fixed-width sidebar with the task selector.
                with gr.Column(scale=0, min_width=260, elem_classes=["sidebar-panel"]):
                    gr.HTML(sidebar_html)
                    menu = gr.Radio(
                        choices=menu_choices(),
                        value=HOME_VIEW,
                        show_label=False,
                        container=False,
                        elem_classes=["task-menu", "task-menu-radio"],
                    )

                # Right: header, home view, and the per-task view.
                with gr.Column(scale=1, elem_classes=["content-panel"]):
                    gr.HTML(hero_html)

                    home_view = gr.HTML(render_home(), visible=True)

                    task_view_classes = ["task-view-shell", "section-card", "card"]
                    with gr.Column(visible="hidden", elem_classes=task_view_classes) as task_view:
                        task_title = gr.HTML()
                        with gr.Row(elem_classes=["task-filters"]):
                            with gr.Column(scale=3, min_width=420, elem_classes=["dataset-wrap"]):
                                gr.HTML('<p class="filter-title">Dataset</p>')
                                dataset_radio = gr.Radio(
                                    choices=dataset_choices(TASKS[0]["id"]),
                                    value="Overall",
                                    show_label=False,
                                    elem_classes=["dataset-field", "dataset-radio"],
                                )
                            with gr.Column(scale=2, min_width=280, elem_classes=["metric-wrap"]):
                                gr.HTML('<p class="filter-title">Metric</p>')
                                metric_text = gr.Textbox(
                                    show_label=False,
                                    interactive=False,
                                    elem_classes=["metric-field"],
                                )
                        task_table = gr.HTML()

        # Every trigger refreshes the same components from the same inputs.
        # Registration order matches the original: menu, dataset, page load.
        for register in (menu.change, dataset_radio.change, demo.load):
            register(
                fn=update_view,
                inputs=[menu, dataset_radio],
                outputs=[
                    home_view,
                    task_view,
                    dataset_radio,
                    task_title,
                    metric_text,
                    task_table,
                ],
            )
    return demo
1093
+
1094
+
1095
if __name__ == "__main__":
    # Build the UI first, then resolve the port (GRADIO_SERVER_PORT, default 7860).
    app = build_app()
    port = int(os.getenv("GRADIO_SERVER_PORT", "7860"))
    app.launch(server_port=port, css=CUSTOM_CSS)
build_leaderboard_data.py ADDED
@@ -0,0 +1,292 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from datetime import datetime, timezone
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+
9
+ ROOT = Path(__file__).parent
10
+ RESULTS_ROOT = ROOT / "data" / "results_real"
11
+ LEADERBOARD_JSON = ROOT / "data" / "leaderboard-data.json"
12
+
13
+
14
+ CANONICAL_TASKS = [
15
+ {
16
+ "id": "K-disentQA",
17
+ "label": "SCA-QA",
18
+ "metricLabel": "Speech Context Faithfulness",
19
+ "shortMetric": "Faithfulness",
20
+ "lowerBetter": False,
21
+ "datasets": [
22
+ {"id": "history_after_chosun", "label": "History_after_chosun"},
23
+ {"id": "history_after_chosun_other", "label": "History_after_chosun Other"},
24
+ {"id": "history_before_chosun", "label": "History_before_chosun"},
25
+ {"id": "history_before_chosun_other", "label": "History_before_chosun Other"},
26
+ {"id": "k-sports", "label": "K-sports"},
27
+ {"id": "k-sports_other", "label": "K-sports Other"},
28
+ {"id": "kpop", "label": "K-pop"},
29
+ {"id": "kpop_other", "label": "K-pop Other"},
30
+ ],
31
+ },
32
+ {
33
+ "id": "SQA",
34
+ "label": "Speech QA",
35
+ "metricLabel": "Accuracy (%)",
36
+ "shortMetric": "Acc(%)",
37
+ "lowerBetter": False,
38
+ "datasets": [
39
+ {"id": "click", "label": "CLICk"},
40
+ {"id": "click_other", "label": "CLICk Other"},
41
+ {"id": "kobest_boolq", "label": "KoBest BoolQ"},
42
+ {"id": "kobest_boolq_other", "label": "KoBest BoolQ Other"},
43
+ ],
44
+ },
45
+ {
46
+ "id": "Instruct",
47
+ "label": "Speech Instruction",
48
+ "metricLabel": "Score (GPT-4o as Judge)",
49
+ "shortMetric": "Score (GPT-4o as Judge)",
50
+ "lowerBetter": False,
51
+ "datasets": [
52
+ {"id": "alpaca", "label": "Alpaca"},
53
+ {"id": "alpaca_other", "label": "Alpaca Other"},
54
+ {"id": "kudge", "label": "KUDGE"},
55
+ {"id": "kudge_other", "label": "KUDGE Other"},
56
+ {"id": "openhermes", "label": "OpenHermes"},
57
+ {"id": "openhermes_other", "label": "OpenHermes Other"},
58
+ {"id": "vicuna", "label": "Vicuna"},
59
+ {"id": "vicuna_other", "label": "Vicuna Other"},
60
+ ],
61
+ },
62
+ {
63
+ "id": "ASR",
64
+ "label": "ASR",
65
+ "metricLabel": "CER (%)",
66
+ "shortMetric": "CER",
67
+ "lowerBetter": True,
68
+ "datasets": [
69
+ {"id": "common_voice_korea", "label": "CommonVoice-KO"},
70
+ {"id": "common_voice_korea_other", "label": "CommonVoice-KO Other"},
71
+ {"id": "ksponspeech_eval_clean", "label": "KsponSpeech Clean"},
72
+ {"id": "ksponspeech_eval_other", "label": "KsponSpeech Other"},
73
+ {"id": "zeroth_korean_test", "label": "Zeroth-Korean"},
74
+ {"id": "zeroth_korean_test_other", "label": "Zeroth-Korean Other"},
75
+ ],
76
+ },
77
+ {
78
+ "id": "Translation",
79
+ "label": "Translation",
80
+ "metricLabel": "BLEU / METEOR",
81
+ "shortMetric": "BLEU / METEOR",
82
+ "lowerBetter": False,
83
+ "datasets": [
84
+ {"id": "etri_tst-COMMON", "label": "ETRI-TST-Common"},
85
+ {"id": "etri_tst-HE", "label": "ETRI-TST-HE"},
86
+ ],
87
+ },
88
+ {
89
+ "id": "LSQA",
90
+ "label": "Long Speech Understanding",
91
+ "metricLabel": "Accuracy (%)",
92
+ "shortMetric": "Acc(%)",
93
+ "lowerBetter": False,
94
+ "datasets": [
95
+ {"id": "mctest", "label": "MCTest"},
96
+ {"id": "mctest_other", "label": "MCTest Other"},
97
+ ],
98
+ },
99
+ ]
100
+
101
+
102
+ FOLDER_TO_DATASET_ID = {
103
+ "K-disentQA": {
104
+ "history_after_chosun": "history_after_chosun",
105
+ "history_after_chosun_other": "history_after_chosun_other",
106
+ "history_before_chosun": "history_before_chosun",
107
+ "history_before_chosun_other": "history_before_chosun_other",
108
+ "k-sports": "k-sports",
109
+ "k-sports_other": "k-sports_other",
110
+ "kpop": "kpop",
111
+ "kpop_other": "kpop_other",
112
+ },
113
+ "SQA": {
114
+ "click": "click",
115
+ "click_other": "click_other",
116
+ "kobest_boolq": "kobest_boolq",
117
+ "kobest_boolq_other": "kobest_boolq_other",
118
+ },
119
+ "Instruct": {
120
+ "alpaca": "alpaca",
121
+ "alpaca_other": "alpaca_other",
122
+ "kudge": "kudge",
123
+ "kudge_other": "kudge_other",
124
+ "openhermes": "openhermes",
125
+ "openhermes_other": "openhermes_other",
126
+ "vicuna": "vicuna",
127
+ "vicuna_other": "vicuna_other",
128
+ },
129
+ "ASR": {
130
+ "common_voice_korea": "common_voice_korea",
131
+ "common_voice_korea_other": "common_voice_korea_other",
132
+ "ksponspeech_eval_clean": "ksponspeech_eval_clean",
133
+ "ksponspeech_eval_other": "ksponspeech_eval_other",
134
+ "zeroth_korean_test": "zeroth_korean_test",
135
+ "zeroth_korean_test_other": "zeroth_korean_test_other",
136
+ },
137
+ "Translation": {
138
+ "etri_tst-COMMON": "etri_tst-COMMON",
139
+ "etri_tst-HE": "etri_tst-HE",
140
+ },
141
+ "LSQA": {
142
+ "mctest": "mctest",
143
+ "mctest_other": "mctest_other",
144
+ },
145
+ }
146
+
147
+
148
def load_existing_entry_meta() -> dict[str, dict[str, str]]:
    """Read per-entry metadata from the previously written leaderboard JSON.

    Returns:
        A mapping from entry id to a dict with the keys ``rank_name``,
        ``model`` and ``url``. ``rank_name`` falls back to the entry id and
        the other two fall back to an empty string. An empty mapping is
        returned when ``LEADERBOARD_JSON`` does not exist.

    Raises:
        KeyError: if an entry in the file has no ``id`` key.
    """
    if not LEADERBOARD_JSON.exists():
        return {}

    previous = json.loads(LEADERBOARD_JSON.read_text(encoding="utf-8"))

    meta_by_id: dict[str, dict[str, str]] = {}
    for item in previous.get("entries", []):
        item_id = item["id"]
        meta_by_id[item_id] = {
            "rank_name": item.get("rank_name", item_id),
            "model": item.get("model", ""),
            "url": item.get("url", ""),
        }
    return meta_by_id
161
+
162
+
163
def pick_summary(dataset_dir: Path) -> Path | None:
    """Choose the ``*_summary.json`` file that represents ``dataset_dir``.

    Selection order:
      1. A summary file directly inside ``dataset_dir`` (smallest path wins).
      2. Otherwise the shallowest summary file found recursively, with ties
         broken by the path's string form.

    Only regular files are considered in both steps. The recursive fallback
    previously lacked this check, so a directory whose name ended in
    ``_summary.json`` could be returned.

    Args:
        dataset_dir: Directory to search.

    Returns:
        The chosen summary path, or ``None`` when no summary file exists.
    """
    direct = min(
        (path for path in dataset_dir.glob("*_summary.json") if path.is_file()),
        default=None,
    )
    if direct is not None:
        return direct

    # Fall back to nested results, preferring the shallowest match.
    return min(
        (path for path in dataset_dir.rglob("*_summary.json") if path.is_file()),
        key=lambda path: (len(path.relative_to(dataset_dir).parts), str(path)),
        default=None,
    )
173
+
174
+
175
def extract_metric(task_name: str, payload: dict[str, Any]) -> dict[str, Any] | None:
    """Turn a summary payload into a ``{"value", "display"}`` metric dict.

    For every task except ``Translation`` the first non-``None`` value among
    the task's candidate keys is multiplied by 100 and formatted with two
    decimals. ``Translation`` reports BLEU unscaled (``avg_bleu``, falling
    back to ``corpus_bleu``) and appends ``avg_meteor`` to the display text
    when it is present.

    Args:
        task_name: Task id, e.g. ``"ASR"`` or ``"SQA"``.
        payload: Parsed summary JSON for one dataset.

    Returns:
        The metric dict, or ``None`` when the task is unknown or the payload
        carries none of the expected keys.
    """
    # Candidate payload keys per task, in priority order.
    scaled_sources = {
        "K-disentQA": ("accuracy_speech",),
        "SQA": ("accuracy_logit", "accuracy_generation"),
        "LSQA": ("accuracy_logit", "accuracy_generation"),
        "Instruct": ("avg_gpt_score",),
        "ASR": ("total_cer",),
    }

    if task_name in scaled_sources:
        raw = None
        for key in scaled_sources[task_name]:
            raw = payload.get(key)
            if raw is not None:
                break
        if raw is None:
            return None
        scaled = raw * 100
        return {"value": scaled, "display": f"{scaled:.2f}"}

    if task_name != "Translation":
        return None

    bleu = payload.get("avg_bleu")
    if bleu is None:
        bleu = payload.get("corpus_bleu")
    if bleu is None:
        return None

    meteor = payload.get("avg_meteor")
    shown = f"{bleu:.2f}" if meteor is None else f"{bleu:.2f} / {meteor:.2f}"
    return {"value": bleu, "display": shown}
218
+
219
+
220
def build_leaderboard_payload() -> dict[str, Any]:
    """Scan ``RESULTS_ROOT`` and build the leaderboard JSON payload.

    The directory layout walked is ``<task>/<model>/<dataset>/``. For each
    dataset folder listed in ``FOLDER_TO_DATASET_ID`` a summary file is
    chosen with ``pick_summary`` and converted with ``extract_metric``.
    ``rank_name``, ``model`` and ``url`` are carried over from the existing
    leaderboard file when available.

    Returns:
        A dict with the keys ``generatedAt``, ``sourceRoot``, ``tasks`` and
        ``entries`` (sorted by entry id). Every entry has a key for every
        canonical task, even when no results were found for it.

    Raises:
        SystemExit: if ``RESULTS_ROOT`` does not exist.
    """
    if not RESULTS_ROOT.exists():
        raise SystemExit(f"Missing results directory: {RESULTS_ROOT}")

    known_meta = load_existing_entry_meta()
    by_model: dict[str, dict[str, Any]] = {}

    for task_spec in CANONICAL_TASKS:
        task_key = task_spec["id"]
        task_root = RESULTS_ROOT / task_key
        if not task_root.exists():
            continue

        dataset_ids = FOLDER_TO_DATASET_ID[task_key]
        model_dirs = sorted(child for child in task_root.iterdir() if child.is_dir())
        for model_path in model_dirs:
            name = model_path.name
            if name not in by_model:
                # First sighting of this model: seed it from the stored metadata.
                prior = known_meta.get(name, {})
                by_model[name] = {
                    "id": name,
                    "rank_name": prior.get("rank_name", name),
                    "model": prior.get("model", ""),
                    "url": prior.get("url", ""),
                    "tasks": {},
                }
            record = by_model[name]
            if task_key not in record["tasks"]:
                record["tasks"][task_key] = {}
            task_scores = record["tasks"][task_key]

            dataset_dirs = sorted(child for child in model_path.iterdir() if child.is_dir())
            for dataset_path in dataset_dirs:
                dataset_key = dataset_ids.get(dataset_path.name)
                if not dataset_key:
                    continue  # folder is not a known dataset for this task

                summary_file = pick_summary(dataset_path)
                if summary_file is None:
                    continue

                summary = json.loads(summary_file.read_text(encoding="utf-8"))
                score = extract_metric(task_key, summary)
                if score is None:
                    continue

                # Adopt the model name from the summary only when none is known yet.
                if not record["model"] and summary.get("model"):
                    record["model"] = summary["model"]
                task_scores[dataset_key] = score

    ordered_entries = []
    for name in sorted(by_model):
        record = by_model[name]
        if not record["model"]:
            record["model"] = record["id"]
        for task_spec in CANONICAL_TASKS:
            if task_spec["id"] not in record["tasks"]:
                record["tasks"][task_spec["id"]] = {}
        ordered_entries.append(record)

    return {
        "generatedAt": datetime.now(timezone.utc).isoformat(),
        "sourceRoot": "data/results_real",
        "tasks": CANONICAL_TASKS,
        "entries": ordered_entries,
    }
279
+
280
+
281
def main() -> None:
    """Rebuild the leaderboard payload, write it to ``LEADERBOARD_JSON`` and print a summary."""
    data = build_leaderboard_payload()

    # Non-ASCII text is written as-is; the file ends with a newline.
    serialized = json.dumps(data, ensure_ascii=False, indent=2)
    LEADERBOARD_JSON.write_text(f"{serialized}\n", encoding="utf-8")

    print(f"Wrote {LEADERBOARD_JSON}")
    print(f"Entries: {len(data['entries'])}")
289
+
290
+
291
+ if __name__ == "__main__":
292
+ main()
data/leaderboard-data.json ADDED
@@ -0,0 +1,1022 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "generatedAt": "2026-03-30T07:45:11.033924+00:00",
3
+ "sourceRoot": "data/results_real",
4
+ "tasks": [
5
+ {
6
+ "id": "K-disentQA",
7
+ "label": "SCA-QA",
8
+ "metricLabel": "Speech Context Faithfulness",
9
+ "shortMetric": "Faithfulness",
10
+ "lowerBetter": false,
11
+ "datasets": [
12
+ {
13
+ "id": "history_after_chosun",
14
+ "label": "History_after_chosun"
15
+ },
16
+ {
17
+ "id": "history_after_chosun_other",
18
+ "label": "History_after_chosun Other"
19
+ },
20
+ {
21
+ "id": "history_before_chosun",
22
+ "label": "History_before_chosun"
23
+ },
24
+ {
25
+ "id": "history_before_chosun_other",
26
+ "label": "History_before_chosun Other"
27
+ },
28
+ {
29
+ "id": "k-sports",
30
+ "label": "K-sports"
31
+ },
32
+ {
33
+ "id": "k-sports_other",
34
+ "label": "K-sports Other"
35
+ },
36
+ {
37
+ "id": "kpop",
38
+ "label": "K-pop"
39
+ },
40
+ {
41
+ "id": "kpop_other",
42
+ "label": "K-pop Other"
43
+ }
44
+ ]
45
+ },
46
+ {
47
+ "id": "SQA",
48
+ "label": "Speech QA",
49
+ "metricLabel": "Accuracy (%)",
50
+ "shortMetric": "Acc(%)",
51
+ "lowerBetter": false,
52
+ "datasets": [
53
+ {
54
+ "id": "click",
55
+ "label": "CLICk"
56
+ },
57
+ {
58
+ "id": "click_other",
59
+ "label": "CLICk Other"
60
+ },
61
+ {
62
+ "id": "kobest_boolq",
63
+ "label": "KoBest BoolQ"
64
+ },
65
+ {
66
+ "id": "kobest_boolq_other",
67
+ "label": "KoBest BoolQ Other"
68
+ }
69
+ ]
70
+ },
71
+ {
72
+ "id": "Instruct",
73
+ "label": "Speech Instruction",
74
+ "metricLabel": "Score (GPT-4o as Judge)",
75
+ "shortMetric": "Score (GPT-4o as Judge)",
76
+ "lowerBetter": false,
77
+ "datasets": [
78
+ {
79
+ "id": "alpaca",
80
+ "label": "Alpaca"
81
+ },
82
+ {
83
+ "id": "alpaca_other",
84
+ "label": "Alpaca Other"
85
+ },
86
+ {
87
+ "id": "kudge",
88
+ "label": "KUDGE"
89
+ },
90
+ {
91
+ "id": "kudge_other",
92
+ "label": "KUDGE Other"
93
+ },
94
+ {
95
+ "id": "openhermes",
96
+ "label": "OpenHermes"
97
+ },
98
+ {
99
+ "id": "openhermes_other",
100
+ "label": "OpenHermes Other"
101
+ },
102
+ {
103
+ "id": "vicuna",
104
+ "label": "Vicuna"
105
+ },
106
+ {
107
+ "id": "vicuna_other",
108
+ "label": "Vicuna Other"
109
+ }
110
+ ]
111
+ },
112
+ {
113
+ "id": "ASR",
114
+ "label": "ASR",
115
+ "metricLabel": "CER (%)",
116
+ "shortMetric": "CER",
117
+ "lowerBetter": true,
118
+ "datasets": [
119
+ {
120
+ "id": "common_voice_korea",
121
+ "label": "CommonVoice-KO"
122
+ },
123
+ {
124
+ "id": "common_voice_korea_other",
125
+ "label": "CommonVoice-KO Other"
126
+ },
127
+ {
128
+ "id": "ksponspeech_eval_clean",
129
+ "label": "KsponSpeech Clean"
130
+ },
131
+ {
132
+ "id": "ksponspeech_eval_other",
133
+ "label": "KsponSpeech Other"
134
+ },
135
+ {
136
+ "id": "zeroth_korean_test",
137
+ "label": "Zeroth-Korean"
138
+ },
139
+ {
140
+ "id": "zeroth_korean_test_other",
141
+ "label": "Zeroth-Korean Other"
142
+ }
143
+ ]
144
+ },
145
+ {
146
+ "id": "Translation",
147
+ "label": "Translation",
148
+ "metricLabel": "BLEU / METEOR",
149
+ "shortMetric": "BLEU / METEOR",
150
+ "lowerBetter": false,
151
+ "datasets": [
152
+ {
153
+ "id": "etri_tst-COMMON",
154
+ "label": "ETRI-TST-Common"
155
+ },
156
+ {
157
+ "id": "etri_tst-HE",
158
+ "label": "ETRI-TST-HE"
159
+ }
160
+ ]
161
+ },
162
+ {
163
+ "id": "LSQA",
164
+ "label": "Long Speech Understanding",
165
+ "metricLabel": "Accuracy (%)",
166
+ "shortMetric": "Acc(%)",
167
+ "lowerBetter": false,
168
+ "datasets": [
169
+ {
170
+ "id": "mctest",
171
+ "label": "MCTest"
172
+ },
173
+ {
174
+ "id": "mctest_other",
175
+ "label": "MCTest Other"
176
+ }
177
+ ]
178
+ }
179
+ ],
180
+ "entries": [
181
+ {
182
+ "id": "gemini_flash",
183
+ "rank_name": "gemini_flash",
184
+ "model": "gemini-2.5-flash-lite",
185
+ "url": "",
186
+ "tasks": {
187
+ "K-disentQA": {
188
+ "history_after_chosun": {
189
+ "value": 64.63414634146342,
190
+ "display": "64.63"
191
+ },
192
+ "history_after_chosun_other": {
193
+ "value": 58.536585365853654,
194
+ "display": "58.54"
195
+ },
196
+ "history_before_chosun": {
197
+ "value": 66.33663366336634,
198
+ "display": "66.34"
199
+ },
200
+ "history_before_chosun_other": {
201
+ "value": 65.34653465346535,
202
+ "display": "65.35"
203
+ },
204
+ "k-sports": {
205
+ "value": 86.36363636363636,
206
+ "display": "86.36"
207
+ },
208
+ "k-sports_other": {
209
+ "value": 84.0909090909091,
210
+ "display": "84.09"
211
+ },
212
+ "kpop": {
213
+ "value": 69.90291262135922,
214
+ "display": "69.90"
215
+ },
216
+ "kpop_other": {
217
+ "value": 73.7864077669903,
218
+ "display": "73.79"
219
+ }
220
+ },
221
+ "SQA": {
222
+ "click": {
223
+ "value": 60.48053024026512,
224
+ "display": "60.48"
225
+ },
226
+ "click_other": {
227
+ "value": 60.56338028169014,
228
+ "display": "60.56"
229
+ },
230
+ "kobest_boolq": {
231
+ "value": 51.967799642218246,
232
+ "display": "51.97"
233
+ },
234
+ "kobest_boolq_other": {
235
+ "value": 50.44722719141323,
236
+ "display": "50.45"
237
+ }
238
+ },
239
+ "Instruct": {
240
+ "alpaca": {
241
+ "value": 85.94202898550726,
242
+ "display": "85.94"
243
+ },
244
+ "alpaca_other": {
245
+ "value": 86.8840579710145,
246
+ "display": "86.88"
247
+ },
248
+ "kudge": {
249
+ "value": 70.28725314183116,
250
+ "display": "70.29"
251
+ },
252
+ "kudge_other": {
253
+ "value": 70.3859964093356,
254
+ "display": "70.39"
255
+ },
256
+ "openhermes": {
257
+ "value": 82.69230769230771,
258
+ "display": "82.69"
259
+ },
260
+ "openhermes_other": {
261
+ "value": 80.70512820512825,
262
+ "display": "80.71"
263
+ },
264
+ "vicuna": {
265
+ "value": 76.42857142857144,
266
+ "display": "76.43"
267
+ },
268
+ "vicuna_other": {
269
+ "value": 73.28571428571429,
270
+ "display": "73.29"
271
+ }
272
+ },
273
+ "ASR": {
274
+ "common_voice_korea": {
275
+ "value": 13.738049713193117,
276
+ "display": "13.74"
277
+ },
278
+ "common_voice_korea_other": {
279
+ "value": 26.73996175908222,
280
+ "display": "26.74"
281
+ },
282
+ "ksponspeech_eval_clean": {
283
+ "value": 83.18526725056962,
284
+ "display": "83.19"
285
+ },
286
+ "ksponspeech_eval_other": {
287
+ "value": 45.13922315086276,
288
+ "display": "45.14"
289
+ },
290
+ "zeroth_korean_test": {
291
+ "value": 13.599128992119452,
292
+ "display": "13.60"
293
+ },
294
+ "zeroth_korean_test_other": {
295
+ "value": 14.558274574865202,
296
+ "display": "14.56"
297
+ }
298
+ },
299
+ "Translation": {
300
+ "etri_tst-COMMON": {
301
+ "value": 22.879423047941344,
302
+ "display": "22.88 / 47.15"
303
+ },
304
+ "etri_tst-HE": {
305
+ "value": 26.953160916836307,
306
+ "display": "26.95 / 52.59"
307
+ }
308
+ },
309
+ "LSQA": {
310
+ "mctest": {
311
+ "value": 93.88379204892966,
312
+ "display": "93.88"
313
+ },
314
+ "mctest_other": {
315
+ "value": 94.18960244648318,
316
+ "display": "94.19"
317
+ }
318
+ }
319
+ }
320
+ },
321
+ {
322
+ "id": "gemma3n_vllm",
323
+ "rank_name": "gemma3n_vllm",
324
+ "model": "google/gemma-3n-E4B-it",
325
+ "url": "",
326
+ "tasks": {
327
+ "K-disentQA": {
328
+ "history_after_chosun": {
329
+ "value": 50.0,
330
+ "display": "50.00"
331
+ },
332
+ "history_after_chosun_other": {
333
+ "value": 48.78048780487805,
334
+ "display": "48.78"
335
+ },
336
+ "history_before_chosun": {
337
+ "value": 69.3069306930693,
338
+ "display": "69.31"
339
+ },
340
+ "history_before_chosun_other": {
341
+ "value": 68.31683168316832,
342
+ "display": "68.32"
343
+ },
344
+ "k-sports": {
345
+ "value": 76.13636363636364,
346
+ "display": "76.14"
347
+ },
348
+ "k-sports_other": {
349
+ "value": 73.86363636363636,
350
+ "display": "73.86"
351
+ },
352
+ "kpop": {
353
+ "value": 65.0485436893204,
354
+ "display": "65.05"
355
+ },
356
+ "kpop_other": {
357
+ "value": 64.07766990291263,
358
+ "display": "64.08"
359
+ }
360
+ },
361
+ "SQA": {
362
+ "click": {
363
+ "value": 35.294117647058826,
364
+ "display": "35.29"
365
+ },
366
+ "click_other": {
367
+ "value": 35.708367854183926,
368
+ "display": "35.71"
369
+ },
370
+ "kobest_boolq": {
371
+ "value": 50.715563506261184,
372
+ "display": "50.72"
373
+ },
374
+ "kobest_boolq_other": {
375
+ "value": 51.25223613595706,
376
+ "display": "51.25"
377
+ }
378
+ },
379
+ "Instruct": {
380
+ "alpaca": {
381
+ "value": 82.97101449275362,
382
+ "display": "82.97"
383
+ },
384
+ "alpaca_other": {
385
+ "value": 83.36231884057973,
386
+ "display": "83.36"
387
+ },
388
+ "kudge": {
389
+ "value": 71.38240574506275,
390
+ "display": "71.38"
391
+ },
392
+ "kudge_other": {
393
+ "value": 70.69120287253139,
394
+ "display": "70.69"
395
+ },
396
+ "openhermes": {
397
+ "value": 84.61538461538464,
398
+ "display": "84.62"
399
+ },
400
+ "openhermes_other": {
401
+ "value": 85.96153846153848,
402
+ "display": "85.96"
403
+ },
404
+ "vicuna": {
405
+ "value": 80.21428571428574,
406
+ "display": "80.21"
407
+ },
408
+ "vicuna_other": {
409
+ "value": 80.00000000000003,
410
+ "display": "80.00"
411
+ }
412
+ },
413
+ "ASR": {
414
+ "common_voice_korea": {
415
+ "value": 144.5793499043977,
416
+ "display": "144.58"
417
+ },
418
+ "common_voice_korea_other": {
419
+ "value": 178.34163155878971,
420
+ "display": "178.34"
421
+ },
422
+ "ksponspeech_eval_clean": {
423
+ "value": 142.99420974518696,
424
+ "display": "142.99"
425
+ },
426
+ "ksponspeech_eval_other": {
427
+ "value": 130.76182558088652,
428
+ "display": "130.76"
429
+ },
430
+ "zeroth_korean_test": {
431
+ "value": 107.88054749066778,
432
+ "display": "107.88"
433
+ },
434
+ "zeroth_korean_test_other": {
435
+ "value": 118.62297801742017,
436
+ "display": "118.62"
437
+ }
438
+ },
439
+ "Translation": {
440
+ "etri_tst-COMMON": {
441
+ "value": 5.1704245727847375,
442
+ "display": "5.17 / 16.97"
443
+ },
444
+ "etri_tst-HE": {
445
+ "value": 5.235146756138342,
446
+ "display": "5.24 / 17.50"
447
+ }
448
+ },
449
+ "LSQA": {
450
+ "mctest": {
451
+ "value": 47.706422018348626,
452
+ "display": "47.71"
453
+ },
454
+ "mctest_other": {
455
+ "value": 48.62385321100918,
456
+ "display": "48.62"
457
+ }
458
+ }
459
+ }
460
+ },
461
+ {
462
+ "id": "gpt_realtime_mini",
463
+ "rank_name": "gpt_realtime_mini",
464
+ "model": "gpt-audio-mini",
465
+ "url": "",
466
+ "tasks": {
467
+ "K-disentQA": {
468
+ "history_after_chosun": {
469
+ "value": 45.1219512195122,
470
+ "display": "45.12"
471
+ },
472
+ "history_after_chosun_other": {
473
+ "value": 42.68292682926829,
474
+ "display": "42.68"
475
+ },
476
+ "history_before_chosun": {
477
+ "value": 64.35643564356435,
478
+ "display": "64.36"
479
+ },
480
+ "history_before_chosun_other": {
481
+ "value": 62.37623762376238,
482
+ "display": "62.38"
483
+ },
484
+ "k-sports": {
485
+ "value": 45.45454545454545,
486
+ "display": "45.45"
487
+ },
488
+ "k-sports_other": {
489
+ "value": 46.590909090909086,
490
+ "display": "46.59"
491
+ },
492
+ "kpop": {
493
+ "value": 41.74757281553398,
494
+ "display": "41.75"
495
+ },
496
+ "kpop_other": {
497
+ "value": 40.77669902912621,
498
+ "display": "40.78"
499
+ }
500
+ },
501
+ "SQA": {
502
+ "click": {
503
+ "value": 61.64043082021541,
504
+ "display": "61.64"
505
+ },
506
+ "click_other": {
507
+ "value": 60.06628003314002,
508
+ "display": "60.07"
509
+ },
510
+ "kobest_boolq": {
511
+ "value": 51.878354203935594,
512
+ "display": "51.88"
513
+ },
514
+ "kobest_boolq_other": {
515
+ "value": 50.44722719141323,
516
+ "display": "50.45"
517
+ }
518
+ },
519
+ "Instruct": {
520
+ "alpaca": {
521
+ "value": 90.57971014492755,
522
+ "display": "90.58"
523
+ },
524
+ "alpaca_other": {
525
+ "value": 90.57971014492755,
526
+ "display": "90.58"
527
+ },
528
+ "kudge": {
529
+ "value": 74.06642728904846,
530
+ "display": "74.07"
531
+ },
532
+ "kudge_other": {
533
+ "value": 73.98563734290842,
534
+ "display": "73.99"
535
+ },
536
+ "openhermes": {
537
+ "value": 89.42307692307693,
538
+ "display": "89.42"
539
+ },
540
+ "openhermes_other": {
541
+ "value": 89.61538461538464,
542
+ "display": "89.62"
543
+ },
544
+ "vicuna": {
545
+ "value": 82.14285714285717,
546
+ "display": "82.14"
547
+ },
548
+ "vicuna_other": {
549
+ "value": 81.78571428571429,
550
+ "display": "81.79"
551
+ }
552
+ },
553
+ "ASR": {
554
+ "common_voice_korea": {
555
+ "value": 33.04971319311664,
556
+ "display": "33.05"
557
+ },
558
+ "common_voice_korea_other": {
559
+ "value": 36.21414913957935,
560
+ "display": "36.21"
561
+ },
562
+ "ksponspeech_eval_clean": {
563
+ "value": 134.18967787788205,
564
+ "display": "134.19"
565
+ },
566
+ "ksponspeech_eval_other": {
567
+ "value": 63.6444522236322,
568
+ "display": "63.64"
569
+ },
570
+ "zeroth_korean_test": {
571
+ "value": 100.0,
572
+ "display": "100.00"
573
+ },
574
+ "zeroth_korean_test_other": {
575
+ "value": 100.0,
576
+ "display": "100.00"
577
+ }
578
+ },
579
+ "Translation": {
580
+ "etri_tst-COMMON": {
581
+ "value": 26.06340955434567,
582
+ "display": "26.06 / 49.37"
583
+ },
584
+ "etri_tst-HE": {
585
+ "value": 28.984282512215568,
586
+ "display": "28.98 / 52.96"
587
+ }
588
+ },
589
+ "LSQA": {
590
+ "mctest": {
591
+ "value": 84.09785932721712,
592
+ "display": "84.10"
593
+ },
594
+ "mctest_other": {
595
+ "value": 82.87461773700305,
596
+ "display": "82.87"
597
+ }
598
+ }
599
+ }
600
+ },
601
+ {
602
+ "id": "qwen",
603
+ "rank_name": "qwen",
604
+ "model": "qwen",
605
+ "url": "",
606
+ "tasks": {
607
+ "K-disentQA": {
608
+ "history_after_chosun": {
609
+ "value": 67.07317073170732,
610
+ "display": "67.07"
611
+ },
612
+ "history_after_chosun_other": {
613
+ "value": 63.41463414634146,
614
+ "display": "63.41"
615
+ },
616
+ "history_before_chosun": {
617
+ "value": 72.27722772277228,
618
+ "display": "72.28"
619
+ },
620
+ "history_before_chosun_other": {
621
+ "value": 66.33663366336634,
622
+ "display": "66.34"
623
+ },
624
+ "k-sports": {
625
+ "value": 88.63636363636364,
626
+ "display": "88.64"
627
+ },
628
+ "k-sports_other": {
629
+ "value": 87.5,
630
+ "display": "87.50"
631
+ },
632
+ "kpop": {
633
+ "value": 81.55339805825243,
634
+ "display": "81.55"
635
+ },
636
+ "kpop_other": {
637
+ "value": 81.55339805825243,
638
+ "display": "81.55"
639
+ }
640
+ },
641
+ "SQA": {
642
+ "click": {
643
+ "value": 33.471416735708374,
644
+ "display": "33.47"
645
+ },
646
+ "click_other": {
647
+ "value": 32.64291632145816,
648
+ "display": "32.64"
649
+ },
650
+ "kobest_boolq": {
651
+ "value": 50.62611806797853,
652
+ "display": "50.63"
653
+ },
654
+ "kobest_boolq_other": {
655
+ "value": 50.62611806797853,
656
+ "display": "50.63"
657
+ }
658
+ },
659
+ "Instruct": {
660
+ "alpaca": {
661
+ "value": 55.79710144927535,
662
+ "display": "55.80"
663
+ },
664
+ "alpaca_other": {
665
+ "value": 55.43478260869564,
666
+ "display": "55.43"
667
+ },
668
+ "kudge": {
669
+ "value": 58.88888888888886,
670
+ "display": "58.89"
671
+ },
672
+ "kudge_other": {
673
+ "value": 55.25134649910226,
674
+ "display": "55.25"
675
+ },
676
+ "openhermes": {
677
+ "value": 59.679487179487175,
678
+ "display": "59.68"
679
+ },
680
+ "openhermes_other": {
681
+ "value": 59.294871794871796,
682
+ "display": "59.29"
683
+ },
684
+ "vicuna": {
685
+ "value": 63.28571428571424,
686
+ "display": "63.29"
687
+ },
688
+ "vicuna_other": {
689
+ "value": 61.49999999999998,
690
+ "display": "61.50"
691
+ }
692
+ },
693
+ "ASR": {
694
+ "common_voice_korea": {
695
+ "value": 33.55640535372849,
696
+ "display": "33.56"
697
+ },
698
+ "common_voice_korea_other": {
699
+ "value": 38.14630409804673,
700
+ "display": "38.15"
701
+ },
702
+ "ksponspeech_eval_clean": {
703
+ "value": 39.99665544848345,
704
+ "display": "40.00"
705
+ },
706
+ "ksponspeech_eval_other": {
707
+ "value": 35.66417246459254,
708
+ "display": "35.66"
709
+ },
710
+ "zeroth_korean_test": {
711
+ "value": 22.169224388220655,
712
+ "display": "22.17"
713
+ },
714
+ "zeroth_korean_test_other": {
715
+ "value": 25.036291995022815,
716
+ "display": "25.04"
717
+ }
718
+ },
719
+ "Translation": {
720
+ "etri_tst-COMMON": {
721
+ "value": 15.98267809185928,
722
+ "display": "15.98 / 36.78"
723
+ },
724
+ "etri_tst-HE": {
725
+ "value": 19.09056943080292,
726
+ "display": "19.09 / 40.77"
727
+ }
728
+ },
729
+ "LSQA": {
730
+ "mctest": {
731
+ "value": 51.07033639143731,
732
+ "display": "51.07"
733
+ },
734
+ "mctest_other": {
735
+ "value": 50.45871559633027,
736
+ "display": "50.46"
737
+ }
738
+ }
739
+ }
740
+ },
741
+ {
742
+ "id": "qwen3_vllm",
743
+ "rank_name": "qwen3_vllm",
744
+ "model": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
745
+ "url": "",
746
+ "tasks": {
747
+ "K-disentQA": {
748
+ "history_after_chosun": {
749
+ "value": 84.14634146341463,
750
+ "display": "84.15"
751
+ },
752
+ "history_after_chosun_other": {
753
+ "value": 82.92682926829268,
754
+ "display": "82.93"
755
+ },
756
+ "history_before_chosun": {
757
+ "value": 94.05940594059405,
758
+ "display": "94.06"
759
+ },
760
+ "history_before_chosun_other": {
761
+ "value": 94.05940594059405,
762
+ "display": "94.06"
763
+ },
764
+ "k-sports": {
765
+ "value": 88.63636363636364,
766
+ "display": "88.64"
767
+ },
768
+ "k-sports_other": {
769
+ "value": 89.77272727272727,
770
+ "display": "89.77"
771
+ },
772
+ "kpop": {
773
+ "value": 88.3495145631068,
774
+ "display": "88.35"
775
+ },
776
+ "kpop_other": {
777
+ "value": 85.43689320388349,
778
+ "display": "85.44"
779
+ }
780
+ },
781
+ "SQA": {
782
+ "click": {
783
+ "value": 63.96023198011599,
784
+ "display": "63.96"
785
+ },
786
+ "click_other": {
787
+ "value": 61.55758077879039,
788
+ "display": "61.56"
789
+ },
790
+ "kobest_boolq": {
791
+ "value": 51.341681574239715,
792
+ "display": "51.34"
793
+ },
794
+ "kobest_boolq_other": {
795
+ "value": 51.43112701252236,
796
+ "display": "51.43"
797
+ }
798
+ },
799
+ "Instruct": {
800
+ "alpaca": {
801
+ "value": 84.05797101449278,
802
+ "display": "84.06"
803
+ },
804
+ "alpaca_other": {
805
+ "value": 83.04347826086959,
806
+ "display": "83.04"
807
+ },
808
+ "kudge": {
809
+ "value": 71.86714542190298,
810
+ "display": "71.87"
811
+ },
812
+ "kudge_other": {
813
+ "value": 71.82226211849184,
814
+ "display": "71.82"
815
+ },
816
+ "openhermes": {
817
+ "value": 86.5384615384616,
818
+ "display": "86.54"
819
+ },
820
+ "openhermes_other": {
821
+ "value": 85.19230769230771,
822
+ "display": "85.19"
823
+ },
824
+ "vicuna": {
825
+ "value": 79.64285714285715,
826
+ "display": "79.64"
827
+ },
828
+ "vicuna_other": {
829
+ "value": 78.42857142857143,
830
+ "display": "78.43"
831
+ }
832
+ },
833
+ "ASR": {
834
+ "common_voice_korea": {
835
+ "value": 4.961759082217973,
836
+ "display": "4.96"
837
+ },
838
+ "common_voice_korea_other": {
839
+ "value": 6.779011872845652,
840
+ "display": "6.78"
841
+ },
842
+ "ksponspeech_eval_clean": {
843
+ "value": 8.459624992161208,
844
+ "display": "8.46"
845
+ },
846
+ "ksponspeech_eval_other": {
847
+ "value": 7.907058154290465,
848
+ "display": "7.91"
849
+ },
850
+ "zeroth_korean_test": {
851
+ "value": 3.3336789713811696,
852
+ "display": "3.33"
853
+ },
854
+ "zeroth_korean_test_other": {
855
+ "value": 3.9143508917461634,
856
+ "display": "3.91"
857
+ }
858
+ },
859
+ "Translation": {
860
+ "etri_tst-COMMON": {
861
+ "value": 28.528409006302443,
862
+ "display": "28.53 / 52.02"
863
+ },
864
+ "etri_tst-HE": {
865
+ "value": 31.70803814468474,
866
+ "display": "31.71 / 55.79"
867
+ }
868
+ },
869
+ "LSQA": {
870
+ "mctest": {
871
+ "value": 92.04892966360856,
872
+ "display": "92.05"
873
+ },
874
+ "mctest_other": {
875
+ "value": 95.71865443425077,
876
+ "display": "95.72"
877
+ }
878
+ }
879
+ }
880
+ },
881
+ {
882
+ "id": "voxtral3b",
883
+ "rank_name": "voxtral3b",
884
+ "model": "mistralai/Voxtral-Mini-3B-2507",
885
+ "url": "",
886
+ "tasks": {
887
+ "K-disentQA": {
888
+ "history_after_chosun": {
889
+ "value": 81.70731707317073,
890
+ "display": "81.71"
891
+ },
892
+ "history_after_chosun_other": {
893
+ "value": 78.04878048780488,
894
+ "display": "78.05"
895
+ },
896
+ "history_before_chosun": {
897
+ "value": 86.13861386138613,
898
+ "display": "86.14"
899
+ },
900
+ "history_before_chosun_other": {
901
+ "value": 86.13861386138613,
902
+ "display": "86.14"
903
+ },
904
+ "k-sports": {
905
+ "value": 94.31818181818183,
906
+ "display": "94.32"
907
+ },
908
+ "k-sports_other": {
909
+ "value": 94.31818181818183,
910
+ "display": "94.32"
911
+ },
912
+ "kpop": {
913
+ "value": 89.32038834951457,
914
+ "display": "89.32"
915
+ },
916
+ "kpop_other": {
917
+ "value": 90.29126213592234,
918
+ "display": "90.29"
919
+ }
920
+ },
921
+ "SQA": {
922
+ "click": {
923
+ "value": 41.25932062966031,
924
+ "display": "41.26"
925
+ },
926
+ "click_other": {
927
+ "value": 41.01077050538525,
928
+ "display": "41.01"
929
+ },
930
+ "kobest_boolq": {
931
+ "value": 51.788908765652955,
932
+ "display": "51.79"
933
+ },
934
+ "kobest_boolq_other": {
935
+ "value": 51.073345259391765,
936
+ "display": "51.07"
937
+ }
938
+ },
939
+ "Instruct": {
940
+ "alpaca": {
941
+ "value": 72.89855072463769,
942
+ "display": "72.90"
943
+ },
944
+ "alpaca_other": {
945
+ "value": 72.46376811594205,
946
+ "display": "72.46"
947
+ },
948
+ "kudge": {
949
+ "value": 61.9658886894074,
950
+ "display": "61.97"
951
+ },
952
+ "kudge_other": {
953
+ "value": 61.69658886894065,
954
+ "display": "61.70"
955
+ },
956
+ "openhermes": {
957
+ "value": 69.10256410256412,
958
+ "display": "69.10"
959
+ },
960
+ "openhermes_other": {
961
+ "value": 69.61538461538463,
962
+ "display": "69.62"
963
+ },
964
+ "vicuna": {
965
+ "value": 67.78571428571428,
966
+ "display": "67.79"
967
+ },
968
+ "vicuna_other": {
969
+ "value": 69.49999999999997,
970
+ "display": "69.50"
971
+ }
972
+ },
973
+ "ASR": {
974
+ "common_voice_korea": {
975
+ "value": 60.09560229445506,
976
+ "display": "60.10"
977
+ },
978
+ "common_voice_korea_other": {
979
+ "value": 58.98123324396782,
980
+ "display": "58.98"
981
+ },
982
+ "ksponspeech_eval_clean": {
983
+ "value": 62.62463680260875,
984
+ "display": "62.62"
985
+ },
986
+ "ksponspeech_eval_other": {
987
+ "value": 56.042240989512685,
988
+ "display": "56.04"
989
+ },
990
+ "zeroth_korean_test": {
991
+ "value": 40.92181667357943,
992
+ "display": "40.92"
993
+ },
994
+ "zeroth_korean_test_other": {
995
+ "value": 39.06574035669846,
996
+ "display": "39.07"
997
+ }
998
+ },
999
+ "Translation": {
1000
+ "etri_tst-COMMON": {
1001
+ "value": 23.06821405492274,
1002
+ "display": "23.07 / 45.95"
1003
+ },
1004
+ "etri_tst-HE": {
1005
+ "value": 26.078611466879202,
1006
+ "display": "26.08 / 50.16"
1007
+ }
1008
+ },
1009
+ "LSQA": {
1010
+ "mctest": {
1011
+ "value": 88.37920489296636,
1012
+ "display": "88.38"
1013
+ },
1014
+ "mctest_other": {
1015
+ "value": 89.29663608562691,
1016
+ "display": "89.30"
1017
+ }
1018
+ }
1019
+ }
1020
+ }
1021
+ ]
1022
+ }
data/results_real/ASR/gemini_flash/common_voice_korea/common_voice_korea_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "common_voice_korea.jsonl",
3
+ "model": "gemini-2.5-flash-lite",
4
+ "total_samples": 523,
5
+ "total_cer": 0.13738049713193118,
6
+ "total_edit_distance": 1437,
7
+ "total_ref_length": 10460,
8
+ "normalization": "๊ตฌ๋‘์  ์ œ๊ฑฐ + ๊ณต๋ฐฑ ์ œ๊ฑฐ",
9
+ "prompt": "๋‹ค์Œ ์Œ์„ฑ์„ ํ•œ๊ตญ์–ด๋กœ ์ „์‚ฌํ•ด ์ฃผ์‹œ๊ณ  ์„ค๋ช…์ด๋‚˜ ์ถ”๊ฐ€ ๋ฌธ์žฅ ์—†์ด ์ „์‚ฌ ๊ฒฐ๊ณผ๋งŒ ์ œ๊ณตํ•ด ์ฃผ์„ธ์š”.",
10
+ "elapsed_time_seconds": 1678.7555549144745,
11
+ "timestamp": "2026-03-12T02:49:36.480317"
12
+ }
data/results_real/ASR/gemini_flash/common_voice_korea/prompt_v2/common_voice_korea_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "common_voice_korea.jsonl",
3
+ "model": "gemini-2.5-flash-lite",
4
+ "total_samples": 523,
5
+ "total_cer": 0.13738049713193118,
6
+ "total_edit_distance": 1437,
7
+ "total_ref_length": 10460,
8
+ "normalization": "๊ตฌ๋‘์  ์ œ๊ฑฐ + ๊ณต๋ฐฑ ์ œ๊ฑฐ",
9
+ "prompt": "๋‹ค์Œ ์Œ์„ฑ์„ ํ•œ๊ตญ์–ด๋กœ ์ „์‚ฌํ•ด ์ฃผ์‹œ๊ณ  ์„ค๋ช…์ด๋‚˜ ์ถ”๊ฐ€ ๋ฌธ์žฅ ์—†์ด ์ „์‚ฌ ๊ฒฐ๊ณผ๋งŒ ์ œ๊ณตํ•ด ์ฃผ์„ธ์š”.",
10
+ "elapsed_time_seconds": 1678.7555549144745,
11
+ "timestamp": "2026-03-12T02:49:36.480317"
12
+ }
data/results_real/ASR/gemini_flash/common_voice_korea_other/common_voice_korea_noisy_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "common_voice_korea_noisy.jsonl",
3
+ "model": "gemini-2.5-flash-lite",
4
+ "total_samples": 523,
5
+ "total_cer": 0.2673996175908222,
6
+ "total_edit_distance": 2797,
7
+ "total_ref_length": 10460,
8
+ "normalization": "๊ตฌ๋‘์  ์ œ๊ฑฐ + ๊ณต๋ฐฑ ์ œ๊ฑฐ",
9
+ "prompt": "๋‹ค์Œ ์Œ์„ฑ์„ ํ•œ๊ตญ์–ด๋กœ ์ „์‚ฌํ•ด ์ฃผ์‹œ๊ณ  ์„ค๋ช…์ด๋‚˜ ์ถ”๊ฐ€ ๋ฌธ์žฅ ์—†์ด ์ „์‚ฌ ๊ฒฐ๊ณผ๋งŒ ์ œ๊ณตํ•ด ์ฃผ์„ธ์š”.",
10
+ "elapsed_time_seconds": 2389.414547920227,
11
+ "timestamp": "2026-03-12T03:29:28.746633"
12
+ }
data/results_real/ASR/gemini_flash/common_voice_korea_other/prompt_v2/common_voice_korea_noisy_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "common_voice_korea_noisy.jsonl",
3
+ "model": "gemini-2.5-flash-lite",
4
+ "total_samples": 523,
5
+ "total_cer": 0.2673996175908222,
6
+ "total_edit_distance": 2797,
7
+ "total_ref_length": 10460,
8
+ "normalization": "๊ตฌ๋‘์  ์ œ๊ฑฐ + ๊ณต๋ฐฑ ์ œ๊ฑฐ",
9
+ "prompt": "๋‹ค์Œ ์Œ์„ฑ์„ ํ•œ๊ตญ์–ด๋กœ ์ „์‚ฌํ•ด ์ฃผ์‹œ๊ณ  ์„ค๋ช…์ด๋‚˜ ์ถ”๊ฐ€ ๋ฌธ์žฅ ์—†์ด ์ „์‚ฌ ๊ฒฐ๊ณผ๋งŒ ์ œ๊ณตํ•ด ์ฃผ์„ธ์š”.",
10
+ "elapsed_time_seconds": 2389.414547920227,
11
+ "timestamp": "2026-03-12T03:29:28.746633"
12
+ }
data/results_real/ASR/gemini_flash/ksponspeech_eval_clean/ksponspeech_eval_clean_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "ksponspeech_eval_clean.jsonl",
3
+ "model": "gemini-2.5-flash-lite",
4
+ "total_samples": 3000,
5
+ "total_cer": 0.8318526725056962,
6
+ "total_edit_distance": 39795,
7
+ "total_ref_length": 47839,
8
+ "normalization": "๊ตฌ๋‘์  ์ œ๊ฑฐ + ๊ณต๋ฐฑ ์ œ๊ฑฐ",
9
+ "prompt": "๋‹ค์Œ ์Œ์„ฑ์„ ํ•œ๊ตญ์–ด๋กœ ์ „์‚ฌํ•ด ์ฃผ์‹œ๊ณ  ์„ค๋ช…์ด๋‚˜ ์ถ”๊ฐ€ ๋ฌธ์žฅ ์—†์ด ์ „์‚ฌ ๊ฒฐ๊ณผ๋งŒ ์ œ๊ณตํ•ด ์ฃผ์„ธ์š”.",
10
+ "elapsed_time_seconds": 14890.70679473877,
11
+ "timestamp": "2026-03-12T07:37:42.309543"
12
+ }
data/results_real/ASR/gemini_flash/ksponspeech_eval_clean/prompt_v2/ksponspeech_eval_clean_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "ksponspeech_eval_clean.jsonl",
3
+ "model": "gemini-2.5-flash-lite",
4
+ "total_samples": 3000,
5
+ "total_cer": 0.8318526725056962,
6
+ "total_edit_distance": 39795,
7
+ "total_ref_length": 47839,
8
+ "normalization": "๊ตฌ๋‘์  ์ œ๊ฑฐ + ๊ณต๋ฐฑ ์ œ๊ฑฐ",
9
+ "prompt": "๋‹ค์Œ ์Œ์„ฑ์„ ํ•œ๊ตญ์–ด๋กœ ์ „์‚ฌํ•ด ์ฃผ์‹œ๊ณ  ์„ค๋ช…์ด๋‚˜ ์ถ”๊ฐ€ ๋ฌธ์žฅ ์—†์ด ์ „์‚ฌ ๊ฒฐ๊ณผ๋งŒ ์ œ๊ณตํ•ด ์ฃผ์„ธ์š”.",
10
+ "elapsed_time_seconds": 14890.70679473877,
11
+ "timestamp": "2026-03-12T07:37:42.309543"
12
+ }
data/results_real/ASR/gemini_flash/ksponspeech_eval_other/ksponspeech_eval_other_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "ksponspeech_eval_other.jsonl",
3
+ "model": "gemini-2.5-flash-lite",
4
+ "total_samples": 3000,
5
+ "total_cer": 0.4513922315086276,
6
+ "total_edit_distance": 30947,
7
+ "total_ref_length": 68559,
8
+ "normalization": "๊ตฌ๋‘์  ์ œ๊ฑฐ + ๊ณต๋ฐฑ ์ œ๊ฑฐ",
9
+ "prompt": "๋‹ค์Œ ์Œ์„ฑ์„ ํ•œ๊ตญ์–ด๋กœ ์ „์‚ฌํ•ด ์ฃผ์‹œ๊ณ  ์„ค๋ช…์ด๋‚˜ ์ถ”๊ฐ€ ๋ฌธ์žฅ ์—†์ด ์ „์‚ฌ ๊ฒฐ๊ณผ๋งŒ ์ œ๊ณตํ•ด ์ฃผ์„ธ์š”.",
10
+ "elapsed_time_seconds": 14233.098718166351,
11
+ "timestamp": "2026-03-12T11:34:58.652305"
12
+ }
data/results_real/ASR/gemini_flash/ksponspeech_eval_other/prompt_v2/ksponspeech_eval_other_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "ksponspeech_eval_other.jsonl",
3
+ "model": "gemini-2.5-flash-lite",
4
+ "total_samples": 3000,
5
+ "total_cer": 0.4513922315086276,
6
+ "total_edit_distance": 30947,
7
+ "total_ref_length": 68559,
8
+ "normalization": "๊ตฌ๋‘์  ์ œ๊ฑฐ + ๊ณต๋ฐฑ ์ œ๊ฑฐ",
9
+ "prompt": "๋‹ค์Œ ์Œ์„ฑ์„ ํ•œ๊ตญ์–ด๋กœ ์ „์‚ฌํ•ด ์ฃผ์‹œ๊ณ  ์„ค๋ช…์ด๋‚˜ ์ถ”๊ฐ€ ๋ฌธ์žฅ ์—†์ด ์ „์‚ฌ ๊ฒฐ๊ณผ๋งŒ ์ œ๊ณตํ•ด ์ฃผ์„ธ์š”.",
10
+ "elapsed_time_seconds": 14233.098718166351,
11
+ "timestamp": "2026-03-12T11:34:58.652305"
12
+ }
data/results_real/ASR/gemini_flash/zeroth_korean_test/prompt_v2/zeroth_korean_test_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "zeroth_korean_test.jsonl",
3
+ "model": "gemini-2.5-flash-lite",
4
+ "total_samples": 457,
5
+ "total_cer": 0.13599128992119452,
6
+ "total_edit_distance": 2623,
7
+ "total_ref_length": 19288,
8
+ "normalization": "๊ตฌ๋‘์  ์ œ๊ฑฐ + ๊ณต๋ฐฑ ์ œ๊ฑฐ",
9
+ "prompt": "๋‹ค์Œ ์Œ์„ฑ์„ ํ•œ๊ตญ์–ด๋กœ ์ „์‚ฌํ•ด ์ฃผ์‹œ๊ณ  ์„ค๋ช…์ด๋‚˜ ์ถ”๊ฐ€ ๋ฌธ์žฅ ์—†์ด ์ „์‚ฌ ๊ฒฐ๊ณผ๋งŒ ์ œ๊ณตํ•ด ์ฃผ์„ธ์š”.",
10
+ "elapsed_time_seconds": 2212.4609863758087,
11
+ "timestamp": "2026-03-12T12:11:54.141376"
12
+ }
data/results_real/ASR/gemini_flash/zeroth_korean_test/zeroth_korean_test_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "zeroth_korean_test.jsonl",
3
+ "model": "gemini-2.5-flash-lite",
4
+ "total_samples": 457,
5
+ "total_cer": 0.13599128992119452,
6
+ "total_edit_distance": 2623,
7
+ "total_ref_length": 19288,
8
+ "normalization": "๊ตฌ๋‘์  ์ œ๊ฑฐ + ๊ณต๋ฐฑ ์ œ๊ฑฐ",
9
+ "prompt": "๋‹ค์Œ ์Œ์„ฑ์„ ํ•œ๊ตญ์–ด๋กœ ์ „์‚ฌํ•ด ์ฃผ์‹œ๊ณ  ์„ค๋ช…์ด๋‚˜ ์ถ”๊ฐ€ ๋ฌธ์žฅ ์—†์ด ์ „์‚ฌ ๊ฒฐ๊ณผ๋งŒ ์ œ๊ณตํ•ด ์ฃผ์„ธ์š”.",
10
+ "elapsed_time_seconds": 2212.4609863758087,
11
+ "timestamp": "2026-03-12T12:11:54.141376"
12
+ }
data/results_real/ASR/gemini_flash/zeroth_korean_test_other/prompt_v2/zeroth_korean_test_noisy_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "zeroth_korean_test_noisy.jsonl",
3
+ "model": "gemini-2.5-flash-lite",
4
+ "total_samples": 457,
5
+ "total_cer": 0.14558274574865201,
6
+ "total_edit_distance": 2808,
7
+ "total_ref_length": 19288,
8
+ "normalization": "๊ตฌ๋‘์  ์ œ๊ฑฐ + ๊ณต๋ฐฑ ์ œ๊ฑฐ",
9
+ "prompt": "๋‹ค์Œ ์Œ์„ฑ์„ ํ•œ๊ตญ์–ด๋กœ ์ „์‚ฌํ•ด ์ฃผ์‹œ๊ณ  ์„ค๋ช…์ด๋‚˜ ์ถ”๊ฐ€ ๋ฌธ์žฅ ์—†์ด ์ „์‚ฌ ๊ฒฐ๊ณผ๋งŒ ์ œ๊ณตํ•ด ์ฃผ์„ธ์š”.",
10
+ "elapsed_time_seconds": 2302.8534002304077,
11
+ "timestamp": "2026-03-12T12:50:19.843093"
12
+ }
data/results_real/ASR/gemini_flash/zeroth_korean_test_other/zeroth_korean_test_noisy_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "zeroth_korean_test_noisy.jsonl",
3
+ "model": "gemini-2.5-flash-lite",
4
+ "total_samples": 457,
5
+ "total_cer": 0.14558274574865201,
6
+ "total_edit_distance": 2808,
7
+ "total_ref_length": 19288,
8
+ "normalization": "๊ตฌ๋‘์  ์ œ๊ฑฐ + ๊ณต๋ฐฑ ์ œ๊ฑฐ",
9
+ "prompt": "๋‹ค์Œ ์Œ์„ฑ์„ ํ•œ๊ตญ์–ด๋กœ ์ „์‚ฌํ•ด ์ฃผ์‹œ๊ณ  ์„ค๋ช…์ด๋‚˜ ์ถ”๊ฐ€ ๋ฌธ์žฅ ์—†์ด ์ „์‚ฌ ๊ฒฐ๊ณผ๋งŒ ์ œ๊ณตํ•ด ์ฃผ์„ธ์š”.",
10
+ "elapsed_time_seconds": 2302.8534002304077,
11
+ "timestamp": "2026-03-12T12:50:19.843093"
12
+ }
data/results_real/ASR/gemma3n_vllm/common_voice_korea/common_voice_korea_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "common_voice_korea.jsonl",
3
+ "model": "google/gemma-3n-E4B-it",
4
+ "total_samples": 523,
5
+ "total_cer": 1.4457934990439771,
6
+ "total_edit_distance": 15123,
7
+ "total_ref_length": 10460,
8
+ "normalization": "์ˆซ์ž ๋ณ€ํ˜• ํ—ˆ์šฉ + ๊ณต๋ฐฑ ์ œ๊ฑฐ",
9
+ "prompt": "์ œ๊ณต๋œ ์Œ์„ฑ์˜ ๋‚ด์šฉ์„ ํ•œ๊ตญ์–ด๋กœ ์ •ํ™•ํ•˜๊ฒŒ ์ „์‚ฌํ•ด ์ฃผ์‹œ๊ณ  ์ „์‚ฌ๋œ ๋ฌธ์žฅ๋งŒ ์ถœ๋ ฅํ•ด ์ฃผ์„ธ์š”.",
10
+ "elapsed_time_seconds": 110.93934440612793,
11
+ "timestamp": "2026-03-06T09:23:46.623712"
12
+ }
data/results_real/ASR/gemma3n_vllm/common_voice_korea/prompt_v1/common_voice_korea_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "common_voice_korea.jsonl",
3
+ "model": "google/gemma-3n-E4B-it",
4
+ "total_samples": 523,
5
+ "total_cer": 2.8693116634799236,
6
+ "total_edit_distance": 30013,
7
+ "total_ref_length": 10460,
8
+ "normalization": "๊ตฌ๋‘์  ์ œ๊ฑฐ + ๊ณต๋ฐฑ ์ œ๊ฑฐ",
9
+ "prompt": "์ฃผ์–ด์ง„ ์Œ์„ฑ์„ ํ•œ๊ตญ์–ด๋กœ ์ „์‚ฌํ•˜๊ณ  ์ „์‚ฌ๋œ ํ•œ๊ตญ์–ด ํ…์ŠคํŠธ๋งŒ ์ถœ๋ ฅํ•ด ์ฃผ์„ธ์š”.",
10
+ "elapsed_time_seconds": 307.95989894866943,
11
+ "timestamp": "2026-03-10T19:22:21.797930"
12
+ }
data/results_real/ASR/gemma3n_vllm/common_voice_korea/prompt_v2/common_voice_korea_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "common_voice_korea.jsonl",
3
+ "model": "google/gemma-3n-E4B-it",
4
+ "total_samples": 523,
5
+ "total_cer": 1.6489483747609943,
6
+ "total_edit_distance": 17248,
7
+ "total_ref_length": 10460,
8
+ "normalization": "์ˆซ์ž ๋ณ€ํ˜• ํ—ˆ์šฉ + ๊ณต๋ฐฑ ์ œ๊ฑฐ",
9
+ "prompt": "๋‹ค์Œ ์Œ์„ฑ์„ ํ•œ๊ตญ์–ด๋กœ ์ „์‚ฌํ•ด ์ฃผ์‹œ๊ณ  ์„ค๋ช…์ด๋‚˜ ์ถ”๊ฐ€ ๋ฌธ์žฅ ์—†์ด ์ „์‚ฌ ๊ฒฐ๊ณผ๋งŒ ์ œ๊ณตํ•ด ์ฃผ์„ธ์š”.",
10
+ "elapsed_time_seconds": 134.68821096420288,
11
+ "timestamp": "2026-03-06T09:21:55.660171"
12
+ }
data/results_real/ASR/gemma3n_vllm/common_voice_korea/prompt_v3/common_voice_korea_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "common_voice_korea.jsonl",
3
+ "model": "google/gemma-3n-E4B-it",
4
+ "total_samples": 523,
5
+ "total_cer": 1.4457934990439771,
6
+ "total_edit_distance": 15123,
7
+ "total_ref_length": 10460,
8
+ "normalization": "์ˆซ์ž ๋ณ€ํ˜• ํ—ˆ์šฉ + ๊ณต๋ฐฑ ์ œ๊ฑฐ",
9
+ "prompt": "์ œ๊ณต๋œ ์Œ์„ฑ์˜ ๋‚ด์šฉ์„ ํ•œ๊ตญ์–ด๋กœ ์ •ํ™•ํ•˜๊ฒŒ ์ „์‚ฌํ•ด ์ฃผ์‹œ๊ณ  ์ „์‚ฌ๋œ ๋ฌธ์žฅ๋งŒ ์ถœ๋ ฅํ•ด ์ฃผ์„ธ์š”.",
10
+ "elapsed_time_seconds": 110.93934440612793,
11
+ "timestamp": "2026-03-06T09:23:46.623712"
12
+ }
data/results_real/ASR/gemma3n_vllm/common_voice_korea/prompt_v4/common_voice_korea_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "common_voice_korea.jsonl",
3
+ "model": "google/gemma-3n-E4B-it",
4
+ "total_samples": 523,
5
+ "total_cer": 1.8219885277246655,
6
+ "total_edit_distance": 19058,
7
+ "total_ref_length": 10460,
8
+ "normalization": "์ˆซ์ž ๋ณ€ํ˜• ํ—ˆ์šฉ + ๊ณต๋ฐฑ ์ œ๊ฑฐ",
9
+ "prompt": "๋งฅ๋ฝ์œผ๋กœ ์ฃผ์–ด์ง„ ์Œ์„ฑ์„ ํ•œ๊ตญ์–ด๋กœ ์ „์‚ฌํ•ด ์ฃผ์‹œ๋˜ ๋‹ค๋ฅธ ์„ค๋ช… ์—†์ด ์ „์‚ฌ๋œ ํ•œ๊ตญ์–ด ํ…์ŠคํŠธ๋งŒ ์‘๋‹ตํ•ด ์ฃผ์„ธ์š”.",
10
+ "elapsed_time_seconds": 147.14897418022156,
11
+ "timestamp": "2026-03-06T09:26:13.796740"
12
+ }
data/results_real/ASR/gemma3n_vllm/common_voice_korea_other/common_voice_korea_noisy_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "common_voice_korea_noisy.jsonl",
3
+ "model": "google/gemma-3n-E4B-it",
4
+ "total_samples": 522,
5
+ "total_cer": 1.7834163155878973,
6
+ "total_edit_distance": 18626,
7
+ "total_ref_length": 10444,
8
+ "normalization": "์ˆซ์ž ๋ณ€ํ˜• ํ—ˆ์šฉ + ๊ณต๋ฐฑ ์ œ๊ฑฐ",
9
+ "prompt": "์ œ๊ณต๋œ ์Œ์„ฑ์˜ ๋‚ด์šฉ์„ ํ•œ๊ตญ์–ด๋กœ ์ •ํ™•ํ•˜๊ฒŒ ์ „์‚ฌํ•ด ์ฃผ์‹œ๊ณ  ์ „์‚ฌ๋œ ๋ฌธ์žฅ๋งŒ ์ถœ๋ ฅํ•ด ์ฃผ์„ธ์š”.",
10
+ "elapsed_time_seconds": 147.34291195869446,
11
+ "timestamp": "2026-03-06T09:36:14.543997"
12
+ }
data/results_real/ASR/gemma3n_vllm/common_voice_korea_other/prompt_v1/common_voice_korea_noisy_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "common_voice_korea_noisy.jsonl",
3
+ "model": "google/gemma-3n-E4B-it",
4
+ "total_samples": 522,
5
+ "total_cer": 1.945710455764075,
6
+ "total_edit_distance": 20321,
7
+ "total_ref_length": 10444,
8
+ "normalization": "์ˆซ์ž ๋ณ€ํ˜• ํ—ˆ์šฉ + ๊ณต๋ฐฑ ์ œ๊ฑฐ",
9
+ "prompt": "์ฃผ์–ด์ง„ ์Œ์„ฑ์„ ํ•œ๊ตญ์–ด๋กœ ์ „์‚ฌํ•˜๊ณ  ์ „์‚ฌ๋œ ํ•œ๊ตญ์–ด ํ…์ŠคํŠธ๋งŒ ์ถœ๋ ฅํ•ด ์ฃผ์„ธ์š”.",
10
+ "elapsed_time_seconds": 185.2561948299408,
11
+ "timestamp": "2026-03-06T09:30:59.463344"
12
+ }
data/results_real/ASR/gemma3n_vllm/common_voice_korea_other/prompt_v2/common_voice_korea_noisy_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "common_voice_korea_noisy.jsonl",
3
+ "model": "google/gemma-3n-E4B-it",
4
+ "total_samples": 522,
5
+ "total_cer": 2.036480275756415,
6
+ "total_edit_distance": 21269,
7
+ "total_ref_length": 10444,
8
+ "normalization": "์ˆซ์ž ๋ณ€ํ˜• ํ—ˆ์šฉ + ๊ณต๋ฐฑ ์ œ๊ฑฐ",
9
+ "prompt": "๋‹ค์Œ ์Œ์„ฑ์„ ํ•œ๊ตญ์–ด๋กœ ์ „์‚ฌํ•ด ์ฃผ์‹œ๊ณ  ์„ค๋ช…์ด๋‚˜ ์ถ”๊ฐ€ ๋ฌธ์žฅ ์—†์ด ์ „์‚ฌ ๊ฒฐ๊ณผ๋งŒ ์ œ๊ณตํ•ด ์ฃผ์„ธ์š”.",
10
+ "elapsed_time_seconds": 167.68726301193237,
11
+ "timestamp": "2026-03-06T09:33:47.176678"
12
+ }
data/results_real/ASR/gemma3n_vllm/common_voice_korea_other/prompt_v3/common_voice_korea_noisy_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "common_voice_korea_noisy.jsonl",
3
+ "model": "google/gemma-3n-E4B-it",
4
+ "total_samples": 522,
5
+ "total_cer": 1.7834163155878973,
6
+ "total_edit_distance": 18626,
7
+ "total_ref_length": 10444,
8
+ "normalization": "์ˆซ์ž ๋ณ€ํ˜• ํ—ˆ์šฉ + ๊ณต๋ฐฑ ์ œ๊ฑฐ",
9
+ "prompt": "์ œ๊ณต๋œ ์Œ์„ฑ์˜ ๋‚ด์šฉ์„ ํ•œ๊ตญ์–ด๋กœ ์ •ํ™•ํ•˜๊ฒŒ ์ „์‚ฌํ•ด ์ฃผ์‹œ๊ณ  ์ „์‚ฌ๋œ ๋ฌธ์žฅ๋งŒ ์ถœ๋ ฅํ•ด ์ฃผ์„ธ์š”.",
10
+ "elapsed_time_seconds": 147.34291195869446,
11
+ "timestamp": "2026-03-06T09:36:14.543997"
12
+ }
data/results_real/ASR/gemma3n_vllm/common_voice_korea_other/prompt_v4/common_voice_korea_noisy_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "common_voice_korea_noisy.jsonl",
3
+ "model": "google/gemma-3n-E4B-it",
4
+ "total_samples": 522,
5
+ "total_cer": 2.239946380697051,
6
+ "total_edit_distance": 23394,
7
+ "total_ref_length": 10444,
8
+ "normalization": "์ˆซ์ž ๋ณ€ํ˜• ํ—ˆ์šฉ + ๊ณต๋ฐฑ ์ œ๊ฑฐ",
9
+ "prompt": "๋งฅ๋ฝ์œผ๋กœ ์ฃผ์–ด์ง„ ์Œ์„ฑ์„ ํ•œ๊ตญ์–ด๋กœ ์ „์‚ฌํ•ด ์ฃผ์‹œ๋˜ ๋‹ค๋ฅธ ์„ค๋ช… ์—†์ด ์ „์‚ฌ๋œ ํ•œ๊ตญ์–ด ํ…์ŠคํŠธ๋งŒ ์‘๋‹ตํ•ด ์ฃผ์„ธ์š”.",
10
+ "elapsed_time_seconds": 191.64880776405334,
11
+ "timestamp": "2026-03-06T09:39:26.216829"
12
+ }
data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_clean/ksponspeech_eval_clean_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "ksponspeech_eval_clean.jsonl",
3
+ "model": "google/gemma-3n-E4B-it",
4
+ "total_samples": 3000,
5
+ "total_cer": 1.4299420974518697,
6
+ "total_edit_distance": 68407,
7
+ "total_ref_length": 47839,
8
+ "normalization": "์ˆซ์ž ๋ณ€ํ˜• ํ—ˆ์šฉ + ๊ณต๋ฐฑ ์ œ๊ฑฐ",
9
+ "prompt": "์ œ๊ณต๋œ ์Œ์„ฑ์˜ ๋‚ด์šฉ์„ ํ•œ๊ตญ์–ด๋กœ ์ •ํ™•ํ•˜๊ฒŒ ์ „์‚ฌํ•ด ์ฃผ์‹œ๊ณ  ์ „์‚ฌ๋œ ๋ฌธ์žฅ๋งŒ ์ถœ๋ ฅํ•ด ์ฃผ์„ธ์š”.",
10
+ "elapsed_time_seconds": 570.3824157714844,
11
+ "timestamp": "2026-03-06T10:10:14.044395"
12
+ }
data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_clean/prompt_v1/ksponspeech_eval_clean_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "ksponspeech_eval_clean.jsonl",
3
+ "model": "google/gemma-3n-E4B-it",
4
+ "total_samples": 3000,
5
+ "total_cer": 1.4493613996948096,
6
+ "total_edit_distance": 69336,
7
+ "total_ref_length": 47839,
8
+ "normalization": "์ˆซ์ž ๋ณ€ํ˜• ํ—ˆ์šฉ + ๊ณต๋ฐฑ ์ œ๊ฑฐ",
9
+ "prompt": "์ฃผ์–ด์ง„ ์Œ์„ฑ์„ ํ•œ๊ตญ์–ด๋กœ ์ „์‚ฌํ•˜๊ณ  ์ „์‚ฌ๋œ ํ•œ๊ตญ์–ด ํ…์ŠคํŠธ๋งŒ ์ถœ๋ ฅํ•ด ์ฃผ์„ธ์š”.",
10
+ "elapsed_time_seconds": 603.2837119102478,
11
+ "timestamp": "2026-03-06T09:51:00.969666"
12
+ }
data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_clean/prompt_v2/ksponspeech_eval_clean_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "ksponspeech_eval_clean.jsonl",
3
+ "model": "google/gemma-3n-E4B-it",
4
+ "total_samples": 3000,
5
+ "total_cer": 1.5473985660235372,
6
+ "total_edit_distance": 74026,
7
+ "total_ref_length": 47839,
8
+ "normalization": "์ˆซ์ž ๋ณ€ํ˜• ํ—ˆ์šฉ + ๊ณต๋ฐฑ ์ œ๊ฑฐ",
9
+ "prompt": "๋‹ค์Œ ์Œ์„ฑ์„ ํ•œ๊ตญ์–ด๋กœ ์ „์‚ฌํ•ด ์ฃผ์‹œ๊ณ  ์„ค๋ช…์ด๋‚˜ ์ถ”๊ฐ€ ๋ฌธ์žฅ ์—†์ด ์ „์‚ฌ ๊ฒฐ๊ณผ๋งŒ ์ œ๊ณตํ•ด ์ฃผ์„ธ์š”.",
10
+ "elapsed_time_seconds": 582.5036385059357,
11
+ "timestamp": "2026-03-06T10:00:43.569889"
12
+ }
data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_clean/prompt_v3/ksponspeech_eval_clean_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "ksponspeech_eval_clean.jsonl",
3
+ "model": "google/gemma-3n-E4B-it",
4
+ "total_samples": 3000,
5
+ "total_cer": 1.4299420974518697,
6
+ "total_edit_distance": 68407,
7
+ "total_ref_length": 47839,
8
+ "normalization": "์ˆซ์ž ๋ณ€ํ˜• ํ—ˆ์šฉ + ๊ณต๋ฐฑ ์ œ๊ฑฐ",
9
+ "prompt": "์ œ๊ณต๋œ ์Œ์„ฑ์˜ ๋‚ด์šฉ์„ ํ•œ๊ตญ์–ด๋กœ ์ •ํ™•ํ•˜๊ฒŒ ์ „์‚ฌํ•ด ์ฃผ์‹œ๊ณ  ์ „์‚ฌ๋œ ๋ฌธ์žฅ๋งŒ ์ถœ๋ ฅํ•ด ์ฃผ์„ธ์š”.",
10
+ "elapsed_time_seconds": 570.3824157714844,
11
+ "timestamp": "2026-03-06T10:10:14.044395"
12
+ }
data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_clean/prompt_v4/ksponspeech_eval_clean_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "ksponspeech_eval_clean.jsonl",
3
+ "model": "google/gemma-3n-E4B-it",
4
+ "total_samples": 3000,
5
+ "total_cer": 1.804092894918372,
6
+ "total_edit_distance": 86306,
7
+ "total_ref_length": 47839,
8
+ "normalization": "์ˆซ์ž ๋ณ€ํ˜• ํ—ˆ์šฉ + ๊ณต๋ฐฑ ์ œ๊ฑฐ",
9
+ "prompt": "๋งฅ๋ฝ์œผ๋กœ ์ฃผ์–ด์ง„ ์Œ์„ฑ์„ ํ•œ๊ตญ์–ด๋กœ ์ „์‚ฌํ•ด ์ฃผ์‹œ๋˜ ๋‹ค๋ฅธ ์„ค๋ช… ์—†์ด ์ „์‚ฌ๋œ ํ•œ๊ตญ์–ด ํ…์ŠคํŠธ๋งŒ ์‘๋‹ตํ•ด ์ฃผ์„ธ์š”.",
10
+ "elapsed_time_seconds": 712.8236339092255,
11
+ "timestamp": "2026-03-06T10:22:06.959856"
12
+ }
data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_other/ksponspeech_eval_other_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "ksponspeech_eval_other.jsonl",
3
+ "model": "google/gemma-3n-E4B-it",
4
+ "total_samples": 3000,
5
+ "total_cer": 1.3076182558088654,
6
+ "total_edit_distance": 89649,
7
+ "total_ref_length": 68559,
8
+ "normalization": "์ˆซ์ž ๋ณ€ํ˜• ํ—ˆ์šฉ + ๊ณต๋ฐฑ ์ œ๊ฑฐ",
9
+ "prompt": "์ œ๊ณต๋œ ์Œ์„ฑ์˜ ๋‚ด์šฉ์„ ํ•œ๊ตญ์–ด๋กœ ์ •ํ™•ํ•˜๊ฒŒ ์ „์‚ฌํ•ด ์ฃผ์‹œ๊ณ  ์ „์‚ฌ๋œ ๋ฌธ์žฅ๋งŒ ์ถœ๋ ฅํ•ด ์ฃผ์„ธ์š”.",
10
+ "elapsed_time_seconds": 584.8936712741852,
11
+ "timestamp": "2026-03-06T10:55:11.069525"
12
+ }
data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_other/prompt_v1/ksponspeech_eval_other_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "ksponspeech_eval_other.jsonl",
3
+ "model": "google/gemma-3n-E4B-it",
4
+ "total_samples": 3000,
5
+ "total_cer": 1.3692585947869718,
6
+ "total_edit_distance": 93875,
7
+ "total_ref_length": 68559,
8
+ "normalization": "์ˆซ์ž ๋ณ€ํ˜• ํ—ˆ์šฉ + ๊ณต๋ฐฑ ์ œ๊ฑฐ",
9
+ "prompt": "์ฃผ์–ด์ง„ ์Œ์„ฑ์„ ํ•œ๊ตญ์–ด๋กœ ์ „์‚ฌํ•˜๊ณ  ์ „์‚ฌ๋œ ํ•œ๊ตญ์–ด ํ…์ŠคํŠธ๋งŒ ์ถœ๋ ฅํ•ด ์ฃผ์„ธ์š”.",
10
+ "elapsed_time_seconds": 658.2554221153259,
11
+ "timestamp": "2026-03-06T10:34:39.983630"
12
+ }
data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_other/prompt_v2/ksponspeech_eval_other_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "ksponspeech_eval_other.jsonl",
3
+ "model": "google/gemma-3n-E4B-it",
4
+ "total_samples": 3000,
5
+ "total_cer": 1.463206872912382,
6
+ "total_edit_distance": 100316,
7
+ "total_ref_length": 68559,
8
+ "normalization": "์ˆซ์ž ๋ณ€ํ˜• ํ—ˆ์šฉ + ๊ณต๋ฐฑ ์ œ๊ฑฐ",
9
+ "prompt": "๋‹ค์Œ ์Œ์„ฑ์„ ํ•œ๊ตญ์–ด๋กœ ์ „์‚ฌํ•ด ์ฃผ์‹œ๊ณ  ์„ค๋ช…์ด๋‚˜ ์ถ”๊ฐ€ ๋ฌธ์žฅ ์—†์ด ์ „์‚ฌ ๊ฒฐ๊ณผ๋งŒ ์ œ๊ณตํ•ด ์ฃผ์„ธ์š”.",
10
+ "elapsed_time_seconds": 646.0058970451355,
11
+ "timestamp": "2026-03-06T10:45:26.085344"
12
+ }
data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_other/prompt_v3/ksponspeech_eval_other_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "ksponspeech_eval_other.jsonl",
3
+ "model": "google/gemma-3n-E4B-it",
4
+ "total_samples": 3000,
5
+ "total_cer": 1.3076182558088654,
6
+ "total_edit_distance": 89649,
7
+ "total_ref_length": 68559,
8
+ "normalization": "숫자 변형 허용 + 공백 제거",
9
+ "prompt": "제공된 음성의 내용을 한국어로 정확하게 전사해 주시고 전사된 문장만 출력해 주세요.",
10
+ "elapsed_time_seconds": 584.8936712741852,
11
+ "timestamp": "2026-03-06T10:55:11.069525"
12
+ }
data/results_real/ASR/gemma3n_vllm/ksponspeech_eval_other/prompt_v4/ksponspeech_eval_other_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "ksponspeech_eval_other.jsonl",
3
+ "model": "google/gemma-3n-E4B-it",
4
+ "total_samples": 3000,
5
+ "total_cer": 1.6952114237372191,
6
+ "total_edit_distance": 116222,
7
+ "total_ref_length": 68559,
8
+ "normalization": "숫자 변형 허용 + 공백 제거",
9
+ "prompt": "맥락으로 주어진 음성을 한국어로 전사해 주시되 다른 설명 없이 전사된 한국어 텍스트만 응답해 주세요.",
10
+ "elapsed_time_seconds": 814.8545458316803,
11
+ "timestamp": "2026-03-06T11:08:46.021211"
12
+ }
data/results_real/ASR/gemma3n_vllm/zeroth_korean_test/prompt_v1/zeroth_korean_test_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "zeroth_korean_test.jsonl",
3
+ "model": "google/gemma-3n-E4B-it",
4
+ "total_samples": 457,
5
+ "total_cer": 1.2059829946080465,
6
+ "total_edit_distance": 23261,
7
+ "total_ref_length": 19288,
8
+ "normalization": "숫자 변형 허용 + 공백 제거",
9
+ "prompt": "주어진 음성을 한국어로 전사하고 전사된 한국어 텍스트만 출력해 주세요.",
10
+ "elapsed_time_seconds": 121.40821027755737,
11
+ "timestamp": "2026-03-06T11:12:15.746632"
12
+ }
data/results_real/ASR/gemma3n_vllm/zeroth_korean_test/prompt_v2/zeroth_korean_test_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "zeroth_korean_test.jsonl",
3
+ "model": "google/gemma-3n-E4B-it",
4
+ "total_samples": 457,
5
+ "total_cer": 1.150819162173372,
6
+ "total_edit_distance": 22197,
7
+ "total_ref_length": 19288,
8
+ "normalization": "숫자 변형 허용 + 공백 제거",
9
+ "prompt": "다음 음성을 한국어로 전사해 주시고 설명이나 추가 문장 없이 전사 결과만 제공해 주세요.",
10
+ "elapsed_time_seconds": 85.42956852912903,
11
+ "timestamp": "2026-03-06T11:13:41.191670"
12
+ }
data/results_real/ASR/gemma3n_vllm/zeroth_korean_test/prompt_v3/zeroth_korean_test_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "zeroth_korean_test.jsonl",
3
+ "model": "google/gemma-3n-E4B-it",
4
+ "total_samples": 457,
5
+ "total_cer": 1.0788054749066778,
6
+ "total_edit_distance": 20808,
7
+ "total_ref_length": 19288,
8
+ "normalization": "숫자 변형 허용 + 공백 제거",
9
+ "prompt": "제공된 음성의 내용을 한국어로 정확하게 전사해 주시고 전사된 문장만 출력해 주세요.",
10
+ "elapsed_time_seconds": 90.69133472442627,
11
+ "timestamp": "2026-03-06T11:15:11.905767"
12
+ }
data/results_real/ASR/gemma3n_vllm/zeroth_korean_test/prompt_v4/zeroth_korean_test_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "zeroth_korean_test.jsonl",
3
+ "model": "google/gemma-3n-E4B-it",
4
+ "total_samples": 457,
5
+ "total_cer": 1.4709664039817503,
6
+ "total_edit_distance": 28372,
7
+ "total_ref_length": 19288,
8
+ "normalization": "숫자 변형 허용 + 공백 제거",
9
+ "prompt": "맥락으로 주어진 음성을 한국어로 전사해 주시되 다른 설명 없이 전사된 한국어 텍스트만 응답해 주세요.",
10
+ "elapsed_time_seconds": 155.58400297164917,
11
+ "timestamp": "2026-03-06T11:17:47.512263"
12
+ }
data/results_real/ASR/gemma3n_vllm/zeroth_korean_test/zeroth_korean_test_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "zeroth_korean_test.jsonl",
3
+ "model": "google/gemma-3n-E4B-it",
4
+ "total_samples": 457,
5
+ "total_cer": 1.0788054749066778,
6
+ "total_edit_distance": 20808,
7
+ "total_ref_length": 19288,
8
+ "normalization": "숫자 변형 허용 + 공백 제거",
9
+ "prompt": "제공된 음성의 내용을 한국어로 정확하게 전사해 주시고 전사된 문장만 출력해 주세요.",
10
+ "elapsed_time_seconds": 90.69133472442627,
11
+ "timestamp": "2026-03-06T11:15:11.905767"
12
+ }
data/results_real/ASR/gemma3n_vllm/zeroth_korean_test_other/prompt_v1/zeroth_korean_test_noisy_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "zeroth_korean_test_noisy.jsonl",
3
+ "model": "google/gemma-3n-E4B-it",
4
+ "total_samples": 457,
5
+ "total_cer": 1.227447117378681,
6
+ "total_edit_distance": 23675,
7
+ "total_ref_length": 19288,
8
+ "normalization": "숫자 변형 허용 + 공백 제거",
9
+ "prompt": "주어진 음성을 한국어로 전사하고 전사된 한국어 텍스트만 출력해 주세요.",
10
+ "elapsed_time_seconds": 128.4206943511963,
11
+ "timestamp": "2026-03-06T11:21:24.146182"
12
+ }
data/results_real/ASR/gemma3n_vllm/zeroth_korean_test_other/prompt_v2/zeroth_korean_test_noisy_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "zeroth_korean_test_noisy.jsonl",
3
+ "model": "google/gemma-3n-E4B-it",
4
+ "total_samples": 457,
5
+ "total_cer": 1.294068851099129,
6
+ "total_edit_distance": 24960,
7
+ "total_ref_length": 19288,
8
+ "normalization": "숫자 변형 허용 + 공백 제거",
9
+ "prompt": "다음 음성을 한국어로 전사해 주시고 설명이나 추가 문장 없이 전사 결과만 제공해 주세요.",
10
+ "elapsed_time_seconds": 118.1246964931488,
11
+ "timestamp": "2026-03-06T11:23:22.295773"
12
+ }
data/results_real/ASR/gemma3n_vllm/zeroth_korean_test_other/prompt_v3/zeroth_korean_test_noisy_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "zeroth_korean_test_noisy.jsonl",
3
+ "model": "google/gemma-3n-E4B-it",
4
+ "total_samples": 457,
5
+ "total_cer": 1.1862297801742017,
6
+ "total_edit_distance": 22880,
7
+ "total_ref_length": 19288,
8
+ "normalization": "숫자 변형 허용 + 공백 제거",
9
+ "prompt": "제공된 음성의 내용을 한국어로 정확하게 전사해 주시고 전사된 문장만 출력해 주세요.",
10
+ "elapsed_time_seconds": 110.6101644039154,
11
+ "timestamp": "2026-03-06T11:25:12.929083"
12
+ }
data/results_real/ASR/gemma3n_vllm/zeroth_korean_test_other/prompt_v4/zeroth_korean_test_noisy_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "zeroth_korean_test_noisy.jsonl",
3
+ "model": "google/gemma-3n-E4B-it",
4
+ "total_samples": 457,
5
+ "total_cer": 1.6815636665284115,
6
+ "total_edit_distance": 32434,
7
+ "total_ref_length": 19288,
8
+ "normalization": "숫자 변형 허용 + 공백 제거",
9
+ "prompt": "맥락으로 주어진 음성을 한국어로 전사해 주시되 다른 설명 없이 전사된 한국어 텍스트만 응답해 주세요.",
10
+ "elapsed_time_seconds": 198.1166172027588,
11
+ "timestamp": "2026-03-06T11:28:31.068015"
12
+ }
data/results_real/ASR/gemma3n_vllm/zeroth_korean_test_other/zeroth_korean_test_noisy_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "zeroth_korean_test_noisy.jsonl",
3
+ "model": "google/gemma-3n-E4B-it",
4
+ "total_samples": 457,
5
+ "total_cer": 1.1862297801742017,
6
+ "total_edit_distance": 22880,
7
+ "total_ref_length": 19288,
8
+ "normalization": "숫자 변형 허용 + 공백 제거",
9
+ "prompt": "제공된 음성의 내용을 한국어로 정확하게 전사해 주시고 전사된 문장만 출력해 주세요.",
10
+ "elapsed_time_seconds": 110.6101644039154,
11
+ "timestamp": "2026-03-06T11:25:12.929083"
12
+ }
data/results_real/ASR/gpt_realtime_mini/common_voice_korea/common_voice_korea_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "common_voice_korea.jsonl",
3
+ "model": "gpt-audio-mini",
4
+ "total_samples": 523,
5
+ "total_cer": 0.33049713193116637,
6
+ "total_edit_distance": 3457,
7
+ "total_ref_length": 10460,
8
+ "normalization": "구두점 제거 + 공백 제거",
9
+ "prompt": "다음 음성을 한국어로 전사해 주시고 설명이나 추가 문장 없이 전사 결과만 제공해 주세요.",
10
+ "elapsed_time_seconds": 480.61955213546753,
11
+ "timestamp": "2026-03-11T18:28:40.586049"
12
+ }
data/results_real/ASR/gpt_realtime_mini/common_voice_korea/prompt_v2/common_voice_korea_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "common_voice_korea.jsonl",
3
+ "model": "gpt-audio-mini",
4
+ "total_samples": 523,
5
+ "total_cer": 0.33049713193116637,
6
+ "total_edit_distance": 3457,
7
+ "total_ref_length": 10460,
8
+ "normalization": "구두점 제거 + 공백 제거",
9
+ "prompt": "다음 음성을 한국어로 전사해 주시고 설명이나 추가 문장 없이 전사 결과만 제공해 주세요.",
10
+ "elapsed_time_seconds": 480.61955213546753,
11
+ "timestamp": "2026-03-11T18:28:40.586049"
12
+ }
data/results_real/ASR/gpt_realtime_mini/common_voice_korea_other/common_voice_korea_noisy_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "common_voice_korea_noisy.jsonl",
3
+ "model": "gpt-audio-mini",
4
+ "total_samples": 523,
5
+ "total_cer": 0.3621414913957935,
6
+ "total_edit_distance": 3788,
7
+ "total_ref_length": 10460,
8
+ "normalization": "구두점 제거 + 공백 제거",
9
+ "prompt": "다음 음성을 한국어로 전사해 주시고 설명이나 추가 문장 없이 전사 결과만 제공해 주세요.",
10
+ "elapsed_time_seconds": 486.3854024410248,
11
+ "timestamp": "2026-03-11T18:36:47.342577"
12
+ }
data/results_real/ASR/gpt_realtime_mini/common_voice_korea_other/prompt_v2/common_voice_korea_noisy_summary.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": "common_voice_korea_noisy.jsonl",
3
+ "model": "gpt-audio-mini",
4
+ "total_samples": 523,
5
+ "total_cer": 0.3621414913957935,
6
+ "total_edit_distance": 3788,
7
+ "total_ref_length": 10460,
8
+ "normalization": "구두점 제거 + 공백 제거",
9
+ "prompt": "다음 음성을 한국어로 전사해 주시고 설명이나 추가 문장 없이 전사 결과만 제공해 주세요.",
10
+ "elapsed_time_seconds": 486.3854024410248,
11
+ "timestamp": "2026-03-11T18:36:47.342577"
12
+ }