Harheem Kim committed on
Commit
87c12c8
·
2 Parent(s): db58c35 4bd143f

en_version & kr_version

Browse files
app.py CHANGED
@@ -2,9 +2,10 @@ import warnings
2
  warnings.filterwarnings("ignore")
3
 
4
  import gradio as gr
5
- from tabs.leaderboard_v1 import create_leaderboard_v2_interface
 
6
 
7
- # 다크 모드에서 프로즈/마크다운 텍스트를 확실히 밝게 고정하는 CSS 보정
8
  FIX_DARK_TEXT_CSS = """
9
  html.dark .gr-prose,
10
  html.dark .gr-prose p,
@@ -17,14 +18,36 @@ html.dark .gr-markdown * {
17
  """
18
 
19
  def create_app():
20
- # 권장: 명시적인 테마 객체 사용 (Default, Soft, Origin 등)
21
- theme = gr.themes.Default() # 필요 시 gr.themes.Origin() 등으로 변경
22
 
23
  with gr.Blocks(theme=theme, css=FIX_DARK_TEXT_CSS) as app:
24
- create_leaderboard_v2_interface()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  return app
26
 
27
- demo = create_app()
28
 
29
- # Spaces/Gradio5์—์„œ SSR์ด ๊ผฌ์ด๋ฉด ์ผ๋‹จ ๊บผ์„œ ํ™•์ธ
30
- demo.launch(ssr_mode=False)
 
2
  warnings.filterwarnings("ignore")
3
 
4
  import gradio as gr
5
+ from tabs.leaderboard_v1_kr import create_leaderboard_v2_interface as leaderboard_kr
6
+ from tabs.leaderboard_v1_en import create_leaderboard_v2_interface as leaderboard_en
7
 
8
+ # 다크 모드 텍스트 색상 보정
9
  FIX_DARK_TEXT_CSS = """
10
  html.dark .gr-prose,
11
  html.dark .gr-prose p,
 
18
  """
19
 
20
  def create_app():
21
+ theme = gr.themes.Default()
 
22
 
23
  with gr.Blocks(theme=theme, css=FIX_DARK_TEXT_CSS) as app:
24
+ # 🔹 왼쪽 상단에 언어 전환 버튼
25
+ with gr.Row():
26
+ lang_btn = gr.Button("๐ŸŒ English", scale=0, elem_id="lang-toggle-btn")
27
+
28
+ # 🔹 기본은 한국어 UI
29
+ with gr.Column(visible=True) as kr_view:
30
+ leaderboard_kr()
31
+
32
+ # ๐Ÿ”น ์˜์–ด UI๋Š” ์ˆจ๊น€
33
+ with gr.Column(visible=False) as en_view:
34
+ leaderboard_en()
35
+
36
+ # 🔹 버튼 클릭 시 토글
37
+ def toggle_language(current_label):
38
+ if "English" in current_label:
39
+ return "🇰🇷 Korean", gr.update(visible=False), gr.update(visible=True)
40
+ else:
41
+ return "๐ŸŒ English", gr.update(visible=True), gr.update(visible=False)
42
+
43
+ lang_btn.click(
44
+ toggle_language,
45
+ inputs=[lang_btn],
46
+ outputs=[lang_btn, kr_view, en_view],
47
+ )
48
+
49
  return app
50
 
 
51
 
52
+ demo = create_app()
53
+ demo.launch(ssr_mode=False)
tabs/leaderboard_v1_en.py ADDED
The diff for this file is too large to render. See raw diff
 
tabs/{leaderboard_v1.py → leaderboard_v1_kr.py} RENAMED
@@ -1103,7 +1103,7 @@ def create_leaderboard_v2_tab():
1103
  <path d="M15 7h3a5 5 0 0 1 5 5 5 5 0 0 1-5 5h-3m-6 0H6a5 5 0 0 1-5-5 5 5 0 0 1 5-5h3"/>
1104
  <line x1="8" y1="12" x2="16" y2="12"/>
1105
  </svg>
1106
- <span>Blog</span>
1107
  </a>
1108
  <a href="https://github.com/Hugging-Face-KREW/Ko-AgentBench" target="_blank" rel="noopener noreferrer" class="hero-action-button">
1109
  <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
@@ -1118,7 +1118,7 @@ def create_leaderboard_v2_tab():
1118
  <polyline points="7 10 12 15 17 10"/>
1119
  <line x1="12" y1="15" x2="12" y2="3"/>
1120
  </svg>
1121
- <span>Dataset</span>
1122
  </a>
1123
  <a href="https://github.com/Hugging-Face-KREW/Ko-AgentBench/blob/main/evaluate_model_run.py#L55" target="_blank" rel="noopener noreferrer" class="hero-action-button">
1124
  <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
@@ -1127,7 +1127,7 @@ def create_leaderboard_v2_tab():
1127
  <path d="M12 17V7"/>
1128
  <path d="M17 17v-3"/>
1129
  </svg>
1130
- <span>Metrics</span>
1131
  </a>
1132
  </div>
1133
  """)
@@ -1141,7 +1141,7 @@ def create_leaderboard_v2_tab():
1141
  <p class="section-lead" style="text-align: center; margin: 0 auto 24px auto; max-width: 720px; line-height: 1.7; word-break: keep-all;">๋‹จ์ˆœ ๋„๊ตฌ ํ˜ธ์ถœ๋ถ€ํ„ฐ ์žฅ๊ธฐ์  ๋งฅ๋ฝ ๋Šฅ๋ ฅ, ๊ฐ•๊ฑด์„ฑ ์ฒ˜๋ฆฌ ๋Šฅ๋ ฅ๊นŒ์ง€ ์—์ด์ „ํŠธ์˜ ๋Šฅ๋ ฅ์„ 7๋‹จ๊ณ„๋กœ ์ž…์ฒด์ ์œผ๋กœ ๋ถ„์„ํ•˜์˜€์Šต๋‹ˆ๋‹ค.</p>
1142
  <div class="phase-grid">
1143
  <div class="phase-card">
1144
- <h3>Single-Turn</h3>
1145
  <div class="phase-chart" style="--progress:80%;">
1146
  <span>80%</span>
1147
  </div>
@@ -1154,7 +1154,7 @@ def create_leaderboard_v2_tab():
1154
  </ul>
1155
  </div>
1156
  <div class="phase-card">
1157
- <h3>Multi-Turn</h3>
1158
  <div class="phase-chart" style="--progress:20%;">
1159
  <span>20%</span>
1160
  </div>
@@ -1944,8 +1944,8 @@ def create_leaderboard_v2_tab():
1944
  with gr.Column(elem_classes=["domain-selector-container"], elem_id="task-level-selector"):
1945
  gr.HTML("""
1946
  <div class="domain-header">
1947
- <h2 class="domain-title" style="color: white;">๐Ÿง  Select Task Level</h2>
1948
- <p class="domain-subtitle" style="color: white;">Ko-AgentBench์˜ ALL ยท L1~L7 ๋‹จ๊ณ„๋ณ„ ์—์ด์ „ํŠธ ์„ฑ๋Šฅ์„ ์†์‰ฝ๊ฒŒ ๋น„๊ตํ•˜์„ธ์š”.</p>
1949
  </div>
1950
  """)
1951
  domain_filter = gr.Radio(
@@ -1961,14 +1961,14 @@ def create_leaderboard_v2_tab():
1961
  with gr.Column(elem_classes=["domain-selector-container", "filters-sorting-container"], elem_id="filters-sorting-container"):
1962
  gr.HTML("""
1963
  <div class="domain-header">
1964
- <h2 class="domain-title" style="color: white;">๐Ÿ” Filters & Sorting</h2>
1965
  <p class="domain-subtitle" style="color: white;">๋ชจ๋ธ ์ ‘๊ทผ ๋ฐฉ์‹๊ณผ ์ •๋ ฌ ์ˆœ์„œ๋ฅผ ์„ ํƒํ•ด ๋งž์ถค ๋ทฐ๋ฅผ ๊ตฌ์„ฑํ•˜์„ธ์š”.</p>
1966
  </div>
1967
  """)
1968
  with gr.Row(elem_classes=["filters-sorting-row"]):
1969
  with gr.Column(scale=1, elem_classes=["filter-group"]):
1970
  with gr.Row(elem_classes=["filter-group-row"]):
1971
- gr.HTML("<span class='filter-group-label' style='color: white;'>Model Access</span>")
1972
  model_type_filter = gr.Radio(
1973
  choices=["All", "OSS", "API"],
1974
  value="All",
@@ -1978,7 +1978,7 @@ def create_leaderboard_v2_tab():
1978
  )
1979
  with gr.Column(scale=1, elem_classes=["filter-group"]):
1980
  with gr.Row(elem_classes=["filter-group-row"]):
1981
- gr.HTML("<span class='filter-group-label' style='color: white;'>Sort Order</span>")
1982
  sort_order = gr.Radio(
1983
  choices=["Descending", "Ascending"],
1984
  value="Descending",
@@ -2000,16 +2000,16 @@ def create_leaderboard_v2_tab():
2000
  gr.HTML("""
2001
  <div class="domain-selector-container domain-performance-container">
2002
  <div class="domain-header">
2003
- <h2 class="domain-title" style="color: white;">Core Capability Radar</h2>
2004
- <p class="domain-subtitle" style="color: white;">Track six essential pillars: Success, Execution, Reasoning, Robustness, Efficiency, and Call Validity.</p>
2005
  </div>
2006
  """)
2007
 
2008
  with gr.Column(elem_classes=["domain-selector-container", "model-selector-container"], elem_id="radar-model-selector"):
2009
  gr.HTML("""
2010
  <div class="domain-header">
2011
- <h2 class="domain-title" style="color: white;">๐ŸŽฏ Select Models for Comparison</h2>
2012
- <p class="domain-subtitle" style="color: white;">Choose up to 5 models to map on the capability radar.</p>
2013
  </div>
2014
  """)
2015
  model_selector = gr.Dropdown(
@@ -2040,16 +2040,16 @@ def create_leaderboard_v2_tab():
2040
  gr.HTML("""
2041
  <div class="domain-selector-container domain-performance-container level-metrics-wrapper">
2042
  <div class="domain-header">
2043
- <h2 class="domain-title" style="color: white;">Level-Specific Metric Spotlight</h2>
2044
- <p class="domain-subtitle" style="color: white;">Dive deeper into each Ko-AgentBench stage and compare model scores across its unique evaluation metrics.</p>
2045
  </div>
2046
  """)
2047
 
2048
  with gr.Column(elem_classes=["domain-selector-container", "level-selector-container"], elem_id="level-selector-box"):
2049
  gr.HTML("""
2050
  <div class="domain-header">
2051
- <h2 class="domain-title" style="color: white;">๐Ÿงญ Select Task Level and Models</h2>
2052
- <p class="domain-subtitle" style="color: white;">Choose a level and up to 5 models to explore their detailed SR-driven metrics.</p>
2053
  </div>
2054
  """)
2055
  level_metric_selector = gr.Dropdown(
@@ -2086,8 +2086,8 @@ def create_leaderboard_v2_tab():
2086
  gr.HTML("""
2087
  <div class="domain-selector-container domain-performance-container heatmap-wrapper">
2088
  <div class="domain-header">
2089
- <h2 class="domain-title" style="color: white;">Comprehensive Performance Heatmap</h2>
2090
- <p class="domain-subtitle" style="color: white;">View Ko-AgentBench SR scores across L1~L7 for each model in a single glance.</p>
2091
  </div>
2092
  <div class="chart-container heatmap-chart-container">
2093
  """)
@@ -2452,8 +2452,8 @@ def create_leaderboard_v2_tab():
2452
  gr.HTML("""
2453
  <div class="domain-selector-container performance-card-container">
2454
  <div class="domain-header">
2455
- <h2 class="domain-title" style="color: white;">Model Performance Card</h2>
2456
- <p class="domain-subtitle" style="color: white;">Comprehensive performance card for any model - perfect for presentations and reports</p>
2457
  </div>
2458
  <div class="performance-card-content">
2459
  """)
@@ -3182,7 +3182,17 @@ def create_domain_radar_chart(df, selected_models=None, max_models=5):
3182
  height=800,
3183
  width=900,
3184
  margin=dict(t=30, b=50, l=10, r=10),
3185
- autosize=True
 
 
 
 
 
 
 
 
 
 
3186
  )
3187
 
3188
  return fig
 
1103
  <path d="M15 7h3a5 5 0 0 1 5 5 5 5 0 0 1-5 5h-3m-6 0H6a5 5 0 0 1-5-5 5 5 0 0 1 5-5h3"/>
1104
  <line x1="8" y1="12" x2="16" y2="12"/>
1105
  </svg>
1106
+ <span>블로그</span>
1107
  </a>
1108
  <a href="https://github.com/Hugging-Face-KREW/Ko-AgentBench" target="_blank" rel="noopener noreferrer" class="hero-action-button">
1109
  <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
 
1118
  <polyline points="7 10 12 15 17 10"/>
1119
  <line x1="12" y1="15" x2="12" y2="3"/>
1120
  </svg>
1121
+ <span>데이터셋</span>
1122
  </a>
1123
  <a href="https://github.com/Hugging-Face-KREW/Ko-AgentBench/blob/main/evaluate_model_run.py#L55" target="_blank" rel="noopener noreferrer" class="hero-action-button">
1124
  <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
 
1127
  <path d="M12 17V7"/>
1128
  <path d="M17 17v-3"/>
1129
  </svg>
1130
+ <span>평가 지표</span>
1131
  </a>
1132
  </div>
1133
  """)
 
1141
  <p class="section-lead" style="text-align: center; margin: 0 auto 24px auto; max-width: 720px; line-height: 1.7; word-break: keep-all;">๋‹จ์ˆœ ๋„๊ตฌ ํ˜ธ์ถœ๋ถ€ํ„ฐ ์žฅ๊ธฐ์  ๋งฅ๋ฝ ๋Šฅ๋ ฅ, ๊ฐ•๊ฑด์„ฑ ์ฒ˜๋ฆฌ ๋Šฅ๋ ฅ๊นŒ์ง€ ์—์ด์ „ํŠธ์˜ ๋Šฅ๋ ฅ์„ 7๋‹จ๊ณ„๋กœ ์ž…์ฒด์ ์œผ๋กœ ๋ถ„์„ํ•˜์˜€์Šต๋‹ˆ๋‹ค.</p>
1142
  <div class="phase-grid">
1143
  <div class="phase-card">
1144
+ <h3>단일 턴</h3>
1145
  <div class="phase-chart" style="--progress:80%;">
1146
  <span>80%</span>
1147
  </div>
 
1154
  </ul>
1155
  </div>
1156
  <div class="phase-card">
1157
+ <h3>다중 턴</h3>
1158
  <div class="phase-chart" style="--progress:20%;">
1159
  <span>20%</span>
1160
  </div>
 
1944
  with gr.Column(elem_classes=["domain-selector-container"], elem_id="task-level-selector"):
1945
  gr.HTML("""
1946
  <div class="domain-header">
1947
+ <h2 class="domain-title" style="color: white;">๐Ÿง  ํƒœ์Šคํฌ ๋ ˆ๋ฒจ ์„ ํƒ</h2>
1948
+ <p class="domain-subtitle" style="color: white;">Ko-AgentBench์˜ ALL ยท ๋ชจ๋ธ๋ณ„ ์—์ด์ „ํŠธ ์ˆ˜ํ–‰ ๋Šฅ๋ ฅ์„ ๋‹จ๊ณ„๋ณ„๋กœ ์ธก์ •ํ•ด๋ณด์„ธ์š”.</p>
1949
  </div>
1950
  """)
1951
  domain_filter = gr.Radio(
 
1961
  with gr.Column(elem_classes=["domain-selector-container", "filters-sorting-container"], elem_id="filters-sorting-container"):
1962
  gr.HTML("""
1963
  <div class="domain-header">
1964
+ <h2 class="domain-title" style="color: white;">๐Ÿ” ํ•„ํ„ฐ ๋ฐ ์ •๋ ฌ</h2>
1965
  <p class="domain-subtitle" style="color: white;">๋ชจ๋ธ ์ ‘๊ทผ ๋ฐฉ์‹๊ณผ ์ •๋ ฌ ์ˆœ์„œ๋ฅผ ์„ ํƒํ•ด ๋งž์ถค ๋ทฐ๋ฅผ ๊ตฌ์„ฑํ•˜์„ธ์š”.</p>
1966
  </div>
1967
  """)
1968
  with gr.Row(elem_classes=["filters-sorting-row"]):
1969
  with gr.Column(scale=1, elem_classes=["filter-group"]):
1970
  with gr.Row(elem_classes=["filter-group-row"]):
1971
+ gr.HTML("<span class='filter-group-label' style='color: white;'>모델 접근</span>")
1972
  model_type_filter = gr.Radio(
1973
  choices=["All", "OSS", "API"],
1974
  value="All",
 
1978
  )
1979
  with gr.Column(scale=1, elem_classes=["filter-group"]):
1980
  with gr.Row(elem_classes=["filter-group-row"]):
1981
+ gr.HTML("<span class='filter-group-label' style='color: white;'>정렬 순서</span>")
1982
  sort_order = gr.Radio(
1983
  choices=["Descending", "Ascending"],
1984
  value="Descending",
 
2000
  gr.HTML("""
2001
  <div class="domain-selector-container domain-performance-container">
2002
  <div class="domain-header">
2003
+ <h2 class="domain-title" style="color: white;">핵심 역량 레이더</h2>
2004
+ <p class="domain-subtitle" style="color: white;">6๊ฐ€์ง€ ํ•„์ˆ˜ ํ•ต์‹ฌ ์š”์†Œ(์„ฑ๊ณต, ์‹คํ–‰, ์ถ”๋ก , ๊ฐ•๊ฑด์„ฑ, ํšจ์œจ์„ฑ, ํ˜ธ์ถœ ์œ ํšจ์„ฑ)๋ฅผ ์ถ”์ ํ•ฉ๋‹ˆ๋‹ค.</p>
2005
  </div>
2006
  """)
2007
 
2008
  with gr.Column(elem_classes=["domain-selector-container", "model-selector-container"], elem_id="radar-model-selector"):
2009
  gr.HTML("""
2010
  <div class="domain-header">
2011
+ <h2 class="domain-title" style="color: white;">๐ŸŽฏ ๋น„๊ตํ•  ๋ชจ๋ธ ์„ ํƒ</h2>
2012
+ <p class="domain-subtitle" style="color: white;">์—ญ๋Ÿ‰ ๋ ˆ์ด๋”์— ํ‘œ์‹œํ•  ๋ชจ๋ธ์„ ์ตœ๋Œ€ 5๊ฐœ๊นŒ์ง€ ์„ ํƒํ•˜์„ธ์š”.</p>
2013
  </div>
2014
  """)
2015
  model_selector = gr.Dropdown(
 
2040
  gr.HTML("""
2041
  <div class="domain-selector-container domain-performance-container level-metrics-wrapper">
2042
  <div class="domain-header">
2043
+ <h2 class="domain-title" style="color: white;">레벨별 상세 지표</h2>
2044
+ <p class="domain-subtitle" style="color: white;">๊ฐ Ko-AgentBench ๋‹จ๊ณ„๋ณ„ ๊ณ ์œ  ํ‰๊ฐ€ ์ง€ํ‘œ๋ฅผ ํ†ตํ•ด ๋ชจ๋ธ ์ ์ˆ˜๋ฅผ ๋น„๊ตํ•˜๊ณ  ๋” ์ž์„ธํžˆ ์‚ดํŽด๋ณด์„ธ์š”.</p>
2045
  </div>
2046
  """)
2047
 
2048
  with gr.Column(elem_classes=["domain-selector-container", "level-selector-container"], elem_id="level-selector-box"):
2049
  gr.HTML("""
2050
  <div class="domain-header">
2051
+ <h2 class="domain-title" style="color: white;">๐Ÿงญ ํƒœ์Šคํฌ ๋ ˆ๋ฒจ ๋ฐ ๋ชจ๋ธ ์„ ํƒ</h2>
2052
+ <p class="domain-subtitle" style="color: white;">๋ ˆ๋ฒจ๊ณผ ๋ชจ๋ธ(์ตœ๋Œ€ 5๊ฐœ)์„ ์„ ํƒํ•˜์—ฌ ์ƒ์„ธ SR(์„ฑ๊ณต๋ฅ ) ๊ธฐ๋ฐ˜ ์ง€ํ‘œ๋ฅผ ํƒ์ƒ‰ํ•˜์„ธ์š”.</p>
2053
  </div>
2054
  """)
2055
  level_metric_selector = gr.Dropdown(
 
2086
  gr.HTML("""
2087
  <div class="domain-selector-container domain-performance-container heatmap-wrapper">
2088
  <div class="domain-header">
2089
+ <h2 class="domain-title" style="color: white;">종합 성능 히트맵</h2>
2090
+ <p class="domain-subtitle" style="color: white;">๊ฐ ๋ชจ๋ธ์˜ L1~L7 Ko-AgentBench SR(์„ฑ๊ณต๋ฅ ) ์ ์ˆ˜๋ฅผ ํ•œ๋ˆˆ์— ๋ณด์„ธ์š”.</p>
2091
  </div>
2092
  <div class="chart-container heatmap-chart-container">
2093
  """)
 
2452
  gr.HTML("""
2453
  <div class="domain-selector-container performance-card-container">
2454
  <div class="domain-header">
2455
+ <h2 class="domain-title" style="color: white;">모델 성능 카드</h2>
2456
+ <p class="domain-subtitle" style="color: white;">๋ชจ๋“  ๋ชจ๋ธ์˜ ์ข…ํ•ฉ ์„ฑ๋Šฅ ์นด๋“œ๋กœ, ๋ฐœํ‘œ ๋ฐ ๋ณด๊ณ ์„œ์— ์ ํ•ฉํ•ฉ๋‹ˆ๋‹ค.</p>
2457
  </div>
2458
  <div class="performance-card-content">
2459
  """)
 
3182
  height=800,
3183
  width=900,
3184
  margin=dict(t=30, b=50, l=10, r=10),
3185
+ autosize=True,
3186
+ annotations=[
3187
+ dict(
3188
+ text="Galileo Agent Leaderboard",
3189
+ xref="paper", yref="paper",
3190
+ x=0.98, y=0.02,
3191
+ xanchor='right', yanchor='bottom',
3192
+ font=dict(size=10, color='#64748B'),
3193
+ showarrow=False
3194
+ )
3195
+ ]
3196
  )
3197
 
3198
  return fig