Spaces:

huggingface-KREW
/

Ko-AgentBench

Running on CPU Upgrade

App Files Files Community

en-version

by harheem - opened Oct 28, 2025

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

+549

-5272

Files changed (9) hide show

.DS_Store +0 -0
app.py +8 -31
banner_wide.png +0 -3
combined_evaluation_summary.csv +7 -16
components/leaderboard_components.py +10 -13
styles/leaderboard_styles.py +30 -205
tabs/{leaderboard_v1_kr.py → leaderboard_v1.py} +484 -690
tabs/leaderboard_v1_en.py +0 -0
utils.py +10 -13

.DS_Store CHANGED Viewed

Binary files a/.DS_Store and b/.DS_Store differ

app.py CHANGED Viewed

@@ -2,10 +2,9 @@ import warnings
 warnings.filterwarnings("ignore")
 import gradio as gr
-from tabs.leaderboard_v1_kr import create_leaderboard_v2_interface as leaderboard_kr
-from tabs.leaderboard_v1_en import create_leaderboard_v2_interface as leaderboard_en
-# 다크 모드 텍스트 색상 보정
 FIX_DARK_TEXT_CSS = """
 html.dark .gr-prose,
 html.dark .gr-prose p,
@@ -18,36 +17,14 @@ html.dark .gr-markdown * {
 """
 def create_app():
-    theme = gr.themes.Default()
     with gr.Blocks(theme=theme, css=FIX_DARK_TEXT_CSS) as app:
-        # 🔹 왼쪽 상단에 언어 전환 버튼
-        with gr.Row():
-            lang_btn = gr.Button("🌍 English", scale=0, elem_id="lang-toggle-btn")
-        # 🔹 기본은 한국어 UI
-        with gr.Column(visible=True) as kr_view:
-            leaderboard_kr()
-        # 🔹 영어 UI는 숨김
-        with gr.Column(visible=False) as en_view:
-            leaderboard_en()
-        # 🔹 버튼 클릭 시 토글
-        def toggle_language(current_label):
-            if "English" in current_label:
-                return "🇰🇷 Korean", gr.update(visible=False), gr.update(visible=True)
-            else:
-                return "🌍 English", gr.update(visible=True), gr.update(visible=False)
-        lang_btn.click(
-            toggle_language,
-            inputs=[lang_btn],
-            outputs=[lang_btn, kr_view, en_view],
-        )
     return app
 demo = create_app()
-demo.launch(ssr_mode=False)

 warnings.filterwarnings("ignore")
 import gradio as gr
+from tabs.leaderboard_v1 import create_leaderboard_v2_interface
+# CSS fix that forces prose/markdown text to stay light in dark mode
 FIX_DARK_TEXT_CSS = """
 html.dark .gr-prose,
 html.dark .gr-prose p,
 """
 def create_app():
+    # Recommended: use an explicit theme object (Default, Soft, Origin, etc.)
+    theme = gr.themes.Default()  # Switch to gr.themes.Origin() etc. if needed
     with gr.Blocks(theme=theme, css=FIX_DARK_TEXT_CSS) as app:
+        create_leaderboard_v2_interface()
     return app
 demo = create_app()
+# If SSR misbehaves on Spaces/Gradio 5, turn it off first to check
+demo.launch(ssr_mode=False)

banner_wide.png DELETED Viewed

Git LFS Details

SHA256: 66fa5541384dde4eac497d3aa9fbcfeccbb44cc7aa1e251acb200adbddf914a1
Pointer size: 131 Bytes
Size of remote file: 347 kB

combined_evaluation_summary.csv CHANGED Viewed

@@ -1,16 +1,7 @@
-Model,Vendor,Model Type,L1_Total_Tasks,L2_Total_Tasks,L3_Total_Tasks,L4_Total_Tasks,L5_Total_Tasks,L6_Total_Tasks,L7_Total_Tasks,L1_Evaluated_Tasks,L2_Evaluated_Tasks,L3_Evaluated_Tasks,L4_Evaluated_Tasks,L5_Evaluated_Tasks,L6_Evaluated_Tasks,L7_Evaluated_Tasks,L1_Avg_Exec_Time,L2_Avg_Exec_Time,L3_Avg_Exec_Time,L4_Avg_Exec_Time,L5_Avg_Exec_Time,L6_Avg_Exec_Time,L7_Avg_Exec_Time,L1_Avg_Tokens,L2_Avg_Tokens,L3_Avg_Tokens,L4_Avg_Tokens,L5_Avg_Tokens,L6_Avg_Tokens,L7_Avg_Tokens,L1_Avg_TPS,L2_Avg_TPS,L3_Avg_TPS,L4_Avg_TPS,L5_Avg_TPS,L6_Avg_TPS,L7_Avg_TPS,L1_Avg_TTFT,L2_Avg_TTFT,L3_Avg_TTFT,L4_Avg_TTFT,L5_Avg_TTFT,L6_Avg_TTFT,L7_Avg_TTFT,L1_RRR,L2_RRR,L3_RRR,L4_RRR,L5_RRR,L6_RRR,L7_RRR,L1_SR,L2_SR,L3_SR,L4_SR,L5_SR,L6_SR,L7_SR,L1_EPR_CVR,L2_EPR_CVR,L3_EPR_CVR,L4_EPR_CVR,L5_EPR_CVR,L6_EPR_CVR,L7_EPR_CVR,L1_pass@k,L2_pass@k,L3_pass@k,L4_pass@k,L5_pass@k,L6_pass@k,L7_pass@k,L1_TooAcc,L1_ArgAcc,L1_CallEM,L1_RespOK,L2_SelectAcc,L3_FSM,L3_PSM,L3_ΔSteps_norm,L4_Coverage,L4_SourceEPR,L5_AdaptiveRoutingScore,L5_FallbackSR,L6_RedundantCallRate,L6_EffScore,L7_ContextRetention,L7_RefRecall
-nova-2-lite,Amazon,API,11,15,10,10,20,15,10,11,15,10,10,20,15,10,8.93,5.72,9.82,18.44,9.28,1.54,2.34,3327.64,5633.33,16431.2,23542.0,8650.3,2454.87,3767.7,372.65,984.32,1672.39,1276.67,932.02,1590.31,1612.51,4.2313,2.2447,2.0483,4.5429,1.8541,1.5429,2.3359,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9,0.9,0.55,0.8,0.9,1.0,1.0,0.9417,1.0,0.2542,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.7955,0.4545,1.0,1.0,0.5,1.0,0.45,0.65,0.65,0.2625,0.65,1.0,0.0,0.95,0.85
-gpt-4o,OpenAI,API,11,15,10,10,20,15,10,11,15,10,10,20,15,10,5.42,7.28,12.74,19.25,8.37,4.45,3.68,3302.0,2871.0,11588.0,16022.6,3909.25,1564.2,4044.4,609.02,394.63,909.92,832.37,467.04,351.32,1098.86,1.5767,3.2437,3.3023,5.9534,1.5256,4.452,2.9725,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.5,0.8667,0.5,1.0,0.8667,1.0,1.0,0.1833,0.0,0.2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.5909,0.2727,1.0,1.0,0.4,1.0,0.35,0.7167,0.7167,0.2583,0.6,1.0,0.0,0.95,0.95
-DeepSeek-V3.1,DeepSeek,OSS,11,15,10,10,20,15,10,11,15,10,10,20,15,10,3.53,10.56,18.4,28.31,13.21,7.63,3.25,1469.73,10547.33,23309.7,42090.4,9212.0,4614.6,4392.9,416.14,998.63,1266.84,1486.94,697.45,604.79,1351.85,1.8044,3.7647,4.442,6.5445,2.0181,5.3715,2.6493,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9333,0.9,1.0,0.65,0.8667,0.5,0.8182,0.8667,0.74,0.775,0.2117,0.2,0.2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.8182,0.5455,0.2727,1.0,0.8667,0.3,0.8,0.3,0.6667,0.575,0.2133,0.7,1.0,0.1667,1.0,0.975
-gemini-2.5-flash,Google,API,11,15,10,10,20,15,10,11,15,10,10,20,15,10,4.3,4.88,7.75,11.4,6.07,2.63,4.14,1733.73,4466.67,3951.7,12934.7,2054.0,3562.87,4716.3,402.93,914.6,509.6,1135.09,338.22,1353.71,1137.94,1.975,1.9409,3.4497,3.5025,1.7375,1.8039,2.5157,1.0,1.0,1.0,1.0,1.0,1.0,0.7,0.9091,1.0,0.7,0.6,0.25,0.7333,0.3,0.9091,1.0,0.6,0.8,0.1,0.3333,0.3,1.0,1.0,1.0,1.0,1.0,1.0,0.7,0.9091,0.6136,0.1818,1.0,1.0,0.5,0.55,0.35,0.35,0.35,0.1,0.2,1.0,0.3333,0.9,0.875
-glm-4.6v,Z.ai,OSS,11,15,10,10,20,15,10,11,15,10,10,20,15,10,10.08,15.04,27.21,34.65,37.16,11.9,8.41,2465.09,8454.93,15996.0,35309.7,16376.0,2716.53,4826.2,244.61,562.21,587.98,1019.06,440.71,228.19,574.12,4.8704,5.7804,7.0628,8.554,5.1927,10.9267,6.6557,1.0,1.0,1.0,1.0,0.95,1.0,1.0,1.0,1.0,1.0,1.0,0.65,0.9333,0.6,1.0,1.0,1.0,1.0,0.3083,0.0667,0.3,1.0,1.0,1.0,1.0,0.95,1.0,1.0,1.0,0.6818,0.3636,1.0,1.0,0.6,0.9667,0.6,0.5667,0.5667,0.2601,0.75,1.0,0.0667,0.9,0.8
-grok-4.1-fast,xAI,API,11,15,10,10,20,15,10,11,15,10,10,20,15,10,11.99,17.15,27.66,44.02,39.33,12.44,17.68,3710.64,6381.8,15081.5,13744.4,16053.9,2901.0,5535.2,309.6,372.15,545.25,312.25,408.21,233.21,313.08,5.7846,6.5545,8.836,12.3512,6.6412,11.301,14.0295,1.0,1.0,1.0,0.8,1.0,1.0,1.0,0.9091,1.0,1.0,0.8,0.8,0.9333,0.8,1.0,1.0,1.0,0.8,0.3797,0.0667,0.4,1.0,1.0,1.0,0.8,1.0,1.0,1.0,1.0,0.75,0.5455,1.0,1.0,0.6,1.0,0.5667,0.5667,0.5667,0.3475,0.95,1.0,0.0667,0.975,0.85
-claude-haiku-4-5,Anthropic,API,11,15,10,10,20,15,10,11,15,10,10,20,15,10,5.18,9.9,14.65,21.61,18.33,3.69,4.22,4504.64,11367.93,23333.9,42628.5,13977.65,2732.53,7153.3,869.59,1148.23,1593.07,1972.65,762.46,741.38,1697.01,2.4328,3.2797,4.1784,5.2912,2.2585,3.6851,3.3065,1.0,1.0,1.0,1.0,0.95,1.0,1.0,1.0,1.0,1.0,0.9,0.65,0.8,0.7,1.0,1.0,1.0,1.0,0.2358,0.0,0.3,1.0,1.0,1.0,1.0,0.95,1.0,1.0,1.0,0.6136,0.2727,1.0,1.0,0.6,1.0,0.5,0.75,0.7389,0.2283,0.75,1.0,0.0,1.0,0.925
-gemini-2.5-flash-lite,Google,API,11,15,10,10,20,15,10,11,15,10,10,20,15,10,1.62,2.83,1.55,5.72,3.74,1.66,2.97,1930.09,3337.87,5892.0,15236.2,1795.9,1572.73,2577.8,1188.63,1179.12,3797.73,2664.96,480.67,944.86,868.65,0.6444,0.9106,0.6729,1.1369,0.5226,0.7943,0.6945,1.0,1.0,1.0,1.0,0.9,1.0,0.4,1.0,0.8667,0.2,0.7,0.25,0.6,0.4,1.0,0.8667,0.275,0.6,0.1167,0.2,0.2,1.0,1.0,1.0,1.0,0.9,1.0,0.4,1.0,0.6364,0.2727,1.0,0.8667,0.1,0.2,0.1,0.35,0.35,0.125,0.25,1.0,0.1333,0.975,0.825
-claude-sonnet-4-5,Anthropic,API,11,15,10,10,20,15,10,11,15,10,10,20,15,10,6.77,11.69,19.86,34.08,19.1,5.45,7.18,3215.09,5874.0,19958.4,60071.8,10702.45,2710.47,10297.8,474.96,502.51,1004.85,1762.73,560.27,497.52,1434.99,3.1551,5.243,5.9522,8.9693,3.4574,5.4468,4.6806,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9,0.55,0.8,0.6,1.0,1.0,1.0,1.0,0.1742,0.0,0.4,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.6591,0.2727,1.0,1.0,0.9,1.0,0.75,0.75,0.75,0.1892,0.6,1.0,0.0,1.0,0.975
-gpt-4o-mini,OpenAI,API,11,15,10,10,20,15,10,11,15,10,10,20,15,10,2.79,5.61,8.13,25.46,7.19,2.63,2.9,1389.55,4236.13,11772.4,11700.1,5203.7,1561.93,3940.3,498.7,755.34,1448.9,459.62,724.0,594.06,1357.18,1.2394,1.9904,2.5526,9.1994,0.9279,2.6286,2.1975,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9091,1.0,1.0,1.0,0.6,0.6667,0.5,1.0,0.8667,1.0,1.0,0.1946,0.0,0.2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.6591,0.2727,1.0,1.0,0.5,0.9167,0.5,0.5833,0.5833,0.2171,0.75,1.0,0.0,0.925,0.975
-gpt-5,OpenAI,API,11,15,10,10,20,15,10,11,15,10,10,20,15,10,5.64,11.23,14.48,24.59,19.64,9.31,10.15,2306.18,16867.2,19321.9,29718.7,10773.2,6753.07,9451.3,409.06,1501.34,1334.6,1208.62,548.57,725.02,931.01,2.4414,3.442,5.8573,7.5822,3.1615,5.978,5.431,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9091,0.9333,1.0,0.9,0.85,0.8667,0.8,1.0,1.0,0.7,0.7,0.2728,0.2,0.5,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.7273,0.3636,1.0,1.0,0.1,0.5667,0.4,0.55,0.5333,0.3,0.85,1.0,0.1444,1.0,0.975
-qwen3-next-80b-a3b,Alibaba,OSS,11,15,10,10,20,15,10,11,15,10,10,20,15,10,4.13,12.63,17.18,28.84,10.59,9.59,7.92,1937.82,4725.0,15345.8,22067.0,6512.1,2198.27,5761.5,469.0,374.15,893.49,765.08,615.2,229.2,727.4,1.907,5.8972,5.5666,10.0412,1.985,9.5896,5.561,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.6,0.9333,0.7,1.0,1.0,1.0,1.0,0.2375,0.0,0.2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.7727,0.4545,1.0,1.0,0.8,1.0,0.65,0.7,0.7,0.2542,0.7,1.0,0.0,0.975,0.95
-gpt-5-mini,OpenAI,API,11,15,10,10,20,15,10,11,15,10,10,20,15,10,7.14,7.36,12.37,13.11,11.67,7.22,8.02,2963.73,4288.47,9704.4,8528.4,3510.45,2465.07,5810.8,414.91,582.29,784.64,650.71,300.9,341.21,724.39,3.4248,3.2995,5.2383,6.41,2.7195,6.5991,6.5065,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9091,0.9333,0.9,0.8,0.2,0.8667,1.0,1.0,0.8667,0.6,0.6,0.0917,0.0667,0.3,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.7045,0.3636,1.0,1.0,0.3,0.55,0.25,0.3667,0.3667,0.0917,0.2,1.0,0.0667,1.0,0.95
-nova-lite,Amazon,API,11,15,10,10,20,15,10,11,15,10,10,20,15,10,3.29,7.72,12.08,18.88,11.81,5.05,3.2,2760.64,7563.27,17904.5,43855.6,12621.5,23029.87,6711.7,839.35,979.15,1482.74,2323.41,1068.7,4562.8,2094.59,1.4877,2.958,2.4853,4.0705,1.4959,2.0742,2.2498,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9091,1.0,0.5,0.9,0.3,0.8,0.4,1.0,1.0,1.0,1.0,0.1373,0.4667,0.4,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.5909,0.1818,1.0,1.0,0.5,0.85,0.45,0.5667,0.5667,0.1376,0.6,0.3,0.3133,0.725,0.675
-gemini-2.5-pro,Google,API,11,15,10,10,20,15,10,11,15,10,10,20,15,10,10.88,11.9,23.24,19.5,23.03,7.52,9.7,2524.45,4880.93,3022.7,15671.5,4011.9,5005.8,9071.0,232.11,410.31,130.06,803.81,174.17,665.86,935.55,5.2265,5.6138,9.9988,8.3578,5.6094,4.8197,5.9149,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9333,0.1,0.6,0.3,0.7333,0.5,1.0,1.0,0.5,0.7,0.125,0.4,0.5,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.7045,0.3636,1.0,1.0,0.0,0.2667,0.2,0.4667,0.4667,0.125,0.3,1.0,0.1333,0.875,0.85

+Model,Vendor,Model Type,L1_Total_Tasks,L2_Total_Tasks,L3_Total_Tasks,L4_Total_Tasks,L5_Total_Tasks,L6_Total_Tasks,L7_Total_Tasks,L1_Evaluated_Tasks,L2_Evaluated_Tasks,L3_Evaluated_Tasks,L4_Evaluated_Tasks,L5_Evaluated_Tasks,L6_Evaluated_Tasks,L7_Evaluated_Tasks,L1_Avg_Exec_Time,L2_Avg_Exec_Time,L3_Avg_Exec_Time,L4_Avg_Exec_Time,L5_Avg_Exec_Time,L6_Avg_Exec_Time,L7_Avg_Exec_Time,L1_Avg_Tokens,L2_Avg_Tokens,L3_Avg_Tokens,L4_Avg_Tokens,L5_Avg_Tokens,L6_Avg_Tokens,L7_Avg_Tokens,L1_Avg_TPS,L2_Avg_TPS,L3_Avg_TPS,L4_Avg_TPS,L5_Avg_TPS,L6_Avg_TPS,L7_Avg_TPS,L1_Avg_TTFT,L2_Avg_TTFT,L3_Avg_TTFT,L4_Avg_TTFT,L5_Avg_TTFT,L6_Avg_TTFT,L7_Avg_TTFT,L1_RRR,L2_RRR,L3_RRR,L4_RRR,L5_RRR,L6_RRR,L7_RRR,L1_SR,L2_SR,L3_SR,L4_SR,L5_SR,L6_SR,L7_SR,L1_EPR_CVR,L2_EPR_CVR,L3_EPR_CVR,L4_EPR_CVR,L5_EPR_CVR,L6_EPR_CVR,L7_EPR_CVR,L1_pass@k,L2_pass@k,L3_pass@k,L4_pass@k,L5_pass@k,L6_pass@k,L7_pass@k,L1_TooAcc,L1_ArgAcc,L1_CallEM,L1_RespOK,L2_SelectAcc,L3_FSM,L3_PSM,L3_ΔSteps_norm,L3_ProvAcc,L4_Coverage,L4_SourceEPR,L5_AdaptiveRoutingScore,L5_FallbackSR,L6_ReuseRage,L6_RedundantCallRate,L6_EffScore,L7_ContextRetention,L7_RefRecall
+kanana-1.5-8b-instruct-2505,Kakao,OSS,11,30,10,10,20,15,10,11,30,10,10,20,15,10,5.53,17.22,14.51,23.78,9.44,52.98,47.39,4556.36,6107.6,5723.4,7188.3,5665.9,28502.33,28738.1,823.46,354.62,394.38,302.24,599.94,538.01,606.41,1.5236,6.7827,5.9015,7.4927,1.4163,7.764,5.1605,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.8409,0.925,0.55,0.55,0.45,0.7167,0.4,1.0,1.0,1.0,0.9,0.225,1.0,0.9,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.6364,0.2727,1.0,1.0,0.0,0.5333,0.0,0.0,0.2667,0.2667,0.225,0.45,0.4,1.0,0.6,0.825,0.75
+skt_A.X-4.0-Light,SKT,OSS,11,30,10,10,20,15,10,11,30,10,10,20,15,10,5.15,17.37,21.51,9.06,9.23,38.97,33.94,4286.73,7456.1,13579.8,2284.9,6500.85,27744.0,25032.0,833.07,429.13,631.27,252.27,704.42,711.88,737.55,1.3615,5.8379,6.0725,6.2881,1.3627,5.3648,3.902,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.5455,0.7417,0.525,0.35,0.2875,0.55,0.45,1.0,1.0,1.0,0.3,0.2583,0.8667,0.9,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.8182,0.4545,1.0,1.0,0.2,0.7833,0.65,0.1,0.05,0.05,0.25,0.55,0.4,1.0,0.4667,0.8,0.775
+qwen3-8B,알리바바,OSS,11,30,10,10,20,15,10,11,30,10,10,20,15,10,24.54,33.11,38.89,61.09,46.28,102.03,92.19,5798.0,7600.07,8380.0,14758.8,9789.4,45946.13,55163.2,236.28,229.53,215.5,241.58,211.54,450.34,598.37,11.0876,13.3456,23.3045,16.4015,8.5784,16.7883,11.2336,1.0,1.0,0.9,0.9,1.0,1.0,1.0,0.5909,0.8083,0.175,0.35,0.45,0.7833,0.525,1.0,1.0,0.4,0.9,0.2258,1.0,0.95,1.0,1.0,0.9,0.8,0.9667,1.0,1.0,1.0,0.7955,0.4545,1.0,1.0,0.2,0.3,0.2,0.1,0.4667,0.4667,0.2333,0.55,0.2,1.0,0.5667,0.85,0.775
+gemini-2.5-pro,Google,API,11,30,10,10,20,15,10,11,30,10,10,20,15,10,9.01,10.45,11.43,29.65,15.91,43.0,33.16,5257.45,5761.23,6384.2,22304.6,7592.2,54436.6,50150.6,583.2,551.49,558.73,752.35,477.25,1266.0,1512.44,4.6263,5.4812,7.9657,8.8433,4.9659,7.1894,5.2974,0.9091,0.8,0.8,1.0,0.8,0.8667,0.9,0.8409,0.6583,0.2,0.425,0.4,0.4,0.35,0.9091,0.7667,0.2,0.7,0.1583,0.8667,0.9,0.9091,0.8,0.8,1.0,0.8,0.8667,0.9,0.9091,0.6364,0.2727,0.9091,0.7667,0.1,0.1667,0.1,0.0,0.4833,0.4833,0.1583,0.35,0.5333,1.0,0.1222,0.825,0.7
+Qwen3-4B-Instruct-2507,알리바바,OSS,11,30,10,10,20,15,10,11,30,10,10,20,15,10,6.66,22.89,14.8,51.19,11.71,86.63,60.09,5273.09,6447.9,9087.8,17502.5,5363.85,36058.4,37068.1,791.39,281.66,613.83,341.91,458.02,416.23,616.84,2.093,9.1244,4.4172,13.7638,1.8319,14.8681,8.245,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.6364,0.6583,0.15,0.375,0.3,0.6167,0.425,1.0,1.0,1.0,0.9,0.15,1.0,1.0,1.0,1.0,1.0,0.9333,1.0,1.0,1.0,1.0,0.75,0.3636,1.0,1.0,0.2,0.6333,0.7,0.0,0.5167,0.5167,0.15,0.3,0.1333,1.0,0.4,0.875,0.8
+Midm-2.0-Base-Instruct,KT,OSS,11,30,10,10,20,15,10,11,30,10,10,20,15,10,5.39,3.9,3.06,3.75,8.13,28.66,16.08,4185.82,2514.93,3418.3,2388.8,3084.5,22909.13,14079.1,775.89,644.46,1117.59,636.3,379.51,799.33,875.38,1.4775,1.8563,1.8855,1.6781,1.0824,1.6794,1.1356,1.0,1.0,1.0,1.0,0.95,1.0,1.0,0.5909,0.5167,0.25,0.325,0.275,0.4833,0.35,0.9091,0.5667,0.2,0.3,0.0667,0.9333,0.6,1.0,1.0,1.0,0.8667,0.9833,1.0,1.0,0.9091,0.6364,0.2727,1.0,0.5667,0.0,0.1,0.0,0.0,0.0,0.0,0.0667,0.15,0.0,0.9333,0.3,0.55,0.5

components/leaderboard_components.py CHANGED Viewed

@@ -5,8 +5,8 @@ These are stable components that don't change frequently
 def get_chart_colors():
     return {
-        "Private": "#593B1D",  # Rich brown for API
-        "Open source": "#FACC15",  # Warm amber for OSS
         "performance_bands": ["#DCFCE7", "#FEF9C3", "#FEE2E2"],
         "text": "white",
         "background": "#01091A",
@@ -16,12 +16,10 @@ def get_chart_colors():
 def get_rank_badge(rank):
     """Generate HTML for rank badge with appropriate styling"""
-    tag_background = "#593B1D"
-    tag_text_color = "#FFFFFF"
     badge_styles = {
-        1: ("1st", tag_background, tag_text_color),
-        2: ("2nd", tag_background, tag_text_color),
-        3: ("3rd", tag_background, tag_text_color),
     }
     if rank in badge_styles:
@@ -61,25 +59,24 @@ def get_type_badge(model_type):
     """Generate HTML for model type badge"""
     colors = get_chart_colors()
     color_map = {
-        "Open source": colors.get("Open source", "#FACC15"),
-        "Proprietary": colors.get("Private", "#593B1D"),
-        "Private": colors.get("Private", "#593B1D"),
     }
     label_map = {
         "Open source": "OSS",
         "Proprietary": "API",
         "Private": "API",
     }
-    bg_color = color_map.get(model_type, "#593B1D")
     display_label = label_map.get(model_type, model_type)
-    text_color = "#111827" if display_label == "OSS" else "#FFFFFF"
     return f"""
         <div style="
             display: inline-flex;
             align-items: center;
             padding: 4px 8px;
             background: {bg_color};
-            color: {text_color};
             border-radius: 4px;
             font-size: 0.85em;
             font-weight: 500;

 def get_chart_colors():
     return {
+        "Private": "#1098F7",  # Airglow Blue for Proprietary
+        "Open source": "#58BC82",  # Green for Open source
         "performance_bands": ["#DCFCE7", "#FEF9C3", "#FEE2E2"],
         "text": "white",
         "background": "#01091A",
 def get_rank_badge(rank):
     """Generate HTML for rank badge with appropriate styling"""
     badge_styles = {
+        1: ("1st", "linear-gradient(145deg, #ffd700, #ffc400)", "#000"),
+        2: ("2nd", "linear-gradient(145deg, #9ca3af, #787C7E)", "#fff"),
+        3: ("3rd", "linear-gradient(145deg, #CD7F32, #b36a1d)", "#fff"),
     }
     if rank in badge_styles:
     """Generate HTML for model type badge"""
     colors = get_chart_colors()
     color_map = {
+        "Open source": colors.get("Open source", "#58BC82"),
+        "Proprietary": colors.get("Private", "#1098F7"),
+        "Private": colors.get("Private", "#1098F7"),
     }
     label_map = {
         "Open source": "OSS",
         "Proprietary": "API",
         "Private": "API",
     }
+    bg_color = color_map.get(model_type, "#4F46E5")
     display_label = label_map.get(model_type, model_type)
     return f"""
         <div style="
             display: inline-flex;
             align-items: center;
             padding: 4px 8px;
             background: {bg_color};
+            color: white;
             border-radius: 4px;
             font-size: 0.85em;
             font-weight: 500;

styles/leaderboard_styles.py CHANGED Viewed

@@ -34,9 +34,9 @@ def get_leaderboard_css():
         --border-subtle: rgba(245, 246, 247, 0.08);
         --border-default: rgba(245, 246, 247, 0.12);
         --border-strong: rgba(245, 246, 247, 0.2);
-        --text-primary: #FFFFFF;
-        --text-secondary: #E2E8F0;
-        --text-muted: #94A3B8;
         --accent-primary: #ffd21e;
         --accent-secondary: #1098F7;
         --accent-tertiary: #F5F6F7;
@@ -44,38 +44,12 @@ def get_leaderboard_css():
         --glow-secondary: rgba(16, 152, 247, 0.4);
         --glow-tertiary: rgba(245, 246, 247, 0.3);
     }
-    html.light,
-    html.light body,
-    html.light .gradio-container {
-        --bg-primary: #F8FAFC;
-        --bg-secondary: rgba(15, 23, 42, 0.06);
-        --bg-card: rgba(255, 255, 255, 0.92);
-        --border-subtle: rgba(15, 23, 42, 0.08);
-        --border-default: rgba(15, 23, 42, 0.12);
-        --border-strong: rgba(15, 23, 42, 0.18);
-        --text-primary: #0B1120;
-        --text-secondary: #1E293B;
-        --text-muted: #475569;
-        --accent-primary: #F59E0B;
-        --accent-secondary: #2563EB;
-        --accent-tertiary: #111827;
-        --glow-primary: rgba(245, 158, 11, 0.25);
-        --glow-secondary: rgba(37, 99, 235, 0.2);
-        --glow-tertiary: rgba(15, 23, 42, 0.18);
-    }
-    html.light [style*="color: white"],
-    html.light [style*="color:white"],
-    html.light [style*="#FFFFFF"],
-    html.light [style*="#ffffff"] {
-        color: var(--text-primary) !important;
-    }
     /* Global font and background */
-    html, body, .gradio-container {
         font-family: 'Geist', -apple-system, BlinkMacSystemFont, 'Inter', sans-serif !important;
         background: var(--bg-primary) !important;
     }
     /* Headers and text */
@@ -86,15 +60,18 @@ def get_leaderboard_css():
     }
     p, span, div, li, ul li {
         font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
     }
     /* Labels and info text */
     label {
         font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
     }
     .gr-box label {
         font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
     }
@@ -181,7 +158,7 @@ def get_leaderboard_css():
     /* Radio button labels */
     input[type="radio"] + label {
-        color: var(--text-primary) !important;
     }
     input[type="radio"]:checked {
@@ -194,22 +171,26 @@ def get_leaderboard_css():
     .dropdown {
         border-color: var(--border-default) !important;
         background: var(--bg-card) !important;
         transition: all 0.2s ease !important;
     }
     /* Dropdown option styling */
     .dropdown option {
         background: var(--bg-card) !important;
     }
     /* Gradio dropdown specific styling */
     .gradio-dropdown select,
     .gradio-dropdown [role="combobox"],
     .gradio-dropdown input {
         background: var(--bg-card) !important;
     }
     .gradio-dropdown option {
         background: var(--bg-card) !important;
     }
@@ -229,16 +210,19 @@ def get_leaderboard_css():
         overflow-y: auto !important;
         font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
         box-shadow: 0 4px 16px rgba(0, 0, 0, 0.3) !important;
     }
     /* Table cells and headers */
     .dataframe td,
     .dataframe th {
     }
     /* Button styling */
     button {
         background: var(--bg-card) !important;
         border: 1px solid var(--border-default) !important;
         transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
     }
@@ -379,7 +363,7 @@ def get_leaderboard_css():
         display: inline-block !important;
         padding: 14px 28px !important;
         background: #ffd21e !important;
-        color: var(--text-primary) !important;
         text-decoration: none !important;
         border-radius: 16px !important;
         font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
@@ -398,7 +382,7 @@ def get_leaderboard_css():
         transform: translateY(-3px) !important;
         box-shadow: 0 12px 32px rgba(255, 210, 30, 0.5), 0 8px 16px rgba(0, 0, 0, 0.4) !important;
         background: #ffd21e !important;
-        color: var(--text-primary) !important;
         text-decoration: none !important;
         text-shadow: 0 2px 6px rgba(0, 0, 0, 0.45) !important;
     }
@@ -440,176 +424,24 @@ def get_leaderboard_css():
         border-color: #ffd21e !important;
         box-shadow: 0 8px 24px rgba(255, 210, 30, 0.3), 0 4px 12px rgba(0, 0, 0, 0.4) !important;
         text-decoration: none !important;
-        color: var(--text-primary) !important;
-    }
-    /* Ensure key hero/body text stays bright */
-    .hero-subtitle,
-    .section-lead,
-    .section-subtitle,
-    .criteria-card li,
-    .scenario-body,
-    .hero-action-button,
-    .hero-action-button span,
-    #lang-toggle-btn,
-    #lang-toggle-btn button {
-        color: #FFFFFF !important;
-    }
-    .secondary.svelte-1ixn6qd {
-        color: #FFFFFF !important;
-    }
-    /* Responsive adjustments */
-    @media (max-width: 1024px) {
-        .hero-title {
-            font-size: 4.5rem !important;
-        }
-        .hero-subtitle {
-            font-size: 1.6rem !important;
-        }
-        .hero-actions {
-            flex-wrap: wrap !important;
-            gap: 12px !important;
-        }
-        .performance-card {
-            padding: 24px !important;
-        }
-        .domain-selector-container {
-            padding: 24px !important;
-        }
-        .dashboard-section {
-            padding: 28px !important;
-        }
-    }
-    @media (max-width: 768px) {
-        .hero-banner-wrapper {
-            width: 100% !important;
-            margin: 0 0 16px 0 !important;
-        }
-        .hero-title {
-            font-size: 3.2rem !important;
-        }
-        .hero-subtitle {
-            font-size: 1.3rem !important;
-        }
-        .hero-actions {
-            flex-direction: column !important;
-            align-items: stretch !important;
-        }
-        .hero-action-button {
-            width: 100% !important;
-            justify-content: center !important;
-        }
-        .dashboard-section,
-        .domain-selector-container,
-        .performance-card {
-            margin: 20px 12px !important;
-            padding: 20px !important;
-        }
-        .performance-card .card-body {
-            grid-template-columns: 1fr !important;
-            gap: 20px !important;
-        }
-        .radar-slot {
-            width: 100% !important;
-            max-width: 260px !important;
-            margin: 0 auto !important;
-        }
-        .v2-table-container {
-            overflow-x: auto !important;
-        }
-        .v2-styled-table {
-            min-width: 720px !important;
-        }
-        .hero-actions svg {
-            width: 18px !important;
-            height: 18px !important;
-        }
-        .section-title {
-            font-size: 1.8rem !important;
-        }
-        .section-lead,
-        .section-subtitle {
-            font-size: 1rem !important;
-        }
-        .criteria-card {
-            padding: 16px !important;
-        }
-        .criteria-grid {
-            grid-template-columns: 1fr !important;
-            gap: 16px !important;
-        }
-        .phase-grid {
-            grid-template-columns: 1fr !important;
-        }
-        .hero-subtitle,
-        .section-lead,
-        .section-subtitle,
-        .criteria-card li,
-        .scenario-body {
-            text-align: left !important;
-        }
-    }
-    @media (max-width: 480px) {
-        .hero-title {
-            font-size: 2.4rem !important;
-        }
-        .hero-subtitle {
-            font-size: 1.1rem !important;
-        }
-        .hero-action-button {
-            font-size: 0.95rem !important;
-            padding: 10px 16px !important;
-        }
-        .performance-card {
-            padding: 18px !important;
-        }
-        .card-top-row {
-            flex-direction: column !important;
-            gap: 12px !important;
-        }
-        .rank-panel {
-            align-self: flex-start !important;
-        }
-        .model-selector-container,
-        .level-selector-container {
-            margin: 0 !important;
-        }
-        .hero-banner-wrapper {
-            margin-bottom: 12px !important;
-        }
-    }
-    }
-    /* Language toggle button */
-    #lang-toggle-btn button,
-    #lang-toggle-btn {
         color: #FFFFFF !important;
-        border-color: #ffd21e !important;
-    }
-    .hero-action-button {
-        border-color: #ffd21e !important;
     }
     /* Numeric content styling */
     .numeric-cell, .metric-value, .rank-value,
     .level-tile-score, .core-metric-card .metric-value {
-        color: var(--text-primary) !important;
         font-family: 'Geist Mono', monospace !important;
     }
     /* Table content */
     td, th, table * {
-        color: var(--text-primary) !important;
     }
     /* All numeric and data elements */
     .performance-card *, .v2-styled-table *, .dataframe * {
-        color: var(--text-primary) !important;
     }
     /* Enhanced dropdown styling - more specific selectors
@@ -622,18 +454,20 @@ def get_leaderboard_css():
     .model-dropdown [role="combobox"],
     .model-dropdown button {
         background: rgba(1, 9, 26, 0.95) !important;
         border: 1px solid var(--border-default) !important;
         border-radius: 8px !important;
     }
     .gradio-dropdown option,
     .model-dropdown option {
         background: rgba(1, 9, 26, 0.95) !important;
     }
     /* Force dropdown text color */
     /* .gradio-dropdown *, .model-dropdown * {
-        color: var(--text-primary) !important;
     } */
     /* Gradio 5.x compatible dropdown styling */
@@ -641,31 +475,22 @@ def get_leaderboard_css():
     .gradio-container [data-testid="dropdown"],
     .gradio-container select {
         background-color: rgba(1, 9, 26, 0.95) !important;
         border: 1px solid rgba(245, 246, 247, 0.12) !important;
     }
     .gradio-container .gradio-dropdown option,
     .gradio-container select option {
         background-color: rgba(1, 9, 26, 0.95) !important;
     }
     /* Target the actual visible text in dropdown */
     .gradio-container [role="combobox"],
     .gradio-container .gradio-dropdown .wrap > div {
         background-color: rgba(1, 9, 26, 0.95) !important;
     }
-    html.light .model-dropdown .gradio-dropdown,
-    html.light .model-dropdown [role="combobox"],
-    html.light .model-dropdown button,
-    html.light .gradio-container [data-testid="dropdown"],
-    html.light .gradio-container select,
-    html.light .gradio-container [role="combobox"],
-    html.light .gradio-container .gradio-dropdown .wrap > div {
-        background-color: rgba(255, 255, 255, 0.95) !important;
-        border-color: rgba(15, 23, 42, 0.12) !important;
-        box-shadow: 0 8px 20px rgba(15, 23, 42, 0.08) !important;
-    }
     </style>
     """

         --border-subtle: rgba(245, 246, 247, 0.08);
         --border-default: rgba(245, 246, 247, 0.12);
         --border-strong: rgba(245, 246, 247, 0.2);
+        --text-primary: #F5F6F7;
+        --text-secondary: #94A3B8;
+        --text-muted: #64748B;
         --accent-primary: #ffd21e;
         --accent-secondary: #1098F7;
         --accent-tertiary: #F5F6F7;
         --glow-secondary: rgba(16, 152, 247, 0.4);
         --glow-tertiary: rgba(245, 246, 247, 0.3);
     }
     /* Global font and background */
+    .gradio-container {
         font-family: 'Geist', -apple-system, BlinkMacSystemFont, 'Inter', sans-serif !important;
         background: var(--bg-primary) !important;
+        color: var(--text-primary) !important;
     }
     /* Headers and text */
     }
     p, span, div, li, ul li {
+        color: white !important;
         font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
     }
     /* Labels and info text */
     label {
+        color: white !important;
         font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
     }
     .gr-box label {
+        color: white !important;
         font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
     }
     /* Radio button labels */
     input[type="radio"] + label {
+        color: white !important;
     }
     input[type="radio"]:checked {
     .dropdown {
         border-color: var(--border-default) !important;
         background: var(--bg-card) !important;
+        color: white !important;
         transition: all 0.2s ease !important;
     }
     /* Dropdown option styling */
     .dropdown option {
         background: var(--bg-card) !important;
+        color: white !important;
     }
     /* Gradio dropdown specific styling */
     .gradio-dropdown select,
     .gradio-dropdown [role="combobox"],
     .gradio-dropdown input {
+        color: white !important;
         background: var(--bg-card) !important;
     }
     .gradio-dropdown option {
+        color: white !important;
         background: var(--bg-card) !important;
     }
         overflow-y: auto !important;
         font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
         box-shadow: 0 4px 16px rgba(0, 0, 0, 0.3) !important;
+        color: white !important;
     }
     /* Table cells and headers */
     .dataframe td,
     .dataframe th {
+        color: white !important;
     }
     /* Button styling */
     button {
         background: var(--bg-card) !important;
+        color: white !important;
         border: 1px solid var(--border-default) !important;
         transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
     }
         display: inline-block !important;
         padding: 14px 28px !important;
         background: #ffd21e !important;
+        color: #FFFFFF !important;
         text-decoration: none !important;
         border-radius: 16px !important;
         font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
         transform: translateY(-3px) !important;
         box-shadow: 0 12px 32px rgba(255, 210, 30, 0.5), 0 8px 16px rgba(0, 0, 0, 0.4) !important;
         background: #ffd21e !important;
+        color: #FFFFFF !important;
         text-decoration: none !important;
         text-shadow: 0 2px 6px rgba(0, 0, 0, 0.45) !important;
     }
         border-color: #ffd21e !important;
         box-shadow: 0 8px 24px rgba(255, 210, 30, 0.3), 0 4px 12px rgba(0, 0, 0, 0.4) !important;
         text-decoration: none !important;
         color: #FFFFFF !important;
     }
     /* Numeric content styling */
     .numeric-cell, .metric-value, .rank-value,
     .level-tile-score, .core-metric-card .metric-value {
+        color: white !important;
         font-family: 'Geist Mono', monospace !important;
     }
     /* Table content */
     td, th, table * {
+        color: white !important;
     }
     /* All numeric and data elements */
     .performance-card *, .v2-styled-table *, .dataframe * {
+        color: white !important;
     }
     /* Enhanced dropdown styling - more specific selectors
     .model-dropdown [role="combobox"],
     .model-dropdown button {
         background: rgba(1, 9, 26, 0.95) !important;
+        color: white !important;
         border: 1px solid var(--border-default) !important;
         border-radius: 8px !important;
     }
     .gradio-dropdown option,
     .model-dropdown option {
         background: rgba(1, 9, 26, 0.95) !important;
+        color: white !important;
     }
     /* Force dropdown text color */
     /* .gradio-dropdown *, .model-dropdown * {
+        color: white !important;
     } */
     /* Gradio 5.x compatible dropdown styling */
     .gradio-container [data-testid="dropdown"],
     .gradio-container select {
         background-color: rgba(1, 9, 26, 0.95) !important;
+        color: white !important;
         border: 1px solid rgba(245, 246, 247, 0.12) !important;
     }
     .gradio-container .gradio-dropdown option,
     .gradio-container select option {
         background-color: rgba(1, 9, 26, 0.95) !important;
+        color: white !important;
     }
     /* Target the actual visible text in dropdown */
     .gradio-container [role="combobox"],
     .gradio-container .gradio-dropdown .wrap > div {
+        color: white !important;
         background-color: rgba(1, 9, 26, 0.95) !important;
     }
     </style>
     """

tabs/{leaderboard_v1_kr.py → leaderboard_v1.py} RENAMED Viewed

@@ -53,8 +53,7 @@ def create_leaderboard_v2_tab():
         # Clean and prepare data
         df = df.copy()
-        exclude_cols = {'Model', 'Vendor', 'Model Type', 'LLM Type'}
-        numeric_candidate_cols = [col for col in df.columns if col not in exclude_cols]
         for col in numeric_candidate_cols:
             df[col] = pd.to_numeric(df[col], errors='coerce')
@@ -119,45 +118,36 @@ def create_leaderboard_v2_tab():
             df['Call Validity'] = df[epr_cols].mean(axis=1)
         # Use LLM Type from CSV directly, with mapping to display names
-        def normalize_model_type(value):
-            """Normalize raw type labels from CSV (e.g., OSS/API) to display values."""
-            if pd.isna(value):
-                return None
-            cleaned = str(value).strip()
-            if not cleaned:
-                return None
-            upper = cleaned.upper()
-            if upper in ("OSS", "OPEN SOURCE", "OPEN-SOURCE", "OPEN_SOURCE"):
-                return "Open source"
-            if upper in ("API", "PROPRIETARY", "PRIVATE", "CLOSED"):
-                return "Proprietary"
-            return None
-        # Prefer explicit type columns from the CSV, then fall back to vendor mapping
-        if 'Model Type' in df.columns:
-            df['Model Type'] = df['Model Type'].apply(normalize_model_type)
-        elif 'LLM Type' in df.columns:
-            df['Model Type'] = df['LLM Type'].apply(normalize_model_type)
         else:
-            df['Model Type'] = None
-        vendor_model_type_map = {
-            "OpenAI": "Proprietary",
-            "Anthropic": "Proprietary",
-            "Google": "Proprietary",
-            "Microsoft": "Proprietary",
-            "Mistral": "Proprietary",
-            "Databricks": "Open source",
-            "Meta": "Open source",
-            "Alibaba": "Open source",
-            "알리바바": "Open source",  # Korean name for Alibaba
-            "Kakao": "Open source",
-            "SKT": "Open source",
-            "KT": "Open source",
-            "xAI": "Proprietary",
-        }
-        df['Model Type'] = df['Model Type'].fillna(df['Vendor'].map(vendor_model_type_map))
-        df['Model Type'] = df['Model Type'].fillna('Proprietary')
         # Round numeric columns for better display
         round_three_cols = ['Avg AC', 'Avg TSQ', 'Avg Total Cost', 'Overall Success', 'Execution Accuracy',
@@ -234,36 +224,36 @@ def create_leaderboard_v2_tab():
     # Level metadata for the 7-stage task framework
     level_details = {
         "ALL": {
-            "title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>ALL · 전체 태스크</span>",
-            "description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>7개의 태스크 전반의 평균 성능을 한눈에 살펴보고 각 레벨 비교를 위한 기준점을 제공합니다.</span>"
         },
         "L1": {
-            "title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L1 · 단일 도구 호출</span>",
-            "description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>단일 도구 호출 능력과 기본적인 명령 수행 정확도를 평가합니다.</span>"
         },
         "L2": {
-            "title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L2 · 도구 선택</span>",
-            "description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>요구 사항에 맞는 도구를 고르고 적절한 파라미터로 호출하는 능력을 측정합니다.</span>"
         },
         "L3": {
-            "title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L3 · 도구 순차 추론</span>",
-            "description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>복수 단계의 순차적 reasoning을 통해 문제를 해결하는 과정을 검증합니다.</span>"
         },
         "L4": {
-            "title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L4 · 도구 병렬 추론</span>",
-            "description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>여러 소스의 정보를 병렬적으로 통합하고 요약하는 능력을 평가합니다.</span>"
         },
         "L5": {
-            "title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L5 · 오류 처리와 강건성</span>",
-            "description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>예상치 못한 오류나 실패 상황에 대한 인지와 대응 전략을 확인합니다.</span>"
         },
         "L6": {
-            "title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L6 · 효율적인 도구 활용</span>",
-            "description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>최소한의 호출과 비용으로 목표를 달성하는 운영 효율을 살펴봅니다.</span>"
         },
         "L7": {
-            "title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L7 · 장기 컨텍스트 기억</span>",
-            "description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>장기 대화 맥락을 유지하고 적절히 활용하는 능력을 집중적으로 분석합니다.</span>"
         }
     }
     default_level = "ALL"
@@ -301,7 +291,7 @@ def create_leaderboard_v2_tab():
                 border-collapse: collapse;
                 font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif;
                 background: var(--bg-card);
-                color: var(--text-primary);
             }
             .v2-styled-table thead {
@@ -315,7 +305,7 @@ def create_leaderboard_v2_tab():
                 padding: 14px 12px;
                 text-align: left;
                 font-weight: 600;
-                color: var(--text-primary);
                 border-bottom: 2px solid var(--accent-primary);
                 font-size: 13px;
                 text-transform: uppercase;
@@ -329,7 +319,7 @@ def create_leaderboard_v2_tab():
             .v2-styled-table td {
                 padding: 12px;
                 border-bottom: 1px solid var(--border-subtle);
-                color: var(--text-primary);
                 transition: all 0.2s ease;
             }
@@ -349,30 +339,30 @@ def create_leaderboard_v2_tab():
             .model-name {
                 font-weight: 500;
-                color: var(--text-primary);
                 transition: color 0.2s ease;
             }
             /* Keep model name color consistent on hover to emphasize row highlight */
             .v2-styled-table tr:hover .model-name {
-                color: var(--text-primary);
             }
             .numeric-cell {
                 font-family: 'Geist Mono', monospace;
                 font-size: 13px;
                 text-align: center;
-                color: var(--text-primary);
             }
             .highlight-header {
                 background: rgba(255, 210, 30, 0.14);
-                color: var(--text-primary);
             }
             .highlight-cell {
                 background: rgba(255, 210, 30, 0.08);
-                color: var(--text-primary);
                 font-weight: 600;
             }
         </style>
@@ -470,8 +460,8 @@ def create_leaderboard_v2_tab():
         return f"""
         <div class="domain-selector-container leaderboard-intro">
             <div class="domain-header">
-                <h2 class="domain-title" style="color: var(--text-primary);">Agent Leaderboard · {level_title}</h2>
-                <p class="domain-subtitle" style="color: var(--text-primary);">{level_description}</p>
             </div>
             <div class="dataframe-container">
         """
@@ -521,14 +511,6 @@ def create_leaderboard_v2_tab():
     # Load initial data
     initial_table = filter_and_sort_data(default_level, "All", "Overall Success", "Descending")
     initial_df = load_leaderboard_data()  # Load raw data for model selector
-    if not initial_df.empty:
-        overall_success_numeric = pd.to_numeric(initial_df.get('Overall Success'), errors='coerce')
-        if overall_success_numeric.notna().any():
-            initial_df = initial_df.assign(**{'Overall Success': overall_success_numeric}).sort_values(
-                'Overall Success', ascending=False, na_position='last'
-            )
-        else:
-            initial_df = initial_df.sort_values('Model')
     initial_selected_models = initial_df['Model'].tolist()[:5] if len(initial_df) > 0 else []
     initial_heatmap_models = initial_df['Model'].tolist()[:12] if len(initial_df) > 0 else []
     initial_heatmap = create_performance_heatmap(initial_df, initial_heatmap_models)
@@ -754,14 +736,12 @@ def create_leaderboard_v2_tab():
     # Header styles and navigation
     gr.HTML("""
     <style>
-    @import url('https://fonts.googleapis.com/css2?family=Nanum+Gothic:wght@400;700;800&family=Gowun+Dodum&family=Do+Hyeon&display=swap');
     /* Enhanced button styling with better gradio compatibility */
     .header-action-button {
         display: inline-block !important;
         padding: 14px 28px !important;
         background: #ffd21e !important;
-        color: var(--text-primary) !important;
         text-decoration: none !important;
         border-radius: 16px !important;
         font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
@@ -795,7 +775,7 @@ def create_leaderboard_v2_tab():
         transform: translateY(-3px) !important;
         box-shadow: 0 12px 32px rgba(255, 210, 30, 0.5), 0 8px 16px rgba(0, 0, 0, 0.4) !important;
         background: #ffd21e !important;
-        color: var(--text-primary) !important;
         text-decoration: none !important;
         text-shadow: 0 2px 6px rgba(0, 0, 0, 0.45) !important;
     }
@@ -810,52 +790,34 @@ def create_leaderboard_v2_tab():
         filter: drop-shadow(0 0 8px rgba(255, 255, 255, 0.3));
     }
-    .hero-banner-wrapper {
-        position: relative;
-        width: 100vw;
-        margin: 0 calc(-50vw + 50%) 20px calc(-50vw + 50%);
-        border-radius: 0 !important;
-        overflow: hidden !important;
-        box-shadow: 0 12px 32px rgba(0, 0, 0, 0.25) !important;
-    }
-    .hero-banner-wrapper::before {
-        content: "";
-        position: absolute;
-        inset: 0;
-        background: #01091A;
-        z-index: 0;
-    }
     #hero-banner {
-        position: relative;
-        width: 100% !important;
-        height: auto !important;
-        z-index: 1;
     }
     #hero-banner img {
-        width: 100% !important;
-        height: auto !important;
-        display: block !important;
-        object-fit: cover !important;
     }
     .hero-title {
-        font-size: 10rem;
         font-weight: 800;
         line-height: 1.1;
         background: linear-gradient(135deg, #FFE082 0%, #FFC107 50%, #FFB300 100%);
         -webkit-background-clip: text;
         -webkit-text-fill-color: transparent;
         margin-bottom: 1rem;
-        font-family: 'Nanum Gothic', sans-serif !important;
     }
     .hero-subtitle {
         color: var(--text-secondary);
-        font-size: 3rem;
-        font-family: 'Nanum Gothic', sans-serif !important;
         margin-top: 0;
     }
@@ -914,7 +876,6 @@ def create_leaderboard_v2_tab():
         box-shadow: 0 12px 30px rgba(0, 0, 0, 0.25);
         backdrop-filter: blur(12px);
         -webkit-backdrop-filter: blur(12px);
-        font-family: 'Nanum Gothic', sans-serif !important;
     }
     .dashboard-section.emphasized {
@@ -934,16 +895,15 @@ def create_leaderboard_v2_tab():
     }
     .section-title {
-        font-size: 3.75rem;
         font-weight: 700;
         color: var(--text-primary);
         margin-bottom: 12px;
         text-align: center !important;
-        font-family: 'Nanum Gothic', sans-serif !important;
     }
     .section-lead, .section-subtitle {
-        font-size: 1.32rem !important;
         color: var(--text-secondary);
         max-width: 720px;
         margin: 0 auto 24px auto;
@@ -952,7 +912,6 @@ def create_leaderboard_v2_tab():
         word-break: keep-all;
         white-space: normal;
         display: block;
-        font-family: 'Nanum Gothic', sans-serif !important;
     }
     .phase-grid {
@@ -970,11 +929,10 @@ def create_leaderboard_v2_tab():
     }
     .phase-card h3 {
-        font-size: 1.44rem !important;
         color: var(--text-primary);
         margin-bottom: 20px;
         font-weight: 700;
-        font-family: 'Nanum Gothic', sans-serif !important;
     }
     .phase-chart {
@@ -1002,26 +960,11 @@ def create_leaderboard_v2_tab():
     .phase-chart span {
         position: relative;
-        font-size: 1.2rem !important;
         font-weight: 700;
-        color: var(--text-primary) !important;
-        font-family: 'Nanum Gothic', sans-serif !important;
-    }
-    /* 추가적인 구체적 선택자 */
-    .phase-card .phase-chart span {
-        color: var(--text-primary) !important;
-        text-shadow: 0 1px 2px rgba(0, 0, 0, 0.8) !important;
-        font-family: 'Nanum Gothic', sans-serif !important;
-    }
-    .phase-grid .phase-chart span {
-        color: var(--text-primary) !important;
-        z-index: 10 !important;
-        font-family: 'Nanum Gothic', sans-serif !important;
     }
     .phase-list {
         list-style: none;
         padding: 0;
@@ -1036,8 +979,7 @@ def create_leaderboard_v2_tab():
         background: rgba(245, 246, 247, 0.05);
         border: 1px solid rgba(245, 246, 247, 0.08);
         color: var(--text-secondary);
-        font-size: 1.08rem !important;
-        font-family: 'Nanum Gothic', sans-serif !important;
     }
     .scenario-body {
@@ -1100,7 +1042,7 @@ def create_leaderboard_v2_tab():
     /* Responsive design */
     @media (max-width: 768px) {
         .hero-title {
-            font-size: 10rem;
         }
         .hero-action-button {
             width: 100% !important;
@@ -1124,7 +1066,7 @@ def create_leaderboard_v2_tab():
             gap: 8px;
         }
         .section-title {
-            font-size: 2.7rem;
         }
         .phase-chart {
             width: 100px;
@@ -1138,15 +1080,13 @@ def create_leaderboard_v2_tab():
     </style>
     """)
-    gr.HTML("<div class='hero-banner-wrapper'>")
     gr.Image(
-        value="banner_wide.png",
         show_label=False,
         interactive=False,
         type="filepath",
         elem_id="hero-banner"
     )
-    gr.HTML("</div>")
     gr.HTML("""
     <div style="text-align: center; padding: 20px 0;">
@@ -1159,35 +1099,35 @@ def create_leaderboard_v2_tab():
     gr.HTML("""
     <div class="hero-actions">
         <a href="https://hugging-face-krew.github.io/" target="_blank" rel="noopener noreferrer" class="hero-action-button">
-            <svg viewBox="0 0 24 24" fill="none" stroke="white" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
                 <path d="M15 7h3a5 5 0 0 1 5 5 5 5 0 0 1-5 5h-3m-6 0H6a5 5 0 0 1-5-5 5 5 0 0 1 5-5h3"/>
                 <line x1="8" y1="12" x2="16" y2="12"/>
             </svg>
-            <span>블로그</span>
         </a>
         <a href="https://github.com/Hugging-Face-KREW/Ko-AgentBench" target="_blank" rel="noopener noreferrer" class="hero-action-button">
-            <svg viewBox="0 0 24 24" fill="none" stroke="white" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
                 <path d="M9 19c-5 1.5-5-2.5-7-3"/>
                 <path d="M20 21v-3.87a3.37 3.37 0 0 0-.94-2.61c3.14-.35 6.44-1.54 6.44-7A5.44 5.44 0 0 0 20 4.77 5.07 5.07 0 0 0 19.91 1S18.73.65 16 2.48a13.38 13.38 0 0 0-7 0C6.27.65 5.09 1 5.09 1A5.07 5.07 0 0 0 5 4.77a5.44 5.44 0 0 0-1.5 3.78c0 5.42 3.3 6.61 6.44 7A3.37 3.37 0 0 0 9 18.13V22"/>
             </svg>
             <span>GitHub</span>
         </a>
         <a href="https://huggingface.co/datasets/huggingface-KREW/Ko-AgentBench" target="_blank" rel="noopener noreferrer" class="hero-action-button">
-            <svg viewBox="0 0 24 24" fill="none" stroke="white" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
                 <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/>
                 <polyline points="7 10 12 15 17 10"/>
                 <line x1="12" y1="15" x2="12" y2="3"/>
             </svg>
-            <span>데이터셋</span>
         </a>
-        <a href="https://github.com/Hugging-Face-KREW/Ko-AgentBench?tab=readme-ov-file#-%ED%8F%89%EA%B0%80-%EC%A7%80%ED%91%9C" target="_blank" rel="noopener noreferrer" class="hero-action-button">
-            <svg viewBox="0 0 24 24" fill="none" stroke="white" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
                 <path d="M3 3v18h18"/>
                 <path d="M7 17v-6"/>
                 <path d="M12 17V7"/>
                 <path d="M17 17v-3"/>
             </svg>
-            <span>평가 지표</span>
         </a>
     </div>
     """)
@@ -1196,31 +1136,31 @@ def create_leaderboard_v2_tab():
     gr.HTML("""
     <div class="dashboard-section">
         <div class="section-header">
-            <h2 class="section-title" style="font-family: 'Nanum Gothic', sans-serif; font-size: 2.5rem;">단계별 태스크 설계</h2>
         </div>
         <p class="section-lead" style="text-align: center; margin: 0 auto 24px auto; max-width: 720px; line-height: 1.7; word-break: keep-all;">단순 도구 호출부터 장기적 맥락 능력, 강건성 처리 능력까지 에이전트의 능력을 7단계로 입체적으로 분석하였습니다.</p>
         <div class="phase-grid">
             <div class="phase-card">
-                <h3>단일 턴</h3>
                 <div class="phase-chart" style="--progress:80%;">
-                    <span style="color: var(--text-primary) !important; text-shadow: 0 1px 2px rgba(0,0,0,0.8) !important; font-weight: 700 !important;">80%</span>
                 </div>
                 <ul class="phase-list">
-                    <li style="color: var(--text-primary);">L1: 단일 도구 호출</li>
-                    <li style="color: var(--text-primary);">L2: 도구 선택</li>
-                    <li style="color: var(--text-primary);">L3: 도구 순차 추론</li>
-                    <li style="color: var(--text-primary);">L4: 도구 병렬 추론</li>
-                    <li style="color: var(--text-primary);">L5: 오류 처리와 강건성</li>
                 </ul>
             </div>
             <div class="phase-card">
-                <h3>다중 턴</h3>
                 <div class="phase-chart" style="--progress:20%;">
-                    <span style="color: var(--text-primary) !important; text-shadow: 0 1px 2px rgba(0,0,0,0.8) !important; font-weight: 700 !important;">20%</span>
                 </div>
                 <ul class="phase-list">
-                    <li style="color: var(--text-primary);">L6: 효율적인 도구 활용</li>
-                    <li style="color: var(--text-primary);">L7: 장기 컨텍스트 기억</li>
                 </ul>
             </div>
         </div>
@@ -1231,21 +1171,20 @@ def create_leaderboard_v2_tab():
     gr.HTML("""
     <div class="dashboard-section emphasized">
         <div class="section-header">
-            <h2 class="section-title" style="font-size: 2.0rem;">18가지 한국형 API 사용 및 실생활 환경에 특화된 고품질 시나리오 구성</h2>
         </div>
         <div class="scenario-body">
-            <p style="color: var(--text-primary);">네이버, 카카오 등 국내 실사용 API를 기반으로, '약속 예약', '블로그 후기 검색'처럼 일상에 유용한 현실적인 문제 해결 시나리오를 구현했습니다.</p>
         </div>
-    </div>
         <div class="section-flow">⌄</div>
     """)
     # Section 3: Core evaluation criteria
     gr.HTML("""
     <div class="dashboard-section">
         <div class="section-header">
-            <h2 class="section-title" style="font-size: 2.0rem;">핵심 평가 기준</h2>
         </div>
         <div class="criteria-grid">
             <div class="criteria-card">
@@ -1279,8 +1218,6 @@ def create_leaderboard_v2_tab():
     # Domain filter section with enhanced styling
     gr.HTML("""
     <style>
-    @import url('https://fonts.googleapis.com/css2?family=Nanum+Gothic:wght@400;700;800&family=Gowun+Dodum&family=Do+Hyeon&display=swap');
     /* Enhanced domain selector styling */
     .domain-selector-container {
         background: #ffd21e0d;
@@ -1383,11 +1320,10 @@ def create_leaderboard_v2_tab():
         -webkit-background-clip: text;
         background-clip: text;
         -webkit-text-fill-color: transparent;
-        text-shadow: 0 0 3px rgba(255, 210, 30, 0.08), 0 0 8px rgba(255, 210, 30, 0.05);
-        filter: drop-shadow(0 0 2px rgba(255, 210, 30, 0.06));
         letter-spacing: 0.02em;
-        animation: title-shimmer 1.25s ease-in-out infinite;
-        font-family: 'Nanum Gothic', sans-serif !important;
     }
     @keyframes title-shimmer {
@@ -1675,8 +1611,8 @@ def create_leaderboard_v2_tab():
     .model-dropdown select,
     .model-dropdown [role="combobox"] {
-        background: #000000 !important;
-        border: 1px solid #333333 !important;
         border-radius: 999px !important;
         padding: 12px 24px !important;
         color: var(--text-primary) !important;
@@ -1707,8 +1643,8 @@ def create_leaderboard_v2_tab():
         gap: 8px !important;
         width: 100% !important;
         padding: 12px 24px !important;
-        background: #000000 !important;
-        border: 1px solid #333333 !important;
         border-radius: 999px !important;
         color: var(--text-primary) !important;
         font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
@@ -1765,7 +1701,7 @@ def create_leaderboard_v2_tab():
         background: #ffd21e !important;
         border: 1px solid rgba(255, 210, 30, 0.6) !important;
         border-radius: 999px !important;
-        color: var(--text-primary) !important;
         font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
         font-weight: 600 !important;
         font-size: 0.95rem !important;
@@ -1812,7 +1748,7 @@ def create_leaderboard_v2_tab():
         font-size: 1.5rem;
         margin-bottom: 4px;
         display: block;
-        filter: drop-shadow(0 0 10px white);
     }
     .domain-name {
@@ -1827,7 +1763,7 @@ def create_leaderboard_v2_tab():
         top: 8px;
         right: 8px;
         background: var(--accent-primary);
-        color: var(--text-primary);
         font-size: 0.75rem;
         padding: 2px 8px;
         border-radius: 12px;
@@ -1999,147 +1935,92 @@ def create_leaderboard_v2_tab():
         padding: 12px 20px !important;
         font-size: 0.95rem !important;
     }
-    /* Leaderboard controls row styling */
-    .leaderboard-controls-row {
-        margin: 20px 0 !important;
-        padding: 20px !important;
-        background: transparent !important;
-        border: none !important;
-        gap: 40px !important;
-    }
-    .leaderboard-controls-row .gr-column,
-    .leaderboard-controls-row .gr-row,
-    .leaderboard-controls-row .gr-box,
-    .leaderboard-controls-row .gradio-column,
-    .leaderboard-controls-row .gradio-row,
-    .leaderboard-controls-row .gradio-group {
-        background: transparent !important;
-        border: none !important;
-        box-shadow: none !important;
-        padding: 0 !important;
-    }
-    /* Remove all container backgrounds for leaderboard controls */
-    .leaderboard-controls-row * {
-        background-color: transparent !important;
-        background-image: none !important;
-        border: none !important;
-        box-shadow: none !important;
-    }
-    .leaderboard-controls-row .inline-radio,
-    .leaderboard-controls-row .domain-radio {
-        background: transparent !important;
-        border: none !important;
-        box-shadow: none !important;
-    }
-    /* Inline radio styling for integrated controls */
-    .inline-radio {
-        background: transparent !important;
-        border: none !important;
-        box-shadow: none !important;
-        padding: 0 !important;
-    }
-    .inline-radio .wrap {
-        display: flex !important;
-        gap: 8px !important;
-        flex-wrap: wrap !important;
-        justify-content: flex-start !important;
-        background: transparent !important;
-        border: none !important;
-        box-shadow: none !important;
-        padding: 0 !important;
-    }
-    .inline-radio label {
-        padding: 8px 16px !important;
-        background: rgba(245, 246, 247, 0.06) !important;
-        border: 1px solid var(--border-subtle) !important;
-        border-radius: 20px !important;
-        font-size: 0.85rem !important;
-        color: var(--text-primary) !important;
-        transition: all 0.2s ease !important;
-        cursor: pointer !important;
-    }
-    .inline-radio label:hover {
-        background: rgba(255, 210, 30, 0.12) !important;
-        border-color: var(--accent-primary) !important;
-    }
-    .inline-radio input[type="radio"]:checked + label,
-    .inline-radio label[aria-checked="true"] {
-        background: rgba(255, 210, 30, 0.2) !important;
-        border-color: var(--accent-primary) !important;
-        color: var(--text-primary) !important;
-        font-weight: 600 !important;
-    }
     </style>
     """)
     level_options = list(level_details.keys())
-    # Main leaderboard table with dynamic title and integrated controls
-    leaderboard_title = gr.HTML(update_leaderboard_title(default_level))
-    # Integrated controls within leaderboard section - stacked vertically
-    gr.HTML("<p style='color: var(--text-primary); margin: 5px 0 5px 0; font-size: 1.2rem;'>태스크 레벨 선택</p>")
-    domain_filter = gr.Radio(
-        choices=level_options,
-        value=default_level,
-        label="",
-        interactive=True,
-        container=False,
-        elem_classes=["domain-radio", "inline-radio"]
-    )
-    gr.HTML("<p style='color: var(--text-primary); margin: 5px 0 0px 0; font-size: 1.2rem;'>🔍 필터 및 정렬</p>")
-    with gr.Row():
-        with gr.Column(scale=1):
-            gr.HTML("<span style='color: var(--text-primary); font-size: 1.2rem; margin-bottom: 5px; display: block;'>모델 접근</span>")
-            model_type_filter = gr.Radio(
-                choices=["All", "OSS", "API"],
-                value="All",
-                label="",
-                elem_classes=["domain-radio", "inline-radio"],
-                container=False
-            )
-        with gr.Column(scale=1):
-            gr.HTML("<span style='color: var(--text-primary); font-size: 1.2rem; margin-bottom: 5px; display: block;'>정렬 순서</span>")
-            sort_order = gr.Radio(
-                choices=["Descending", "Ascending"],
-                value="Descending",
-                label="",
-                elem_classes=["domain-radio", "inline-radio"],
-                container=False
-            )
     leaderboard_table = gr.HTML(initial_table)
     # Radar Chart Section
     gr.HTML("""
     <div class="domain-selector-container domain-performance-container">
         <div class="domain-header">
-            <h2 class="domain-title" style="color: var(--text-primary);">핵심 역량 레이더</h2>
-            <p class="domain-subtitle" style="color: var(--text-primary);">6가지 필수 핵심 요소(성공, 실행, 추론, 강건성, 효율성, 호출 유효성)를 추적합니다.</p>
         </div>
     """)
-    gr.HTML("<p style='color: var(--text-primary); margin: 10px 0 0 0; font-size: 1.2rem; font-family: \"Nanum Gothic\", sans-serif;'>비교할 모델을 선택하세요. 최대 5개까지 가능합니다.</p>")
-    # gr.HTML("<p style='color: #b0b0b0; margin: 0 0 10px 0; font-size: 0.9rem;'>모델은 최대 5개까지 선택 가능 합니다.</p>")
-    model_selector = gr.Dropdown(
-        choices=initial_df['Model'].tolist()[:10],
-        value=initial_df['Model'].tolist()[:5],
-        multiselect=True,
-        label="",
-        info=None,
-        container=False,
-    )
     # Radar chart plot - wrapped in centered container
     gr.HTML('<div class="chart-container radar-chart-container">')
@@ -2155,29 +2036,292 @@ def create_leaderboard_v2_tab():
     gr.HTML("</div>")
-    # Define generate_performance_card function before using it
-    def generate_performance_card(model_name):
-        """Generate HTML for the model performance card"""
-        if not model_name:
-            return """<div style="text-align: center; color: var(--text-secondary); padding: 40px;">
-                Please select a model to generate its performance card
-            </div>"""
-        # Get model data
         df = load_leaderboard_data()
-        model_data = df[df['Model'] == model_name]
-        if model_data.empty:
-            return """<div style="text-align: center; color: var(--text-secondary); padding: 40px;">
-                Model not found in the database
-            </div>"""
-        row = model_data.iloc[0]
-        # Get overall rank based on overall success
-        df_with_success = df.copy()
-        df_with_success['Overall Success'] = pd.to_numeric(df_with_success.get('Overall Success', pd.Series()), errors='coerce')
         df_with_success = df_with_success[df_with_success['Overall Success'].notna()]
         df_sorted = df_with_success.sort_values('Overall Success', ascending=False).reset_index(drop=True)
         try:
@@ -2308,22 +2452,18 @@ def create_leaderboard_v2_tab():
     gr.HTML("""
     <div class="domain-selector-container performance-card-container">
         <div class="domain-header">
-            <h2 class="domain-title" style="color: var(--text-primary);">모델 성능 카드</h2>
-            <p class="domain-subtitle" style="color: var(--text-primary);">
-                모델의 성능 스펙트럼을 6대 핵심 지표와 L1~L7 단계별 종합 성공률(SR)로 시각화한 정밀 분석 카드를 확인해보세요.
-            </p>
-            <p class="domain-note" style="color: #bdbdbd; font-size: 0.85em; margin-top: 4px;">
-                 ※ Rank는 L1~L7 단계별 SR의 평균값을 기준으로 선정되었습니다.
-            </p>
         </div>
         <div class="performance-card-content">
     """)
     with gr.Column(elem_classes=["domain-selector-container", "model-selector-container"], elem_id="model-selector-box"):
         gr.HTML("""
-        <p class="domain-subtitle" style="color: var(--text-primary);">분석 카드를 생성할 모델을 선택하세요.</p>
         """)
         card_model_selector = gr.Dropdown(
             choices=initial_df['Model'].tolist(),
@@ -2331,10 +2471,10 @@ def create_leaderboard_v2_tab():
             label="",
             info=None,
             container=False,
-            # elem_classes=["model-dropdown"]
         )
         download_card_btn = gr.Button(
-            "PNG로 다운로드",
             elem_id="download-card-btn",
             elem_classes=["pill-button"]
         )
@@ -2353,275 +2493,6 @@ def create_leaderboard_v2_tab():
         </div>
     </div>
     """)
-    # Level metric breakdown section
-    gr.HTML("""
-    <div class="domain-selector-container domain-performance-container level-metrics-wrapper">
-        <div class="domain-header">
-            <h2 class="domain-title" style="color: var(--text-primary);">레벨별 상세 지표</h2>
-            <p class="domain-subtitle" style="color: var(--text-primary);">각 Ko-AgentBench 단계별 고유 평가 지표를 통해 모델 점수를 비교하고 더 자세히 살펴보세요.</p>
-        </div>
-    """)
-    with gr.Column(elem_classes=["domain-selector-container", "level-selector-container"], elem_id="level-selector-box"):
-        level_metric_selector = gr.Dropdown(
-            choices=level_ids,
-            value=level_ids[0] if level_ids else None,
-            multiselect=False,
-            label="",
-            info=None,
-            container=False,
-            elem_classes=["level-dropdown"]
-        )
-        level_model_selector = gr.Dropdown(
-            choices=initial_level_model_choices,
-            value=initial_level_model_values,
-            multiselect=True,
-            label="",
-            info=None,
-            container=False,
-            elem_classes=["model-dropdown", "level-model-dropdown"]
-        )
-    gr.HTML('<div class="chart-container level-metric-chart-container">')
-    level_metric_chart = gr.Plot(
-        label="",
-        value=initial_level_metric_chart,
-        elem_classes=["level-metric-plot", "plot-container"]
-    )
-    gr.HTML("""
-        </div>
-    </div>
-    """)
-    # # Heatmap section
-    # gr.HTML("""
-    # <div class="domain-selector-container domain-performance-container heatmap-wrapper">
-    #     <div class="domain-header">
-    #         <h2 class="domain-title" style="color: var(--text-primary);">종합 성능 히트맵</h2>
-    #         <p class="domain-subtitle" style="color: var(--text-primary);">각 모델의 L1~L7 Ko-AgentBench SR(성공률) 점수를 한눈에 보세요.</p>
-    #     </div>
-    #     <div class="chart-container heatmap-chart-container">
-    # """)
-    # heatmap_chart = gr.Plot(
-    #     label="",
-    #     value=initial_heatmap,
-    #     elem_classes=["heatmap-plot", "plot-container"]
-    # )
-    # gr.HTML("""
-    #     </div>
-    # </div>
-    # """)
-    # Update functions
-    def get_optimal_sort_order(sort_by_value):
-        """Return the optimal sort order for a given metric"""
-        # Metrics where higher is better (descending)
-        descending_metrics = ["Overall Success"] + [sr_column_map[level] for level in level_ids]
-        # Metrics where lower is better (ascending)
-        ascending_metrics = []
-        if sort_by_value in descending_metrics:
-            return "Descending"
-        elif sort_by_value in ascending_metrics:
-            return "Ascending"
-        else:
-            return "Descending"  # Default fallback
-    def update_table(level_filter, model_type_filter, sort_order):
-        title_html = update_leaderboard_title(level_filter)
-        sort_metric = "Overall Success" if level_filter == "ALL" else sr_column_map.get(resolve_level(level_filter), "Overall Success")
-        table_html = filter_and_sort_data(level_filter, model_type_filter, sort_metric, sort_order)
-        return title_html, table_html
-    def update_radar_chart(domain_filter, model_type_filter, sort_order, selected_models, selected_level, level_selected_models):
-        # Get filtered dataframe
-        df = load_leaderboard_data()
-        sort_metric = "Overall Success" if domain_filter == "ALL" else sr_column_map.get(resolve_level(domain_filter), "Overall Success")
-        filtered_df, _, _ = apply_filters(df, domain_filter, model_type_filter, sort_order, sort_metric)
-        # Update model selector choices based on filtered data
-        available_models_all = filtered_df['Model'].tolist()
-        available_models = available_models_all[:15]  # Top 15 from filtered results
-        # If selected models are not in available models, reset to top 5
-        if selected_models:
-            valid_selected = [m for m in selected_models if m in available_models]
-            # Check if more than 5 models are selected and show alert
-            if len(valid_selected) > 5:
-                gr.Warning("최대 5개 까지만 선택 가능합니다")
-                # Remove the last selected item (6th item) instead of keeping first 5
-                valid_selected = valid_selected[:-1]
-            if not valid_selected:
-                valid_selected = available_models[:5]
-        else:
-            valid_selected = available_models[:5]
-        # Create radar chart
-        chart = create_domain_radar_chart(filtered_df, valid_selected)
-        # Prepare heatmap order prioritizing selected models
-        # Level metric chart
-        effective_level = selected_level if selected_level in level_ids else (level_ids[0] if level_ids else None)
-        available_level_models = available_models_all
-        if level_selected_models:
-            valid_level_models = [m for m in level_selected_models if m in available_level_models][:5]
-            if not valid_level_models:
-                valid_level_models = available_level_models[:5]
-        else:
-            valid_level_models = available_level_models[:5]
-        level_metric_fig = create_level_metric_chart(filtered_df, effective_level, valid_level_models) if effective_level else create_empty_level_metric_chart("Select a level to view its metrics")
-        return (
-            gr.Dropdown(
-                choices=available_models,
-                value=valid_selected,
-                multiselect=True,
-                label="",
-                info=None,
-                container=False,
-                # elem_classes=["model-dropdown"]
-            ),
-            chart,
-            gr.Dropdown(
-                choices=available_level_models,
-                value=valid_level_models,
-                multiselect=True,
-                label="",
-                info=None,
-                container=False,
-                elem_classes=["model-dropdown", "level-model-dropdown"]
-            ),
-            level_metric_fig,
-        )
-    def update_radar_only(domain_filter, model_type_filter, sort_order, selected_models, selected_level, level_selected_models):
-        # Get filtered dataframe
-        df = load_leaderboard_data()
-        sort_metric = "Overall Success" if domain_filter == "ALL" else sr_column_map.get(resolve_level(domain_filter), "Overall Success")
-        filtered_df, _, _ = apply_filters(df, domain_filter, model_type_filter, sort_order, sort_metric)
-        available_models_all = filtered_df['Model'].tolist()
-        if selected_models:
-            valid_selected = [m for m in selected_models if m in available_models_all]
-            # Check if more than 5 models are selected and show alert
-            if len(valid_selected) > 5:
-                # JavaScript alert for exceeding 5 models
-                gr.Warning("최대 5개 까지만 선택 가능합니다")
-                # Remove the last selected item (6th item) instead of keeping first 5
-                valid_selected = valid_selected[:-1]
-            if not valid_selected:
-                valid_selected = available_models_all[:5]
-        else:
-            valid_selected = available_models_all[:5]
-        effective_level = selected_level if selected_level in level_ids else (level_ids[0] if level_ids else None)
-        available_level_models = available_models_all
-        if level_selected_models:
-            valid_level_models = [m for m in level_selected_models if m in available_level_models][:5]
-            if not valid_level_models:
-                valid_level_models = available_level_models[:5]
-        else:
-            valid_level_models = available_level_models[:5]
-        level_metric_fig = create_level_metric_chart(filtered_df, effective_level, valid_level_models) if effective_level else create_empty_level_metric_chart("Select a level to view its metrics")
-        return (
-            gr.Dropdown(
-                choices=available_models_all[:15],
-                value=valid_selected,
-                multiselect=True,
-                label="",
-                info=None,
-                container=False,
-            ),
-            create_domain_radar_chart(filtered_df, valid_selected),
-            gr.Dropdown(
-                choices=available_level_models,
-                value=valid_level_models,
-                multiselect=True,
-                label="",
-                info=None,
-                container=False,
-                elem_classes=["model-dropdown", "level-model-dropdown"]
-            ),
-            level_metric_fig,
-        )
-    def update_level_metric_only(domain_filter, model_type_filter, sort_order, selected_models, selected_level, level_selected_models):
-        df = load_leaderboard_data()
-        sort_metric = "Overall Success" if domain_filter == "ALL" else sr_column_map.get(resolve_level(domain_filter), "Overall Success")
-        filtered_df, _, _ = apply_filters(df, domain_filter, model_type_filter, sort_order, sort_metric)
-        available_models = filtered_df['Model'].tolist()
-        if level_selected_models:
-            valid_level_models = [m for m in level_selected_models if m in available_models]
-            # Check if more than 5 models are selected and show alert
-            if len(valid_level_models) > 5:
-                gr.Warning("최대 5개 까지만 선택 가능합니다")
-                # Remove the last selected item (6th item) instead of keeping first 5
-                valid_level_models = valid_level_models[:-1]
-            if not valid_level_models:
-                valid_level_models = available_models[:5]
-        else:
-            valid_level_models = available_models[:5]
-        effective_level = selected_level if selected_level in level_ids else (level_ids[0] if level_ids else None)
-        level_chart = create_level_metric_chart(filtered_df, effective_level, valid_level_models) if effective_level else create_empty_level_metric_chart("Select a level to view its metrics")
-        return (
-            gr.Dropdown(
-                choices=available_models,
-                value=valid_level_models,
-                multiselect=True,
-                label="",
-                info=None,
-                container=False,
-                elem_classes=["model-dropdown", "level-model-dropdown"]
-            ),
-            level_chart,
-        )
-    # Update table when filters change
-    filter_inputs = [domain_filter, model_type_filter, sort_order]
-    for input_component in filter_inputs:
-        input_component.change(
-            fn=update_table,
-            inputs=filter_inputs,
-            outputs=[leaderboard_title, leaderboard_table]
-        )
-        # Also update radar chart when filters change
-        input_component.change(
-            fn=update_radar_chart,
-            inputs=filter_inputs + [model_selector, level_metric_selector, level_model_selector],
-            outputs=[model_selector, radar_chart, level_model_selector, level_metric_chart]
-        )
-    # Update radar chart when model selection changes
-    model_selector.change(
-        fn=update_radar_only,
-        inputs=filter_inputs + [model_selector, level_metric_selector, level_model_selector],
-        outputs=[model_selector, radar_chart, level_model_selector, level_metric_chart]
-    )
-    level_metric_selector.change(
-        fn=update_level_metric_only,
-        inputs=filter_inputs + [model_selector, level_metric_selector, level_model_selector],
-        outputs=[level_model_selector, level_metric_chart]
-    )
-    level_model_selector.change(
-        fn=update_level_metric_only,
-        inputs=filter_inputs + [model_selector, level_metric_selector, level_model_selector],
-        outputs=[level_model_selector, level_metric_chart]
-    )
     # Add custom CSS for the performance card
     gr.HTML("""
@@ -2844,8 +2715,8 @@ def create_leaderboard_v2_tab():
     .level-dropdown select,
     .level-dropdown [role="combobox"],
     .level-dropdown button {
-        background: #000000 !important;
-        border: 1px solid #333333 !important;
         border-radius: 999px !important;
         padding: 12px 20px !important;
         color: var(--text-primary) !important;
@@ -2855,7 +2726,7 @@ def create_leaderboard_v2_tab():
         text-align: center !important;
         min-height: 46px !important;
         transition: all 0.3s ease !important;
-        box-shadow: 0 10px 24px rgba(0, 0, 0, 0.3) !important;
     }
     .level-dropdown select:hover,
@@ -2872,14 +2743,6 @@ def create_leaderboard_v2_tab():
         margin: 12px auto 0 !important;
     }
-    .level-model-dropdown select,
-    .level-model-dropdown [role="combobox"],
-    .level-model-dropdown button {
-        background: #000000 !important;
-        border: 1px solid #333333 !important;
-        color: var(--text-primary) !important;
-    }
     .radar-placeholder {
         display: flex;
         flex-direction: column;
@@ -3032,74 +2895,6 @@ def create_leaderboard_v2_tab():
         }
     }
-    /* 폰트 강제 적용 - 최종 우선순위 */
-    .dashboard-section,
-    .dashboard-section *,
-    .dashboard-section h2,
-    .dashboard-section h3,
-    .dashboard-section p,
-    .dashboard-section li,
-    .section-lead,
-    .section-subtitle,
-    .phase-card h3,
-    .phase-list li,
-    .scenario-body p,
-    .criteria-card h3,
-    .criteria-card ul,
-    .criteria-card li {
-        font-family: "Nanum Gothic", sans-serif !important;
-    }
-    /* section-title 강제 적용 */
-    .section-title,
-    h2.section-title,
-    .dashboard-section .section-title,
-    .section-header .section-title {
-        font-family: "Nanum Gothic", sans-serif !important;
-    }
-    .domain-title,
-    h2.domain-title,
-    .domain-header .domain-title {
-        font-family: "Nanum Gothic", sans-serif !important;
-    }
-    .hero-title,
-    .hero-subtitle,
-    h1.hero-title,
-    p.hero-subtitle {
-        font-family: "Nanum Gothic", sans-serif !important;
-        font-size: 2rem; !important;
-    }
-    /* hero-title 크기 강제 적용 */
-    .hero-title,
-    h1.hero-title {
-        font-size: 4rem !important;
-    }
-    .phase-chart span,
-    .phase-card .phase-chart span,
-    .phase-grid .phase-chart span {
-        font-family: "Nanum Gothic", sans-serif !important;
-        font-size: 1.2rem !important;
-    }
-    .section-lead, .section-subtitle {
-        font-size: 1.32rem !important;
-        font-family: "Nanum Gothic", sans-serif !important;
-    }
-    .phase-card h3 {
-        font-size: 1.44rem !important;
-        font-family: "Nanum Gothic", sans-serif !important;
-    }
-    .phase-list li {
-        font-size: 1.08rem !important;
-        font-family: "Nanum Gothic", sans-serif !important;
-    }
     </style>
     """)
@@ -3207,7 +3002,7 @@ def create_leaderboard_v2_tab():
                 label="",
                 info=None,
                 container=False,
-                # elem_classes=["model-dropdown"]
             )
         input_component.change(
@@ -3262,8 +3057,8 @@ def create_domain_radar_chart(df, selected_models=None, max_models=5):
     palette = [
         {'fill': 'rgba(255, 210, 30, 0.25)', 'line': '#ffd21e'},
         {'fill': 'rgba(255, 138, 60, 0.22)', 'line': '#FF8A3C'},
-        {'fill': 'rgba(161, 98, 7, 0.22)', 'line': '#A16207'},
-        {'fill': 'rgba(220, 38, 38, 0.20)', 'line': '#DC2626'},
         {'fill': 'rgba(248, 250, 252, 0.20)', 'line': '#F8FAFC'},
     ]
@@ -3387,8 +3182,7 @@ def create_domain_radar_chart(df, selected_models=None, max_models=5):
         height=800,
         width=900,
         margin=dict(t=30, b=50, l=10, r=10),
-        autosize=True,
-        annotations=[]
     )
     return fig
@@ -3647,8 +3441,8 @@ def create_level_metric_chart(df, level, selected_models=None, max_models=5):
     model_palette = [
         '#ffd21e',
         '#FF8A3C',
-        '#A16207',
-        '#DC2626',
         '#F8FAFC',
         '#38BDF8',
     ]
@@ -3686,7 +3480,7 @@ def create_level_metric_chart(df, level, selected_models=None, max_models=5):
         paper_bgcolor="#01091A",
         plot_bgcolor="rgba(245, 246, 247, 0.02)",
         height=plot_height,
-        autosize=True,
         margin=dict(t=90, b=80, l=220, r=160),
         legend=dict(
             orientation="h",
@@ -3738,7 +3532,7 @@ def create_empty_level_metric_chart(message):
         paper_bgcolor="#01091A",
         plot_bgcolor="rgba(245, 246, 247, 0.02)",
         height=420,
-        autosize=True,
         margin=dict(t=80, b=60, l=80, r=120),
         title=dict(
             text="<b>Level Metric Breakdown</b>",

         # Clean and prepare data
         df = df.copy()
+        numeric_candidate_cols = [col for col in df.columns if col not in ('Model', 'Vendor')]
         for col in numeric_candidate_cols:
             df[col] = pd.to_numeric(df[col], errors='coerce')
             df['Call Validity'] = df[epr_cols].mean(axis=1)
         # Use LLM Type from CSV directly, with mapping to display names
+        if 'LLM Type' in df.columns:
+            # Clean the LLM Type column to remove any whitespace
+            df['LLM Type'] = df['LLM Type'].astype(str).str.strip()
+            # Map LLM Type to Model Type
+            def map_llm_type(llm_type):
+                if llm_type.upper() == "OSS":
+                    return "Open source"
+                else:
+                    return "Proprietary"
+            df['Model Type'] = df['LLM Type'].apply(map_llm_type)
         else:
+            # Fallback to vendor mapping if LLM Type column doesn't exist
+            vendor_model_type_map = {
+                "OpenAI": "Proprietary",
+                "Anthropic": "Proprietary",
+                "Google": "Proprietary",
+                "Microsoft": "Proprietary",
+                "Mistral": "Proprietary",
+                "Databricks": "Open source",
+                "Meta": "Open source",
+                "Alibaba": "Open source",
+                "알리바바": "Open source",  # Korean name for Alibaba
+                "Kakao": "Open source",
+                "SKT": "Open source",
+                "KT": "Open source",
+                "xAI": "Proprietary",
+            }
+            df['Model Type'] = df['Vendor'].map(vendor_model_type_map).fillna('Proprietary')
         # Round numeric columns for better display
         round_three_cols = ['Avg AC', 'Avg TSQ', 'Avg Total Cost', 'Overall Success', 'Execution Accuracy',
     # Level metadata for the 7-stage task framework
     level_details = {
         "ALL": {
+            "title": "ALL · 전체 태스크",
+            "description": "7개의 태스크 전반의 평균 성능을 한눈에 살펴보고 각 레벨 비교를 위한 기준점을 제공합니다."
         },
         "L1": {
+            "title": "<span style='color: white;'>L1 · 단일 도구 실행</span>",
+            "description": "<span style='color: white;'>단일 도구 실행 능력과 기본적인 명령 수행 정확도를 평가합니다.</span>"
         },
         "L2": {
+            "title": "<span style='color: white;'>L2 · 도구 선택 능력</span>",
+            "description": "<span style='color: white;'>요구 사항에 맞는 도구를 고르고 적절한 파라미터로 호출하는 능력을 측정합니다.</span>"
         },
         "L3": {
+            "title": "<span style='color: white;'>L3 · 순차적 추론 (Chaining)</span>",
+            "description": "<span style='color: white;'>복수 단계의 순차적 reasoning을 통해 문제를 해결하는 과정을 검증합니다.</span>"
         },
         "L4": {
+            "title": "<span style='color: white;'>L4 · 병렬적 추론 (Aggregation)</span>",
+            "description": "<span style='color: white;'>여러 소스의 정보를 병렬적으로 통합하고 요약하는 능력을 평가합니다.</span>"
         },
         "L5": {
+            "title": "<span style='color: white;'>L5 · 강건성 (Robustness / Fallback)</span>",
+            "description": "<span style='color: white;'>예상치 못한 오류나 실패 상황에 대한 인지와 대응 전략을 확인합니다.</span>"
         },
         "L6": {
+            "title": "<span style='color: white;'>L6 · 효율성 (Efficiency)</span>",
+            "description": "<span style='color: white;'>최소한의 호출과 비용으로 목표를 달성하는 운영 효율을 살펴봅니다.</span>"
         },
         "L7": {
+            "title": "<span style='color: white;'>L7 · 장기 컨텍스트 기억 (Contextual Memory)</span>",
+            "description": "<span style='color: white;'>장기 대화 맥락을 유지하고 적절히 활용하는 능력을 집중적으로 분석합니다.</span>"
         }
     }
     default_level = "ALL"
                 border-collapse: collapse;
                 font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif;
                 background: var(--bg-card);
+                color: white;
             }
             .v2-styled-table thead {
                 padding: 14px 12px;
                 text-align: left;
                 font-weight: 600;
+                color: white;
                 border-bottom: 2px solid var(--accent-primary);
                 font-size: 13px;
                 text-transform: uppercase;
             .v2-styled-table td {
                 padding: 12px;
                 border-bottom: 1px solid var(--border-subtle);
+                color: white;
                 transition: all 0.2s ease;
             }
             .model-name {
                 font-weight: 500;
+                color: white;
                 transition: color 0.2s ease;
             }
             /* Keep model name color consistent on hover to emphasize row highlight */
             .v2-styled-table tr:hover .model-name {
+                color: white;
             }
             .numeric-cell {
                 font-family: 'Geist Mono', monospace;
                 font-size: 13px;
                 text-align: center;
+                color: white;
             }
             .highlight-header {
                 background: rgba(255, 210, 30, 0.14);
+                color: white;
             }
             .highlight-cell {
                 background: rgba(255, 210, 30, 0.08);
+                color: white;
                 font-weight: 600;
             }
         </style>
         return f"""
         <div class="domain-selector-container leaderboard-intro">
             <div class="domain-header">
+                <h2 class="domain-title" style="color: white;">Agent Leaderboard · {level_title}</h2>
+                <p class="domain-subtitle" style="color: white;">{level_description}</p>
             </div>
             <div class="dataframe-container">
         """
     # Load initial data
     initial_table = filter_and_sort_data(default_level, "All", "Overall Success", "Descending")
     initial_df = load_leaderboard_data()  # Load raw data for model selector
     initial_selected_models = initial_df['Model'].tolist()[:5] if len(initial_df) > 0 else []
     initial_heatmap_models = initial_df['Model'].tolist()[:12] if len(initial_df) > 0 else []
     initial_heatmap = create_performance_heatmap(initial_df, initial_heatmap_models)
     # Header styles and navigation
     gr.HTML("""
     <style>
     /* Enhanced button styling with better gradio compatibility */
     .header-action-button {
         display: inline-block !important;
         padding: 14px 28px !important;
         background: #ffd21e !important;
+        color: #FFFFFF !important;
         text-decoration: none !important;
         border-radius: 16px !important;
         font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
         transform: translateY(-3px) !important;
         box-shadow: 0 12px 32px rgba(255, 210, 30, 0.5), 0 8px 16px rgba(0, 0, 0, 0.4) !important;
         background: #ffd21e !important;
+        color: #FFFFFF !important;
         text-decoration: none !important;
         text-shadow: 0 2px 6px rgba(0, 0, 0, 0.45) !important;
     }
         filter: drop-shadow(0 0 8px rgba(255, 255, 255, 0.3));
     }
     #hero-banner {
+        max-width: 960px;
+        margin: 0 auto 20px auto;
+        border-radius: 16px;
+        overflow: hidden;
+        box-shadow: 0 12px 32px rgba(0, 0, 0, 0.25);
     }
     #hero-banner img {
+        width: 100%;
+        height: auto;
+        display: block;
     }
     .hero-title {
+        font-size: 5rem;
         font-weight: 800;
         line-height: 1.1;
         background: linear-gradient(135deg, #FFE082 0%, #FFC107 50%, #FFB300 100%);
         -webkit-background-clip: text;
         -webkit-text-fill-color: transparent;
         margin-bottom: 1rem;
     }
     .hero-subtitle {
         color: var(--text-secondary);
+        font-size: 1.25rem;
+        font-family: 'Geist', sans-serif;
         margin-top: 0;
     }
         box-shadow: 0 12px 30px rgba(0, 0, 0, 0.25);
         backdrop-filter: blur(12px);
         -webkit-backdrop-filter: blur(12px);
     }
     .dashboard-section.emphasized {
     }
     .section-title {
+        font-size: 2.2rem;
         font-weight: 700;
         color: var(--text-primary);
         margin-bottom: 12px;
         text-align: center !important;
     }
     .section-lead, .section-subtitle {
+        font-size: 1.1rem;
         color: var(--text-secondary);
         max-width: 720px;
         margin: 0 auto 24px auto;
         word-break: keep-all;
         white-space: normal;
         display: block;
     }
     .phase-grid {
     }
     .phase-card h3 {
+        font-size: 1.5rem;
         color: var(--text-primary);
         margin-bottom: 20px;
         font-weight: 700;
     }
     .phase-chart {
     .phase-chart span {
         position: relative;
+        font-size: 1.5rem;
         font-weight: 700;
+        color: var(--text-primary);
     }
     .phase-list {
         list-style: none;
         padding: 0;
         background: rgba(245, 246, 247, 0.05);
         border: 1px solid rgba(245, 246, 247, 0.08);
         color: var(--text-secondary);
+        font-size: 0.95rem;
     }
     .scenario-body {
     /* Responsive design */
     @media (max-width: 768px) {
         .hero-title {
+            font-size: 3rem;
         }
         .hero-action-button {
             width: 100% !important;
             gap: 8px;
         }
         .section-title {
+            font-size: 1.8rem;
         }
         .phase-chart {
             width: 100px;
     </style>
     """)
     gr.Image(
+        value="banner.png",
         show_label=False,
         interactive=False,
         type="filepath",
         elem_id="hero-banner"
     )
     gr.HTML("""
     <div style="text-align: center; padding: 20px 0;">
     gr.HTML("""
     <div class="hero-actions">
         <a href="https://hugging-face-krew.github.io/" target="_blank" rel="noopener noreferrer" class="hero-action-button">
+            <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
                 <path d="M15 7h3a5 5 0 0 1 5 5 5 5 0 0 1-5 5h-3m-6 0H6a5 5 0 0 1-5-5 5 5 0 0 1 5-5h3"/>
                 <line x1="8" y1="12" x2="16" y2="12"/>
             </svg>
+            <span>Blog</span>
         </a>
         <a href="https://github.com/Hugging-Face-KREW/Ko-AgentBench" target="_blank" rel="noopener noreferrer" class="hero-action-button">
+            <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
                 <path d="M9 19c-5 1.5-5-2.5-7-3"/>
                 <path d="M20 21v-3.87a3.37 3.37 0 0 0-.94-2.61c3.14-.35 6.44-1.54 6.44-7A5.44 5.44 0 0 0 20 4.77 5.07 5.07 0 0 0 19.91 1S18.73.65 16 2.48a13.38 13.38 0 0 0-7 0C6.27.65 5.09 1 5.09 1A5.07 5.07 0 0 0 5 4.77a5.44 5.44 0 0 0-1.5 3.78c0 5.42 3.3 6.61 6.44 7A3.37 3.37 0 0 0 9 18.13V22"/>
             </svg>
             <span>GitHub</span>
         </a>
         <a href="https://huggingface.co/datasets/huggingface-KREW/Ko-AgentBench" target="_blank" rel="noopener noreferrer" class="hero-action-button">
+            <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
                 <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/>
                 <polyline points="7 10 12 15 17 10"/>
                 <line x1="12" y1="15" x2="12" y2="3"/>
             </svg>
+            <span>Dataset</span>
         </a>
+        <a href="https://github.com/Hugging-Face-KREW/Ko-AgentBench/blob/main/evaluate_model_run.py#L55" target="_blank" rel="noopener noreferrer" class="hero-action-button">
+            <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
                 <path d="M3 3v18h18"/>
                 <path d="M7 17v-6"/>
                 <path d="M12 17V7"/>
                 <path d="M17 17v-3"/>
             </svg>
+            <span>Metrics</span>
         </a>
     </div>
     """)
     gr.HTML("""
     <div class="dashboard-section">
         <div class="section-header">
+            <h2 class="section-title">단계별 태스크 설계</h2>
         </div>
         <p class="section-lead" style="text-align: center; margin: 0 auto 24px auto; max-width: 720px; line-height: 1.7; word-break: keep-all;">단순 도구 호출부터 장기적 맥락 능력, 강건성 처리 능력까지 에이전트의 능력을 7단계로 입체적으로 분석하였습니다.</p>
         <div class="phase-grid">
             <div class="phase-card">
+                <h3>Single-Turn</h3>
                 <div class="phase-chart" style="--progress:80%;">
+                    <span>80%</span>
                 </div>
                 <ul class="phase-list">
+                    <li style="color: white;">L1: 단일 도구 실행</li>
+                    <li style="color: white;">L2: 도구 선택 능력</li>
+                    <li style="color: white;">L3: 순차적 reasoning (Chaining)</li>
+                    <li style="color: white;">L4: 병렬적 reasoning (Aggregation)</li>
+                    <li style="color: white;">L5: 강건성 (Robustness / Fallback)</li>
                 </ul>
             </div>
             <div class="phase-card">
+                <h3>Multi-Turn</h3>
                 <div class="phase-chart" style="--progress:20%;">
+                    <span>20%</span>
                 </div>
                 <ul class="phase-list">
+                    <li style="color: white;">L6: 효율성 (Efficiency)</li>
+                    <li style="color: white;">L7: 장기 컨텍스트 기억 (Contextual Memory)</li>
                 </ul>
             </div>
         </div>
     gr.HTML("""
     <div class="dashboard-section emphasized">
         <div class="section-header">
+            <h2 class="section-title">18가지 한국형 API 사용 및 실생활 환경에 특화된 고품질 시나리오 구성</h2>
         </div>
         <div class="scenario-body">
+            <p>네이버, 지도, 카카오, 웹사이트 등 한국 실사용 환경 기반의 API를 기반으로 국내 사용자의 일상과 밀접한 '약속 예약', '블로그 후기 검색'과 같은 현실적인 문제 해결 시나리오를 구현했습니다.</p>
         </div>
         <div class="section-flow">⌄</div>
+    </div>
     """)
     # Section 3: 핵심 평가 기준
     gr.HTML("""
     <div class="dashboard-section">
         <div class="section-header">
+            <h2 class="section-title">핵심 평가 기준</h2>
         </div>
         <div class="criteria-grid">
             <div class="criteria-card">
     # Domain filter section with enhanced styling
     gr.HTML("""
     <style>
     /* Enhanced domain selector styling */
     .domain-selector-container {
         background: #ffd21e0d;
         -webkit-background-clip: text;
         background-clip: text;
         -webkit-text-fill-color: transparent;
+        text-shadow: 0 0 22px rgba(255, 210, 30, 0.65), 0 0 45px rgba(255, 210, 30, 0.4);
+        filter: drop-shadow(0 0 16px rgba(255, 210, 30, 0.35));
         letter-spacing: 0.02em;
+        animation: title-shimmer 5s ease-in-out infinite;
     }
     @keyframes title-shimmer {
     .model-dropdown select,
     .model-dropdown [role="combobox"] {
+        background: rgba(245, 246, 247, 0.06) !important;
+        border: 1px solid var(--border-subtle) !important;
         border-radius: 999px !important;
         padding: 12px 24px !important;
         color: var(--text-primary) !important;
         gap: 8px !important;
         width: 100% !important;
         padding: 12px 24px !important;
+        background: rgba(245, 246, 247, 0.06) !important;
+        border: 1px solid var(--border-subtle) !important;
         border-radius: 999px !important;
         color: var(--text-primary) !important;
         font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
         background: #ffd21e !important;
         border: 1px solid rgba(255, 210, 30, 0.6) !important;
         border-radius: 999px !important;
+        color: #FFFFFF !important;
         font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
         font-weight: 600 !important;
         font-size: 0.95rem !important;
         font-size: 1.5rem;
         margin-bottom: 4px;
         display: block;
+        filter: drop-shadow(0 0 10px currentColor);
     }
     .domain-name {
         top: 8px;
         right: 8px;
         background: var(--accent-primary);
+        color: white;
         font-size: 0.75rem;
         padding: 2px 8px;
         border-radius: 12px;
         padding: 12px 20px !important;
         font-size: 0.95rem !important;
     }
     </style>
     """)
     level_options = list(level_details.keys())
+    with gr.Column(elem_classes=["domain-selector-container"], elem_id="task-level-selector"):
+        gr.HTML("""
+        <div class="domain-header">
+            <h2 class="domain-title" style="color: white;">🧠 Select Task Level</h2>
+            <p class="domain-subtitle" style="color: white;">Ko-AgentBench의 ALL · L1~L7 단계별 에이전트 성능을 손쉽게 비교하세요.</p>
+        </div>
+        """)
+        domain_filter = gr.Radio(
+            choices=level_options,
+            value=default_level,
+            label="",
+            interactive=True,
+            container=False,
+            elem_classes=["domain-radio"]
+        )
+    # Filter controls with domain styling
+    with gr.Column(elem_classes=["domain-selector-container", "filters-sorting-container"], elem_id="filters-sorting-container"):
+        gr.HTML("""
+        <div class="domain-header">
+            <h2 class="domain-title" style="color: white;">🔍 Filters & Sorting</h2>
+            <p class="domain-subtitle" style="color: white;">모델 접근 방식과 정렬 순서를 선택해 맞춤 뷰를 구성하세요.</p>
+        </div>
+        """)
+        with gr.Row(elem_classes=["filters-sorting-row"]):
+            with gr.Column(scale=1, elem_classes=["filter-group"]):
+                with gr.Row(elem_classes=["filter-group-row"]):
+                    gr.HTML("<span class='filter-group-label' style='color: white;'>Model Access</span>")
+                    model_type_filter = gr.Radio(
+                        choices=["All", "OSS", "API"],
+                        value="All",
+                        label="",
+                        elem_classes=["domain-radio"],
+                        container=False
+                    )
+            with gr.Column(scale=1, elem_classes=["filter-group"]):
+                with gr.Row(elem_classes=["filter-group-row"]):
+                    gr.HTML("<span class='filter-group-label' style='color: white;'>Sort Order</span>")
+                    sort_order = gr.Radio(
+                        choices=["Descending", "Ascending"],
+                        value="Descending",
+                        label="",
+                        elem_classes=["domain-radio"],
+                        container=False
+                    )
+    # Main leaderboard table with dynamic title
+    leaderboard_title = gr.HTML(update_leaderboard_title(default_level))
     leaderboard_table = gr.HTML(initial_table)
+    gr.HTML("""
+        </div>
+    </div>""")
     # Radar Chart Section
     gr.HTML("""
     <div class="domain-selector-container domain-performance-container">
         <div class="domain-header">
+            <h2 class="domain-title" style="color: white;">Core Capability Radar</h2>
+            <p class="domain-subtitle" style="color: white;">Track six essential pillars: Success, Execution, Reasoning, Robustness, Efficiency, and Call Validity.</p>
         </div>
     """)
+    with gr.Column(elem_classes=["domain-selector-container", "model-selector-container"], elem_id="radar-model-selector"):
+        gr.HTML("""
+        <div class="domain-header">
+            <h2 class="domain-title" style="color: white;">🎯 Select Models for Comparison</h2>
+            <p class="domain-subtitle" style="color: white;">Choose up to 5 models to map on the capability radar.</p>
+        </div>
+        """)
+        model_selector = gr.Dropdown(
+            choices=initial_df['Model'].tolist()[:10],
+            value=initial_df['Model'].tolist()[:5],
+            multiselect=True,
+            label="",
+            info=None,
+            container=False,
+            elem_classes=["model-dropdown"]
+        )
     # Radar chart plot - wrapped in centered container
     gr.HTML('<div class="chart-container radar-chart-container">')
     gr.HTML("</div>")
+    # Level metric breakdown section
+    gr.HTML("""
+    <div class="domain-selector-container domain-performance-container level-metrics-wrapper">
+        <div class="domain-header">
+            <h2 class="domain-title" style="color: white;">Level-Specific Metric Spotlight</h2>
+            <p class="domain-subtitle" style="color: white;">Dive deeper into each Ko-AgentBench stage and compare model scores across its unique evaluation metrics.</p>
+        </div>
+    """)
+    with gr.Column(elem_classes=["domain-selector-container", "level-selector-container"], elem_id="level-selector-box"):
+        gr.HTML("""
+        <div class="domain-header">
+            <h2 class="domain-title" style="color: white;">🧭 Select Task Level and Models</h2>
+            <p class="domain-subtitle" style="color: white;">Choose a level and up to 5 models to explore their detailed SR-driven metrics.</p>
+        </div>
+        """)
+        level_metric_selector = gr.Dropdown(
+            choices=level_ids,
+            value=level_ids[0] if level_ids else None,
+            multiselect=False,
+            label="",
+            info=None,
+            container=False,
+            elem_classes=["level-dropdown"]
+        )
+        level_model_selector = gr.Dropdown(
+            choices=initial_level_model_choices,
+            value=initial_level_model_values,
+            multiselect=True,
+            label="",
+            info=None,
+            container=False,
+            elem_classes=["model-dropdown", "level-model-dropdown"]
+        )
+    gr.HTML('<div class="chart-container level-metric-chart-container">')
+    level_metric_chart = gr.Plot(
+        label="",
+        value=initial_level_metric_chart,
+        elem_classes=["level-metric-plot", "plot-container"]
+    )
+    gr.HTML("""
+        </div>
+    </div>
+    """)
+    # Heatmap section
+    gr.HTML("""
+    <div class="domain-selector-container domain-performance-container heatmap-wrapper">
+        <div class="domain-header">
+            <h2 class="domain-title" style="color: white;">Comprehensive Performance Heatmap</h2>
+            <p class="domain-subtitle" style="color: white;">View Ko-AgentBench SR scores across L1~L7 for each model in a single glance.</p>
+        </div>
+        <div class="chart-container heatmap-chart-container">
+    """)
+    heatmap_chart = gr.Plot(
+        label="",
+        value=initial_heatmap,
+        elem_classes=["heatmap-plot", "plot-container"]
+    )
+    gr.HTML("""
+        </div>
+    </div>
+    """)
+    # Update functions
+    def get_optimal_sort_order(sort_by_value):
+        """Return the optimal sort order for a given metric"""
+        # Metrics where higher is better (descending)
+        descending_metrics = ["Overall Success"] + [sr_column_map[level] for level in level_ids]
+        # Metrics where lower is better (ascending)
+        ascending_metrics = []
+        if sort_by_value in descending_metrics:
+            return "Descending"
+        elif sort_by_value in ascending_metrics:
+            return "Ascending"
+        else:
+            return "Descending"  # Default fallback
+    def update_table(level_filter, model_type_filter, sort_order):
+        title_html = update_leaderboard_title(level_filter)
+        sort_metric = "Overall Success" if level_filter == "ALL" else sr_column_map.get(resolve_level(level_filter), "Overall Success")
+        table_html = filter_and_sort_data(level_filter, model_type_filter, sort_metric, sort_order)
+        return title_html, table_html
+    def update_radar_chart(domain_filter, model_type_filter, sort_order, selected_models, selected_level, level_selected_models):
+        # Get filtered dataframe
         df = load_leaderboard_data()
+        sort_metric = "Overall Success" if domain_filter == "ALL" else sr_column_map.get(resolve_level(domain_filter), "Overall Success")
+        filtered_df, _, _ = apply_filters(df, domain_filter, model_type_filter, sort_order, sort_metric)
+        # Update model selector choices based on filtered data
+        available_models_all = filtered_df['Model'].tolist()
+        available_models = available_models_all[:15]  # Top 15 from filtered results
+        # If selected models are not in available models, reset to top 5
+        if selected_models:
+            valid_selected = [m for m in selected_models if m in available_models]
+            if not valid_selected:
+                valid_selected = available_models[:5]
+        else:
+            valid_selected = available_models[:5]
+        # Create radar chart
+        chart = create_domain_radar_chart(filtered_df, valid_selected)
+        # Prepare heatmap order prioritizing selected models
+        heatmap_order = []
+        for model in valid_selected:
+            if model not in heatmap_order:
+                heatmap_order.append(model)
+        for model in available_models_all:
+            if model not in heatmap_order:
+                heatmap_order.append(model)
+        heatmap_order = heatmap_order[:12]
+        heatmap_fig = create_performance_heatmap(filtered_df, heatmap_order)
+        # Level metric chart
+        effective_level = selected_level if selected_level in level_ids else (level_ids[0] if level_ids else None)
+        available_level_models = available_models_all
+        if level_selected_models:
+            valid_level_models = [m for m in level_selected_models if m in available_level_models][:5]
+            if not valid_level_models:
+                valid_level_models = available_level_models[:5]
+        else:
+            valid_level_models = available_level_models[:5]
+        level_metric_fig = create_level_metric_chart(filtered_df, effective_level, valid_level_models) if effective_level else create_empty_level_metric_chart("Select a level to view its metrics")
+        return (
+            gr.Dropdown(
+                choices=available_models,
+                value=valid_selected,
+                multiselect=True,
+                label="",
+                info=None,
+                container=False,
+                elem_classes=["model-dropdown"]
+            ),
+            chart,
+            heatmap_fig,
+            gr.Dropdown(
+                choices=available_level_models,
+                value=valid_level_models,
+                multiselect=True,
+                label="",
+                info=None,
+                container=False,
+                elem_classes=["model-dropdown", "level-model-dropdown"]
+            ),
+            level_metric_fig,
+        )
+    def update_radar_only(domain_filter, model_type_filter, sort_order, selected_models, selected_level, level_selected_models):
+        # Get filtered dataframe
+        df = load_leaderboard_data()
+        sort_metric = "Overall Success" if domain_filter == "ALL" else sr_column_map.get(resolve_level(domain_filter), "Overall Success")
+        filtered_df, _, _ = apply_filters(df, domain_filter, model_type_filter, sort_order, sort_metric)
+        available_models_all = filtered_df['Model'].tolist()
+        if selected_models:
+            valid_selected = [m for m in selected_models if m in available_models_all]
+            if not valid_selected:
+                valid_selected = available_models_all[:5]
+        else:
+            valid_selected = available_models_all[:5]
+        heatmap_order = []
+        for model in valid_selected:
+            if model not in heatmap_order:
+                heatmap_order.append(model)
+        for model in available_models_all:
+            if model not in heatmap_order:
+                heatmap_order.append(model)
+        heatmap_order = heatmap_order[:12]
+        effective_level = selected_level if selected_level in level_ids else (level_ids[0] if level_ids else None)
+        available_level_models = available_models_all
+        if level_selected_models:
+            valid_level_models = [m for m in level_selected_models if m in available_level_models][:5]
+            if not valid_level_models:
+                valid_level_models = available_level_models[:5]
+        else:
+            valid_level_models = available_level_models[:5]
+        level_metric_fig = create_level_metric_chart(filtered_df, effective_level, valid_level_models) if effective_level else create_empty_level_metric_chart("Select a level to view its metrics")
+        return (
+            create_domain_radar_chart(filtered_df, valid_selected),
+            create_performance_heatmap(filtered_df, heatmap_order),
+            gr.Dropdown(
+                choices=available_level_models,
+                value=valid_level_models,
+                multiselect=True,
+                label="",
+                info=None,
+                container=False,
+                elem_classes=["model-dropdown", "level-model-dropdown"]
+            ),
+            level_metric_fig,
+        )
+    def update_level_metric_only(domain_filter, model_type_filter, sort_order, selected_models, selected_level, level_selected_models):
+        df = load_leaderboard_data()
+        sort_metric = "Overall Success" if domain_filter == "ALL" else sr_column_map.get(resolve_level(domain_filter), "Overall Success")
+        filtered_df, _, _ = apply_filters(df, domain_filter, model_type_filter, sort_order, sort_metric)
+        available_models = filtered_df['Model'].tolist()
+        if level_selected_models:
+            valid_level_models = [m for m in level_selected_models if m in available_models][:5]
+            if not valid_level_models:
+                valid_level_models = available_models[:5]
+        else:
+            valid_level_models = available_models[:5]
+        effective_level = selected_level if selected_level in level_ids else (level_ids[0] if level_ids else None)
+        level_chart = create_level_metric_chart(filtered_df, effective_level, valid_level_models) if effective_level else create_empty_level_metric_chart("Select a level to view its metrics")
+        return (
+            gr.Dropdown(
+                choices=available_models,
+                value=valid_level_models,
+                multiselect=True,
+                label="",
+                info=None,
+                container=False,
+                elem_classes=["model-dropdown", "level-model-dropdown"]
+            ),
+            level_chart,
+        )
+    # Update table when filters change
+    filter_inputs = [domain_filter, model_type_filter, sort_order]
+    for input_component in filter_inputs:
+        input_component.change(
+            fn=update_table,
+            inputs=filter_inputs,
+            outputs=[leaderboard_title, leaderboard_table]
+        )
+        # Also update radar chart when filters change
+        input_component.change(
+            fn=update_radar_chart,
+            inputs=filter_inputs + [model_selector, level_metric_selector, level_model_selector],
+            outputs=[model_selector, radar_chart, heatmap_chart, level_model_selector, level_metric_chart]
+        )
+    # Update radar chart when model selection changes
+    model_selector.change(
+        fn=update_radar_only,
+        inputs=filter_inputs + [model_selector, level_metric_selector, level_model_selector],
+        outputs=[radar_chart, heatmap_chart, level_model_selector, level_metric_chart]
+    )
+    level_metric_selector.change(
+        fn=update_level_metric_only,
+        inputs=filter_inputs + [model_selector, level_metric_selector, level_model_selector],
+        outputs=[level_model_selector, level_metric_chart]
+    )
+    level_model_selector.change(
+        fn=update_level_metric_only,
+        inputs=filter_inputs + [model_selector, level_metric_selector, level_model_selector],
+        outputs=[level_model_selector, level_metric_chart]
+    )
+    # Define generate_performance_card function before using it
+    def generate_performance_card(model_name):
+        """Generate HTML for the model performance card"""
+        if not model_name:
+            return """<div style="text-align: center; color: var(--text-secondary); padding: 40px;">
+                Please select a model to generate its performance card
+            </div>"""
+        # Get model data
+        df = load_leaderboard_data()
+        model_data = df[df['Model'] == model_name]
+        if model_data.empty:
+            return """<div style="text-align: center; color: var(--text-secondary); padding: 40px;">
+                Model not found in the database
+            </div>"""
+        row = model_data.iloc[0]
+        # Get overall rank based on overall success
+        df_with_success = df.copy()
+        df_with_success['Overall Success'] = pd.to_numeric(df_with_success.get('Overall Success', pd.Series()), errors='coerce')
         df_with_success = df_with_success[df_with_success['Overall Success'].notna()]
         df_sorted = df_with_success.sort_values('Overall Success', ascending=False).reset_index(drop=True)
         try:
     gr.HTML("""
     <div class="domain-selector-container performance-card-container">
         <div class="domain-header">
+            <h2 class="domain-title" style="color: white;">Model Performance Card</h2>
+            <p class="domain-subtitle" style="color: white;">Comprehensive performance card for any model - perfect for presentations and reports</p>
         </div>
         <div class="performance-card-content">
     """)
     with gr.Column(elem_classes=["domain-selector-container", "model-selector-container"], elem_id="model-selector-box"):
         gr.HTML("""
+        <div class="domain-header">
+            <h2 class="domain-title" style="color: white;">🤖 Select Model</h2>
+            <p class="domain-subtitle" style="color: white;">비교할 모델을 선택하세요.</p>
+        </div>
         """)
         card_model_selector = gr.Dropdown(
             choices=initial_df['Model'].tolist(),
             label="",
             info=None,
             container=False,
+            elem_classes=["model-dropdown"]
         )
         download_card_btn = gr.Button(
+            "Download Card as PNG",
             elem_id="download-card-btn",
             elem_classes=["pill-button"]
         )
         </div>
     </div>
     """)
     # Add custom CSS for the performance card
     gr.HTML("""
     .level-dropdown select,
     .level-dropdown [role="combobox"],
     .level-dropdown button {
+        background: rgba(245, 246, 247, 0.06) !important;
+        border: 1px solid var(--border-subtle) !important;
         border-radius: 999px !important;
         padding: 12px 20px !important;
         color: var(--text-primary) !important;
         text-align: center !important;
         min-height: 46px !important;
         transition: all 0.3s ease !important;
+        box-shadow: 0 10px 24px rgba(255, 210, 30, 0.15) !important;
     }
     .level-dropdown select:hover,
         margin: 12px auto 0 !important;
     }
     .radar-placeholder {
         display: flex;
         flex-direction: column;
         }
     }
     </style>
     """)
                 label="",
                 info=None,
                 container=False,
+                elem_classes=["model-dropdown"]
             )
         input_component.change(
     palette = [
         {'fill': 'rgba(255, 210, 30, 0.25)', 'line': '#ffd21e'},
         {'fill': 'rgba(255, 138, 60, 0.22)', 'line': '#FF8A3C'},
+        {'fill': 'rgba(249, 112, 185, 0.22)', 'line': '#F970B9'},
+        {'fill': 'rgba(139, 92, 246, 0.20)', 'line': '#8B5CF6'},
         {'fill': 'rgba(248, 250, 252, 0.20)', 'line': '#F8FAFC'},
     ]
         height=800,
         width=900,
         margin=dict(t=30, b=50, l=10, r=10),
+        autosize=True
     )
     return fig
     model_palette = [
         '#ffd21e',
         '#FF8A3C',
+        '#F970B9',
+        '#8B5CF6',
         '#F8FAFC',
         '#38BDF8',
     ]
         paper_bgcolor="#01091A",
         plot_bgcolor="rgba(245, 246, 247, 0.02)",
         height=plot_height,
+        width=1450,
         margin=dict(t=90, b=80, l=220, r=160),
         legend=dict(
             orientation="h",
         paper_bgcolor="#01091A",
         plot_bgcolor="rgba(245, 246, 247, 0.02)",
         height=420,
+        width=1450,
         margin=dict(t=80, b=60, l=80, r=120),
         title=dict(
             text="<b>Level Metric Breakdown</b>",

tabs/leaderboard_v1_en.py DELETED Viewed

The diff for this file is too large to render. See raw diff

utils.py CHANGED Viewed

@@ -9,8 +9,8 @@ def get_chart_colors():
     #         "grid": (1, 1, 1, 0.1),  # RGBA tuple for grid
     #     }
     return {
-        "Private": "#593B1D",  # rich brown for API
-        "Open source": "#FACC15",  # warm amber for OSS
         "performance_bands": ["#DCFCE7", "#FEF9C3", "#FEE2E2"],
         "text": "#111827",
         "background": "#FFFFFF",
@@ -20,12 +20,10 @@ def get_chart_colors():
 def get_rank_badge(rank):
     """Generate HTML for rank badge with appropriate styling"""
-    tag_background = "#593B1D"
-    tag_text_color = "#FFFFFF"
     badge_styles = {
-        1: ("1st", tag_background, tag_text_color),
-        2: ("2nd", tag_background, tag_text_color),
-        3: ("3rd", tag_background, tag_text_color),
     }
     if rank in badge_styles:
@@ -65,25 +63,24 @@ def get_type_badge(model_type):
     """Generate HTML for model type badge"""
     colors = get_chart_colors()
     color_map = {
-        "Open source": colors.get("Open source", "#FACC15"),
-        "Proprietary": colors.get("Private", "#593B1D"),
-        "Private": colors.get("Private", "#593B1D"),
     }
     label_map = {
         "Open source": "OSS",
         "Proprietary": "API",
         "Private": "API",
     }
-    bg_color = color_map.get(model_type, "#593B1D")
     display_label = label_map.get(model_type, model_type)
-    text_color = "#111827" if display_label == "OSS" else "#FFFFFF"
     return f"""
         <div style="
             display: inline-flex;
             align-items: center;
             padding: 4px 8px;
             background: {bg_color};
-            color: {text_color};
             border-radius: 4px;
             font-size: 0.85em;
             font-weight: 500;

     #         "grid": (1, 1, 1, 0.1),  # RGBA tuple for grid
     #     }
     return {
+        "Private": "#3F78FA",  # accent-blue light
+        "Open source": "#A13AE2",  # accent-purple light
         "performance_bands": ["#DCFCE7", "#FEF9C3", "#FEE2E2"],
         "text": "#111827",
         "background": "#FFFFFF",
 def get_rank_badge(rank):
     """Generate HTML for rank badge with appropriate styling"""
     badge_styles = {
+        1: ("1st", "linear-gradient(145deg, #ffd700, #ffc400)", "#000"),
+        2: ("2nd", "linear-gradient(145deg, #9ca3af, #787C7E)", "#fff"),
+        3: ("3rd", "linear-gradient(145deg, #CD7F32, #b36a1d)", "#fff"),
     }
     if rank in badge_styles:
     """Generate HTML for model type badge"""
     colors = get_chart_colors()
     color_map = {
+        "Open source": colors.get("Open source", "#A13AE2"),
+        "Proprietary": colors.get("Private", "#3F78FA"),
+        "Private": colors.get("Private", "#3F78FA"),
     }
     label_map = {
         "Open source": "OSS",
         "Proprietary": "API",
         "Private": "API",
     }
+    bg_color = color_map.get(model_type, "#4F46E5")
     display_label = label_map.get(model_type, model_type)
     return f"""
         <div style="
             display: inline-flex;
             align-items: center;
             padding: 4px 8px;
             background: {bg_color};
+            color: white;
             border-radius: 4px;
             font-size: 0.85em;
             font-weight: 500;