.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
app.py CHANGED
@@ -2,10 +2,9 @@ import warnings
2
  warnings.filterwarnings("ignore")
3
 
4
  import gradio as gr
5
- from tabs.leaderboard_v1_kr import create_leaderboard_v2_interface as leaderboard_kr
6
- from tabs.leaderboard_v1_en import create_leaderboard_v2_interface as leaderboard_en
7
 
8
- # 다크 모드 텍스트 색상 보정
9
  FIX_DARK_TEXT_CSS = """
10
  html.dark .gr-prose,
11
  html.dark .gr-prose p,
@@ -18,36 +17,14 @@ html.dark .gr-markdown * {
18
  """
19
 
20
  def create_app():
21
- theme = gr.themes.Default()
 
22
 
23
  with gr.Blocks(theme=theme, css=FIX_DARK_TEXT_CSS) as app:
24
- # 🔹 왼쪽 상단에 언어 전환 버튼
25
- with gr.Row():
26
- lang_btn = gr.Button("🌍 English", scale=0, elem_id="lang-toggle-btn")
27
-
28
- # 🔹 기본은 한국어 UI
29
- with gr.Column(visible=True) as kr_view:
30
- leaderboard_kr()
31
-
32
- # 🔹 영어 UI는 숨김
33
- with gr.Column(visible=False) as en_view:
34
- leaderboard_en()
35
-
36
- # 🔹 버튼 클릭 시 토글
37
- def toggle_language(current_label):
38
- if "English" in current_label:
39
- return "🇰🇷 Korean", gr.update(visible=False), gr.update(visible=True)
40
- else:
41
- return "🌍 English", gr.update(visible=True), gr.update(visible=False)
42
-
43
- lang_btn.click(
44
- toggle_language,
45
- inputs=[lang_btn],
46
- outputs=[lang_btn, kr_view, en_view],
47
- )
48
-
49
  return app
50
 
51
-
52
  demo = create_app()
53
- demo.launch(ssr_mode=False)
 
 
 
2
  warnings.filterwarnings("ignore")
3
 
4
  import gradio as gr
5
+ from tabs.leaderboard_v1 import create_leaderboard_v2_interface
 
6
 
7
+ # 다크 모드에서 프로즈/마크다운 텍스트를 확실히 밝게 고정하는 CSS 보정
8
  FIX_DARK_TEXT_CSS = """
9
  html.dark .gr-prose,
10
  html.dark .gr-prose p,
 
17
  """
18
 
19
  def create_app():
20
+ # 권장: 명시적인 테마 객체 사용 (Default, Soft, Origin 등)
21
+ theme = gr.themes.Default() # 필요 시 gr.themes.Origin() 등으로 변경
22
 
23
  with gr.Blocks(theme=theme, css=FIX_DARK_TEXT_CSS) as app:
24
+ create_leaderboard_v2_interface()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  return app
26
 
 
27
  demo = create_app()
28
+
29
+ # Spaces/Gradio5에서 SSR이 꼬이면 일단 꺼서 확인
30
+ demo.launch(ssr_mode=False)
banner_wide.png DELETED

Git LFS Details

  • SHA256: 66fa5541384dde4eac497d3aa9fbcfeccbb44cc7aa1e251acb200adbddf914a1
  • Pointer size: 131 Bytes
  • Size of remote file: 347 kB
combined_evaluation_summary.csv CHANGED
@@ -1,16 +1,7 @@
1
- Model,Vendor,Model Type,L1_Total_Tasks,L2_Total_Tasks,L3_Total_Tasks,L4_Total_Tasks,L5_Total_Tasks,L6_Total_Tasks,L7_Total_Tasks,L1_Evaluated_Tasks,L2_Evaluated_Tasks,L3_Evaluated_Tasks,L4_Evaluated_Tasks,L5_Evaluated_Tasks,L6_Evaluated_Tasks,L7_Evaluated_Tasks,L1_Avg_Exec_Time,L2_Avg_Exec_Time,L3_Avg_Exec_Time,L4_Avg_Exec_Time,L5_Avg_Exec_Time,L6_Avg_Exec_Time,L7_Avg_Exec_Time,L1_Avg_Tokens,L2_Avg_Tokens,L3_Avg_Tokens,L4_Avg_Tokens,L5_Avg_Tokens,L6_Avg_Tokens,L7_Avg_Tokens,L1_Avg_TPS,L2_Avg_TPS,L3_Avg_TPS,L4_Avg_TPS,L5_Avg_TPS,L6_Avg_TPS,L7_Avg_TPS,L1_Avg_TTFT,L2_Avg_TTFT,L3_Avg_TTFT,L4_Avg_TTFT,L5_Avg_TTFT,L6_Avg_TTFT,L7_Avg_TTFT,L1_RRR,L2_RRR,L3_RRR,L4_RRR,L5_RRR,L6_RRR,L7_RRR,L1_SR,L2_SR,L3_SR,L4_SR,L5_SR,L6_SR,L7_SR,L1_EPR_CVR,L2_EPR_CVR,L3_EPR_CVR,L4_EPR_CVR,L5_EPR_CVR,L6_EPR_CVR,L7_EPR_CVR,L1_pass@k,L2_pass@k,L3_pass@k,L4_pass@k,L5_pass@k,L6_pass@k,L7_pass@k,L1_TooAcc,L1_ArgAcc,L1_CallEM,L1_RespOK,L2_SelectAcc,L3_FSM,L3_PSM,L3_ΔSteps_norm,L4_Coverage,L4_SourceEPR,L5_AdaptiveRoutingScore,L5_FallbackSR,L6_RedundantCallRate,L6_EffScore,L7_ContextRetention,L7_RefRecall
2
- nova-2-lite,Amazon,API,11,15,10,10,20,15,10,11,15,10,10,20,15,10,8.93,5.72,9.82,18.44,9.28,1.54,2.34,3327.64,5633.33,16431.2,23542.0,8650.3,2454.87,3767.7,372.65,984.32,1672.39,1276.67,932.02,1590.31,1612.51,4.2313,2.2447,2.0483,4.5429,1.8541,1.5429,2.3359,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9,0.9,0.55,0.8,0.9,1.0,1.0,0.9417,1.0,0.2542,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.7955,0.4545,1.0,1.0,0.5,1.0,0.45,0.65,0.65,0.2625,0.65,1.0,0.0,0.95,0.85
3
- gpt-4o,OpenAI,API,11,15,10,10,20,15,10,11,15,10,10,20,15,10,5.42,7.28,12.74,19.25,8.37,4.45,3.68,3302.0,2871.0,11588.0,16022.6,3909.25,1564.2,4044.4,609.02,394.63,909.92,832.37,467.04,351.32,1098.86,1.5767,3.2437,3.3023,5.9534,1.5256,4.452,2.9725,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.5,0.8667,0.5,1.0,0.8667,1.0,1.0,0.1833,0.0,0.2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.5909,0.2727,1.0,1.0,0.4,1.0,0.35,0.7167,0.7167,0.2583,0.6,1.0,0.0,0.95,0.95
4
- DeepSeek-V3.1,DeepSeek,OSS,11,15,10,10,20,15,10,11,15,10,10,20,15,10,3.53,10.56,18.4,28.31,13.21,7.63,3.25,1469.73,10547.33,23309.7,42090.4,9212.0,4614.6,4392.9,416.14,998.63,1266.84,1486.94,697.45,604.79,1351.85,1.8044,3.7647,4.442,6.5445,2.0181,5.3715,2.6493,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9333,0.9,1.0,0.65,0.8667,0.5,0.8182,0.8667,0.74,0.775,0.2117,0.2,0.2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.8182,0.5455,0.2727,1.0,0.8667,0.3,0.8,0.3,0.6667,0.575,0.2133,0.7,1.0,0.1667,1.0,0.975
5
- gemini-2.5-flash,Google,API,11,15,10,10,20,15,10,11,15,10,10,20,15,10,4.3,4.88,7.75,11.4,6.07,2.63,4.14,1733.73,4466.67,3951.7,12934.7,2054.0,3562.87,4716.3,402.93,914.6,509.6,1135.09,338.22,1353.71,1137.94,1.975,1.9409,3.4497,3.5025,1.7375,1.8039,2.5157,1.0,1.0,1.0,1.0,1.0,1.0,0.7,0.9091,1.0,0.7,0.6,0.25,0.7333,0.3,0.9091,1.0,0.6,0.8,0.1,0.3333,0.3,1.0,1.0,1.0,1.0,1.0,1.0,0.7,0.9091,0.6136,0.1818,1.0,1.0,0.5,0.55,0.35,0.35,0.35,0.1,0.2,1.0,0.3333,0.9,0.875
6
- glm-4.6v,Z.ai,OSS,11,15,10,10,20,15,10,11,15,10,10,20,15,10,10.08,15.04,27.21,34.65,37.16,11.9,8.41,2465.09,8454.93,15996.0,35309.7,16376.0,2716.53,4826.2,244.61,562.21,587.98,1019.06,440.71,228.19,574.12,4.8704,5.7804,7.0628,8.554,5.1927,10.9267,6.6557,1.0,1.0,1.0,1.0,0.95,1.0,1.0,1.0,1.0,1.0,1.0,0.65,0.9333,0.6,1.0,1.0,1.0,1.0,0.3083,0.0667,0.3,1.0,1.0,1.0,1.0,0.95,1.0,1.0,1.0,0.6818,0.3636,1.0,1.0,0.6,0.9667,0.6,0.5667,0.5667,0.2601,0.75,1.0,0.0667,0.9,0.8
7
- grok-4.1-fast,xAI,API,11,15,10,10,20,15,10,11,15,10,10,20,15,10,11.99,17.15,27.66,44.02,39.33,12.44,17.68,3710.64,6381.8,15081.5,13744.4,16053.9,2901.0,5535.2,309.6,372.15,545.25,312.25,408.21,233.21,313.08,5.7846,6.5545,8.836,12.3512,6.6412,11.301,14.0295,1.0,1.0,1.0,0.8,1.0,1.0,1.0,0.9091,1.0,1.0,0.8,0.8,0.9333,0.8,1.0,1.0,1.0,0.8,0.3797,0.0667,0.4,1.0,1.0,1.0,0.8,1.0,1.0,1.0,1.0,0.75,0.5455,1.0,1.0,0.6,1.0,0.5667,0.5667,0.5667,0.3475,0.95,1.0,0.0667,0.975,0.85
8
- claude-haiku-4-5,Anthropic,API,11,15,10,10,20,15,10,11,15,10,10,20,15,10,5.18,9.9,14.65,21.61,18.33,3.69,4.22,4504.64,11367.93,23333.9,42628.5,13977.65,2732.53,7153.3,869.59,1148.23,1593.07,1972.65,762.46,741.38,1697.01,2.4328,3.2797,4.1784,5.2912,2.2585,3.6851,3.3065,1.0,1.0,1.0,1.0,0.95,1.0,1.0,1.0,1.0,1.0,0.9,0.65,0.8,0.7,1.0,1.0,1.0,1.0,0.2358,0.0,0.3,1.0,1.0,1.0,1.0,0.95,1.0,1.0,1.0,0.6136,0.2727,1.0,1.0,0.6,1.0,0.5,0.75,0.7389,0.2283,0.75,1.0,0.0,1.0,0.925
9
- gemini-2.5-flash-lite,Google,API,11,15,10,10,20,15,10,11,15,10,10,20,15,10,1.62,2.83,1.55,5.72,3.74,1.66,2.97,1930.09,3337.87,5892.0,15236.2,1795.9,1572.73,2577.8,1188.63,1179.12,3797.73,2664.96,480.67,944.86,868.65,0.6444,0.9106,0.6729,1.1369,0.5226,0.7943,0.6945,1.0,1.0,1.0,1.0,0.9,1.0,0.4,1.0,0.8667,0.2,0.7,0.25,0.6,0.4,1.0,0.8667,0.275,0.6,0.1167,0.2,0.2,1.0,1.0,1.0,1.0,0.9,1.0,0.4,1.0,0.6364,0.2727,1.0,0.8667,0.1,0.2,0.1,0.35,0.35,0.125,0.25,1.0,0.1333,0.975,0.825
10
- claude-sonnet-4-5,Anthropic,API,11,15,10,10,20,15,10,11,15,10,10,20,15,10,6.77,11.69,19.86,34.08,19.1,5.45,7.18,3215.09,5874.0,19958.4,60071.8,10702.45,2710.47,10297.8,474.96,502.51,1004.85,1762.73,560.27,497.52,1434.99,3.1551,5.243,5.9522,8.9693,3.4574,5.4468,4.6806,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9,0.55,0.8,0.6,1.0,1.0,1.0,1.0,0.1742,0.0,0.4,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.6591,0.2727,1.0,1.0,0.9,1.0,0.75,0.75,0.75,0.1892,0.6,1.0,0.0,1.0,0.975
11
- gpt-4o-mini,OpenAI,API,11,15,10,10,20,15,10,11,15,10,10,20,15,10,2.79,5.61,8.13,25.46,7.19,2.63,2.9,1389.55,4236.13,11772.4,11700.1,5203.7,1561.93,3940.3,498.7,755.34,1448.9,459.62,724.0,594.06,1357.18,1.2394,1.9904,2.5526,9.1994,0.9279,2.6286,2.1975,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9091,1.0,1.0,1.0,0.6,0.6667,0.5,1.0,0.8667,1.0,1.0,0.1946,0.0,0.2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.6591,0.2727,1.0,1.0,0.5,0.9167,0.5,0.5833,0.5833,0.2171,0.75,1.0,0.0,0.925,0.975
12
- gpt-5,OpenAI,API,11,15,10,10,20,15,10,11,15,10,10,20,15,10,5.64,11.23,14.48,24.59,19.64,9.31,10.15,2306.18,16867.2,19321.9,29718.7,10773.2,6753.07,9451.3,409.06,1501.34,1334.6,1208.62,548.57,725.02,931.01,2.4414,3.442,5.8573,7.5822,3.1615,5.978,5.431,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9091,0.9333,1.0,0.9,0.85,0.8667,0.8,1.0,1.0,0.7,0.7,0.2728,0.2,0.5,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.7273,0.3636,1.0,1.0,0.1,0.5667,0.4,0.55,0.5333,0.3,0.85,1.0,0.1444,1.0,0.975
13
- qwen3-next-80b-a3b,Alibaba,OSS,11,15,10,10,20,15,10,11,15,10,10,20,15,10,4.13,12.63,17.18,28.84,10.59,9.59,7.92,1937.82,4725.0,15345.8,22067.0,6512.1,2198.27,5761.5,469.0,374.15,893.49,765.08,615.2,229.2,727.4,1.907,5.8972,5.5666,10.0412,1.985,9.5896,5.561,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.6,0.9333,0.7,1.0,1.0,1.0,1.0,0.2375,0.0,0.2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.7727,0.4545,1.0,1.0,0.8,1.0,0.65,0.7,0.7,0.2542,0.7,1.0,0.0,0.975,0.95
14
- gpt-5-mini,OpenAI,API,11,15,10,10,20,15,10,11,15,10,10,20,15,10,7.14,7.36,12.37,13.11,11.67,7.22,8.02,2963.73,4288.47,9704.4,8528.4,3510.45,2465.07,5810.8,414.91,582.29,784.64,650.71,300.9,341.21,724.39,3.4248,3.2995,5.2383,6.41,2.7195,6.5991,6.5065,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9091,0.9333,0.9,0.8,0.2,0.8667,1.0,1.0,0.8667,0.6,0.6,0.0917,0.0667,0.3,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.7045,0.3636,1.0,1.0,0.3,0.55,0.25,0.3667,0.3667,0.0917,0.2,1.0,0.0667,1.0,0.95
15
- nova-lite,Amazon,API,11,15,10,10,20,15,10,11,15,10,10,20,15,10,3.29,7.72,12.08,18.88,11.81,5.05,3.2,2760.64,7563.27,17904.5,43855.6,12621.5,23029.87,6711.7,839.35,979.15,1482.74,2323.41,1068.7,4562.8,2094.59,1.4877,2.958,2.4853,4.0705,1.4959,2.0742,2.2498,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9091,1.0,0.5,0.9,0.3,0.8,0.4,1.0,1.0,1.0,1.0,0.1373,0.4667,0.4,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.5909,0.1818,1.0,1.0,0.5,0.85,0.45,0.5667,0.5667,0.1376,0.6,0.3,0.3133,0.725,0.675
16
- gemini-2.5-pro,Google,API,11,15,10,10,20,15,10,11,15,10,10,20,15,10,10.88,11.9,23.24,19.5,23.03,7.52,9.7,2524.45,4880.93,3022.7,15671.5,4011.9,5005.8,9071.0,232.11,410.31,130.06,803.81,174.17,665.86,935.55,5.2265,5.6138,9.9988,8.3578,5.6094,4.8197,5.9149,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.9333,0.1,0.6,0.3,0.7333,0.5,1.0,1.0,0.5,0.7,0.125,0.4,0.5,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.7045,0.3636,1.0,1.0,0.0,0.2667,0.2,0.4667,0.4667,0.125,0.3,1.0,0.1333,0.875,0.85
 
1
+ Model,Vendor,Model Type,L1_Total_Tasks,L2_Total_Tasks,L3_Total_Tasks,L4_Total_Tasks,L5_Total_Tasks,L6_Total_Tasks,L7_Total_Tasks,L1_Evaluated_Tasks,L2_Evaluated_Tasks,L3_Evaluated_Tasks,L4_Evaluated_Tasks,L5_Evaluated_Tasks,L6_Evaluated_Tasks,L7_Evaluated_Tasks,L1_Avg_Exec_Time,L2_Avg_Exec_Time,L3_Avg_Exec_Time,L4_Avg_Exec_Time,L5_Avg_Exec_Time,L6_Avg_Exec_Time,L7_Avg_Exec_Time,L1_Avg_Tokens,L2_Avg_Tokens,L3_Avg_Tokens,L4_Avg_Tokens,L5_Avg_Tokens,L6_Avg_Tokens,L7_Avg_Tokens,L1_Avg_TPS,L2_Avg_TPS,L3_Avg_TPS,L4_Avg_TPS,L5_Avg_TPS,L6_Avg_TPS,L7_Avg_TPS,L1_Avg_TTFT,L2_Avg_TTFT,L3_Avg_TTFT,L4_Avg_TTFT,L5_Avg_TTFT,L6_Avg_TTFT,L7_Avg_TTFT,L1_RRR,L2_RRR,L3_RRR,L4_RRR,L5_RRR,L6_RRR,L7_RRR,L1_SR,L2_SR,L3_SR,L4_SR,L5_SR,L6_SR,L7_SR,L1_EPR_CVR,L2_EPR_CVR,L3_EPR_CVR,L4_EPR_CVR,L5_EPR_CVR,L6_EPR_CVR,L7_EPR_CVR,L1_pass@k,L2_pass@k,L3_pass@k,L4_pass@k,L5_pass@k,L6_pass@k,L7_pass@k,L1_TooAcc,L1_ArgAcc,L1_CallEM,L1_RespOK,L2_SelectAcc,L3_FSM,L3_PSM,L3_ΔSteps_norm,L3_ProvAcc,L4_Coverage,L4_SourceEPR,L5_AdaptiveRoutingScore,L5_FallbackSR,L6_ReuseRage,L6_RedundantCallRate,L6_EffScore,L7_ContextRetention,L7_RefRecall
2
+ kanana-1.5-8b-instruct-2505,Kakao,OSS,11,30,10,10,20,15,10,11,30,10,10,20,15,10,5.53,17.22,14.51,23.78,9.44,52.98,47.39,4556.36,6107.6,5723.4,7188.3,5665.9,28502.33,28738.1,823.46,354.62,394.38,302.24,599.94,538.01,606.41,1.5236,6.7827,5.9015,7.4927,1.4163,7.764,5.1605,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.8409,0.925,0.55,0.55,0.45,0.7167,0.4,1.0,1.0,1.0,0.9,0.225,1.0,0.9,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.6364,0.2727,1.0,1.0,0.0,0.5333,0.0,0.0,0.2667,0.2667,0.225,0.45,0.4,1.0,0.6,0.825,0.75
3
+ skt_A.X-4.0-Light,SKT,OSS,11,30,10,10,20,15,10,11,30,10,10,20,15,10,5.15,17.37,21.51,9.06,9.23,38.97,33.94,4286.73,7456.1,13579.8,2284.9,6500.85,27744.0,25032.0,833.07,429.13,631.27,252.27,704.42,711.88,737.55,1.3615,5.8379,6.0725,6.2881,1.3627,5.3648,3.902,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.5455,0.7417,0.525,0.35,0.2875,0.55,0.45,1.0,1.0,1.0,0.3,0.2583,0.8667,0.9,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.8182,0.4545,1.0,1.0,0.2,0.7833,0.65,0.1,0.05,0.05,0.25,0.55,0.4,1.0,0.4667,0.8,0.775
4
+ qwen3-8B,알리바바,OSS,11,30,10,10,20,15,10,11,30,10,10,20,15,10,24.54,33.11,38.89,61.09,46.28,102.03,92.19,5798.0,7600.07,8380.0,14758.8,9789.4,45946.13,55163.2,236.28,229.53,215.5,241.58,211.54,450.34,598.37,11.0876,13.3456,23.3045,16.4015,8.5784,16.7883,11.2336,1.0,1.0,0.9,0.9,1.0,1.0,1.0,0.5909,0.8083,0.175,0.35,0.45,0.7833,0.525,1.0,1.0,0.4,0.9,0.2258,1.0,0.95,1.0,1.0,0.9,0.8,0.9667,1.0,1.0,1.0,0.7955,0.4545,1.0,1.0,0.2,0.3,0.2,0.1,0.4667,0.4667,0.2333,0.55,0.2,1.0,0.5667,0.85,0.775
5
+ gemini-2.5-pro,Google,API,11,30,10,10,20,15,10,11,30,10,10,20,15,10,9.01,10.45,11.43,29.65,15.91,43.0,33.16,5257.45,5761.23,6384.2,22304.6,7592.2,54436.6,50150.6,583.2,551.49,558.73,752.35,477.25,1266.0,1512.44,4.6263,5.4812,7.9657,8.8433,4.9659,7.1894,5.2974,0.9091,0.8,0.8,1.0,0.8,0.8667,0.9,0.8409,0.6583,0.2,0.425,0.4,0.4,0.35,0.9091,0.7667,0.2,0.7,0.1583,0.8667,0.9,0.9091,0.8,0.8,1.0,0.8,0.8667,0.9,0.9091,0.6364,0.2727,0.9091,0.7667,0.1,0.1667,0.1,0.0,0.4833,0.4833,0.1583,0.35,0.5333,1.0,0.1222,0.825,0.7
6
+ Qwen3-4B-Instruct-2507,알리바바,OSS,11,30,10,10,20,15,10,11,30,10,10,20,15,10,6.66,22.89,14.8,51.19,11.71,86.63,60.09,5273.09,6447.9,9087.8,17502.5,5363.85,36058.4,37068.1,791.39,281.66,613.83,341.91,458.02,416.23,616.84,2.093,9.1244,4.4172,13.7638,1.8319,14.8681,8.245,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.6364,0.6583,0.15,0.375,0.3,0.6167,0.425,1.0,1.0,1.0,0.9,0.15,1.0,1.0,1.0,1.0,1.0,0.9333,1.0,1.0,1.0,1.0,0.75,0.3636,1.0,1.0,0.2,0.6333,0.7,0.0,0.5167,0.5167,0.15,0.3,0.1333,1.0,0.4,0.875,0.8
7
+ Midm-2.0-Base-Instruct,KT,OSS,11,30,10,10,20,15,10,11,30,10,10,20,15,10,5.39,3.9,3.06,3.75,8.13,28.66,16.08,4185.82,2514.93,3418.3,2388.8,3084.5,22909.13,14079.1,775.89,644.46,1117.59,636.3,379.51,799.33,875.38,1.4775,1.8563,1.8855,1.6781,1.0824,1.6794,1.1356,1.0,1.0,1.0,1.0,0.95,1.0,1.0,0.5909,0.5167,0.25,0.325,0.275,0.4833,0.35,0.9091,0.5667,0.2,0.3,0.0667,0.9333,0.6,1.0,1.0,1.0,0.8667,0.9833,1.0,1.0,0.9091,0.6364,0.2727,1.0,0.5667,0.0,0.1,0.0,0.0,0.0,0.0,0.0667,0.15,0.0,0.9333,0.3,0.55,0.5
 
 
 
 
 
 
 
 
 
components/leaderboard_components.py CHANGED
@@ -5,8 +5,8 @@ These are stable components that don't change frequently
5
 
6
  def get_chart_colors():
7
  return {
8
- "Private": "#593B1D", # Rich brown for API
9
- "Open source": "#FACC15", # Warm amber for OSS
10
  "performance_bands": ["#DCFCE7", "#FEF9C3", "#FEE2E2"],
11
  "text": "white",
12
  "background": "#01091A",
@@ -16,12 +16,10 @@ def get_chart_colors():
16
 
17
  def get_rank_badge(rank):
18
  """Generate HTML for rank badge with appropriate styling"""
19
- tag_background = "#593B1D"
20
- tag_text_color = "#FFFFFF"
21
  badge_styles = {
22
- 1: ("1st", tag_background, tag_text_color),
23
- 2: ("2nd", tag_background, tag_text_color),
24
- 3: ("3rd", tag_background, tag_text_color),
25
  }
26
 
27
  if rank in badge_styles:
@@ -61,25 +59,24 @@ def get_type_badge(model_type):
61
  """Generate HTML for model type badge"""
62
  colors = get_chart_colors()
63
  color_map = {
64
- "Open source": colors.get("Open source", "#FACC15"),
65
- "Proprietary": colors.get("Private", "#593B1D"),
66
- "Private": colors.get("Private", "#593B1D"),
67
  }
68
  label_map = {
69
  "Open source": "OSS",
70
  "Proprietary": "API",
71
  "Private": "API",
72
  }
73
- bg_color = color_map.get(model_type, "#593B1D")
74
  display_label = label_map.get(model_type, model_type)
75
- text_color = "#111827" if display_label == "OSS" else "#FFFFFF"
76
  return f"""
77
  <div style="
78
  display: inline-flex;
79
  align-items: center;
80
  padding: 4px 8px;
81
  background: {bg_color};
82
- color: {text_color};
83
  border-radius: 4px;
84
  font-size: 0.85em;
85
  font-weight: 500;
 
5
 
6
  def get_chart_colors():
7
  return {
8
+ "Private": "#1098F7", # Airglow Blue for Proprietary
9
+ "Open source": "#58BC82", # Green for Open source
10
  "performance_bands": ["#DCFCE7", "#FEF9C3", "#FEE2E2"],
11
  "text": "white",
12
  "background": "#01091A",
 
16
 
17
  def get_rank_badge(rank):
18
  """Generate HTML for rank badge with appropriate styling"""
 
 
19
  badge_styles = {
20
+ 1: ("1st", "linear-gradient(145deg, #ffd700, #ffc400)", "#000"),
21
+ 2: ("2nd", "linear-gradient(145deg, #9ca3af, #787C7E)", "#fff"),
22
+ 3: ("3rd", "linear-gradient(145deg, #CD7F32, #b36a1d)", "#fff"),
23
  }
24
 
25
  if rank in badge_styles:
 
59
  """Generate HTML for model type badge"""
60
  colors = get_chart_colors()
61
  color_map = {
62
+ "Open source": colors.get("Open source", "#58BC82"),
63
+ "Proprietary": colors.get("Private", "#1098F7"),
64
+ "Private": colors.get("Private", "#1098F7"),
65
  }
66
  label_map = {
67
  "Open source": "OSS",
68
  "Proprietary": "API",
69
  "Private": "API",
70
  }
71
+ bg_color = color_map.get(model_type, "#4F46E5")
72
  display_label = label_map.get(model_type, model_type)
 
73
  return f"""
74
  <div style="
75
  display: inline-flex;
76
  align-items: center;
77
  padding: 4px 8px;
78
  background: {bg_color};
79
+ color: white;
80
  border-radius: 4px;
81
  font-size: 0.85em;
82
  font-weight: 500;
styles/leaderboard_styles.py CHANGED
@@ -34,9 +34,9 @@ def get_leaderboard_css():
34
  --border-subtle: rgba(245, 246, 247, 0.08);
35
  --border-default: rgba(245, 246, 247, 0.12);
36
  --border-strong: rgba(245, 246, 247, 0.2);
37
- --text-primary: #FFFFFF;
38
- --text-secondary: #E2E8F0;
39
- --text-muted: #94A3B8;
40
  --accent-primary: #ffd21e;
41
  --accent-secondary: #1098F7;
42
  --accent-tertiary: #F5F6F7;
@@ -44,38 +44,12 @@ def get_leaderboard_css():
44
  --glow-secondary: rgba(16, 152, 247, 0.4);
45
  --glow-tertiary: rgba(245, 246, 247, 0.3);
46
  }
47
-
48
- html.light,
49
- html.light body,
50
- html.light .gradio-container {
51
- --bg-primary: #F8FAFC;
52
- --bg-secondary: rgba(15, 23, 42, 0.06);
53
- --bg-card: rgba(255, 255, 255, 0.92);
54
- --border-subtle: rgba(15, 23, 42, 0.08);
55
- --border-default: rgba(15, 23, 42, 0.12);
56
- --border-strong: rgba(15, 23, 42, 0.18);
57
- --text-primary: #0B1120;
58
- --text-secondary: #1E293B;
59
- --text-muted: #475569;
60
- --accent-primary: #F59E0B;
61
- --accent-secondary: #2563EB;
62
- --accent-tertiary: #111827;
63
- --glow-primary: rgba(245, 158, 11, 0.25);
64
- --glow-secondary: rgba(37, 99, 235, 0.2);
65
- --glow-tertiary: rgba(15, 23, 42, 0.18);
66
- }
67
-
68
- html.light [style*="color: white"],
69
- html.light [style*="color:white"],
70
- html.light [style*="#FFFFFF"],
71
- html.light [style*="#ffffff"] {
72
- color: var(--text-primary) !important;
73
- }
74
 
75
  /* Global font and background */
76
- html, body, .gradio-container {
77
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, 'Inter', sans-serif !important;
78
  background: var(--bg-primary) !important;
 
79
  }
80
 
81
  /* Headers and text */
@@ -86,15 +60,18 @@ def get_leaderboard_css():
86
  }
87
 
88
  p, span, div, li, ul li {
 
89
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
90
  }
91
 
92
  /* Labels and info text */
93
  label {
 
94
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
95
  }
96
 
97
  .gr-box label {
 
98
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
99
  }
100
 
@@ -181,7 +158,7 @@ def get_leaderboard_css():
181
 
182
  /* Radio button labels */
183
  input[type="radio"] + label {
184
- color: var(--text-primary) !important;
185
  }
186
 
187
  input[type="radio"]:checked {
@@ -194,22 +171,26 @@ def get_leaderboard_css():
194
  .dropdown {
195
  border-color: var(--border-default) !important;
196
  background: var(--bg-card) !important;
 
197
  transition: all 0.2s ease !important;
198
  }
199
 
200
  /* Dropdown option styling */
201
  .dropdown option {
202
  background: var(--bg-card) !important;
 
203
  }
204
 
205
  /* Gradio dropdown specific styling */
206
  .gradio-dropdown select,
207
  .gradio-dropdown [role="combobox"],
208
  .gradio-dropdown input {
 
209
  background: var(--bg-card) !important;
210
  }
211
 
212
  .gradio-dropdown option {
 
213
  background: var(--bg-card) !important;
214
  }
215
 
@@ -229,16 +210,19 @@ def get_leaderboard_css():
229
  overflow-y: auto !important;
230
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
231
  box-shadow: 0 4px 16px rgba(0, 0, 0, 0.3) !important;
 
232
  }
233
 
234
  /* Table cells and headers */
235
  .dataframe td,
236
  .dataframe th {
 
237
  }
238
 
239
  /* Button styling */
240
  button {
241
  background: var(--bg-card) !important;
 
242
  border: 1px solid var(--border-default) !important;
243
  transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
244
  }
@@ -379,7 +363,7 @@ def get_leaderboard_css():
379
  display: inline-block !important;
380
  padding: 14px 28px !important;
381
  background: #ffd21e !important;
382
- color: var(--text-primary) !important;
383
  text-decoration: none !important;
384
  border-radius: 16px !important;
385
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
@@ -398,7 +382,7 @@ def get_leaderboard_css():
398
  transform: translateY(-3px) !important;
399
  box-shadow: 0 12px 32px rgba(255, 210, 30, 0.5), 0 8px 16px rgba(0, 0, 0, 0.4) !important;
400
  background: #ffd21e !important;
401
- color: var(--text-primary) !important;
402
  text-decoration: none !important;
403
  text-shadow: 0 2px 6px rgba(0, 0, 0, 0.45) !important;
404
  }
@@ -440,176 +424,24 @@ def get_leaderboard_css():
440
  border-color: #ffd21e !important;
441
  box-shadow: 0 8px 24px rgba(255, 210, 30, 0.3), 0 4px 12px rgba(0, 0, 0, 0.4) !important;
442
  text-decoration: none !important;
443
- color: var(--text-primary) !important;
444
- }
445
-
446
- /* Ensure key hero/body text stays bright */
447
- .hero-subtitle,
448
- .section-lead,
449
- .section-subtitle,
450
- .criteria-card li,
451
- .scenario-body,
452
- .hero-action-button,
453
- .hero-action-button span,
454
- #lang-toggle-btn,
455
- #lang-toggle-btn button {
456
- color: #FFFFFF !important;
457
- }
458
-
459
- .secondary.svelte-1ixn6qd {
460
- color: #FFFFFF !important;
461
- }
462
-
463
- /* Responsive adjustments */
464
- @media (max-width: 1024px) {
465
- .hero-title {
466
- font-size: 4.5rem !important;
467
- }
468
- .hero-subtitle {
469
- font-size: 1.6rem !important;
470
- }
471
- .hero-actions {
472
- flex-wrap: wrap !important;
473
- gap: 12px !important;
474
- }
475
- .performance-card {
476
- padding: 24px !important;
477
- }
478
- .domain-selector-container {
479
- padding: 24px !important;
480
- }
481
- .dashboard-section {
482
- padding: 28px !important;
483
- }
484
- }
485
-
486
- @media (max-width: 768px) {
487
- .hero-banner-wrapper {
488
- width: 100% !important;
489
- margin: 0 0 16px 0 !important;
490
- }
491
- .hero-title {
492
- font-size: 3.2rem !important;
493
- }
494
- .hero-subtitle {
495
- font-size: 1.3rem !important;
496
- }
497
- .hero-actions {
498
- flex-direction: column !important;
499
- align-items: stretch !important;
500
- }
501
- .hero-action-button {
502
- width: 100% !important;
503
- justify-content: center !important;
504
- }
505
- .dashboard-section,
506
- .domain-selector-container,
507
- .performance-card {
508
- margin: 20px 12px !important;
509
- padding: 20px !important;
510
- }
511
- .performance-card .card-body {
512
- grid-template-columns: 1fr !important;
513
- gap: 20px !important;
514
- }
515
- .radar-slot {
516
- width: 100% !important;
517
- max-width: 260px !important;
518
- margin: 0 auto !important;
519
- }
520
- .v2-table-container {
521
- overflow-x: auto !important;
522
- }
523
- .v2-styled-table {
524
- min-width: 720px !important;
525
- }
526
- .hero-actions svg {
527
- width: 18px !important;
528
- height: 18px !important;
529
- }
530
- .section-title {
531
- font-size: 1.8rem !important;
532
- }
533
- .section-lead,
534
- .section-subtitle {
535
- font-size: 1rem !important;
536
- }
537
- .criteria-card {
538
- padding: 16px !important;
539
- }
540
- .criteria-grid {
541
- grid-template-columns: 1fr !important;
542
- gap: 16px !important;
543
- }
544
- .phase-grid {
545
- grid-template-columns: 1fr !important;
546
- }
547
- .hero-subtitle,
548
- .section-lead,
549
- .section-subtitle,
550
- .criteria-card li,
551
- .scenario-body {
552
- text-align: left !important;
553
- }
554
- }
555
-
556
- @media (max-width: 480px) {
557
- .hero-title {
558
- font-size: 2.4rem !important;
559
- }
560
- .hero-subtitle {
561
- font-size: 1.1rem !important;
562
- }
563
- .hero-action-button {
564
- font-size: 0.95rem !important;
565
- padding: 10px 16px !important;
566
- }
567
- .performance-card {
568
- padding: 18px !important;
569
- }
570
- .card-top-row {
571
- flex-direction: column !important;
572
- gap: 12px !important;
573
- }
574
- .rank-panel {
575
- align-self: flex-start !important;
576
- }
577
- .model-selector-container,
578
- .level-selector-container {
579
- margin: 0 !important;
580
- }
581
- .hero-banner-wrapper {
582
- margin-bottom: 12px !important;
583
- }
584
- }
585
- }
586
-
587
- /* Language toggle button */
588
- #lang-toggle-btn button,
589
- #lang-toggle-btn {
590
  color: #FFFFFF !important;
591
- border-color: #ffd21e !important;
592
- }
593
-
594
- .hero-action-button {
595
- border-color: #ffd21e !important;
596
  }
597
 
598
  /* Numeric content styling */
599
  .numeric-cell, .metric-value, .rank-value,
600
  .level-tile-score, .core-metric-card .metric-value {
601
- color: var(--text-primary) !important;
602
  font-family: 'Geist Mono', monospace !important;
603
  }
604
 
605
  /* Table content */
606
  td, th, table * {
607
- color: var(--text-primary) !important;
608
  }
609
 
610
  /* All numeric and data elements */
611
  .performance-card *, .v2-styled-table *, .dataframe * {
612
- color: var(--text-primary) !important;
613
  }
614
 
615
  /* Enhanced dropdown styling - more specific selectors
@@ -622,18 +454,20 @@ def get_leaderboard_css():
622
  .model-dropdown [role="combobox"],
623
  .model-dropdown button {
624
  background: rgba(1, 9, 26, 0.95) !important;
 
625
  border: 1px solid var(--border-default) !important;
626
  border-radius: 8px !important;
627
  }
628
-
629
  .gradio-dropdown option,
630
  .model-dropdown option {
631
  background: rgba(1, 9, 26, 0.95) !important;
 
632
  }
633
 
634
  /* Force dropdown text color */
635
  /* .gradio-dropdown *, .model-dropdown * {
636
- color: var(--text-primary) !important;
637
  } */
638
 
639
  /* Gradio 5.x compatible dropdown styling */
@@ -641,31 +475,22 @@ def get_leaderboard_css():
641
  .gradio-container [data-testid="dropdown"],
642
  .gradio-container select {
643
  background-color: rgba(1, 9, 26, 0.95) !important;
 
644
  border: 1px solid rgba(245, 246, 247, 0.12) !important;
645
  }
646
-
647
  .gradio-container .gradio-dropdown option,
648
  .gradio-container select option {
649
  background-color: rgba(1, 9, 26, 0.95) !important;
 
650
  }
651
-
652
  /* Target the actual visible text in dropdown */
653
  .gradio-container [role="combobox"],
654
  .gradio-container .gradio-dropdown .wrap > div {
 
655
  background-color: rgba(1, 9, 26, 0.95) !important;
656
  }
657
 
658
- html.light .model-dropdown .gradio-dropdown,
659
- html.light .model-dropdown [role="combobox"],
660
- html.light .model-dropdown button,
661
- html.light .gradio-container [data-testid="dropdown"],
662
- html.light .gradio-container select,
663
- html.light .gradio-container [role="combobox"],
664
- html.light .gradio-container .gradio-dropdown .wrap > div {
665
- background-color: rgba(255, 255, 255, 0.95) !important;
666
- border-color: rgba(15, 23, 42, 0.12) !important;
667
- box-shadow: 0 8px 20px rgba(15, 23, 42, 0.08) !important;
668
- }
669
-
670
  </style>
671
  """
 
34
  --border-subtle: rgba(245, 246, 247, 0.08);
35
  --border-default: rgba(245, 246, 247, 0.12);
36
  --border-strong: rgba(245, 246, 247, 0.2);
37
+ --text-primary: #F5F6F7;
38
+ --text-secondary: #94A3B8;
39
+ --text-muted: #64748B;
40
  --accent-primary: #ffd21e;
41
  --accent-secondary: #1098F7;
42
  --accent-tertiary: #F5F6F7;
 
44
  --glow-secondary: rgba(16, 152, 247, 0.4);
45
  --glow-tertiary: rgba(245, 246, 247, 0.3);
46
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
  /* Global font and background */
49
+ .gradio-container {
50
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, 'Inter', sans-serif !important;
51
  background: var(--bg-primary) !important;
52
+ color: var(--text-primary) !important;
53
  }
54
 
55
  /* Headers and text */
 
60
  }
61
 
62
  p, span, div, li, ul li {
63
+ color: white !important;
64
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
65
  }
66
 
67
  /* Labels and info text */
68
  label {
69
+ color: white !important;
70
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
71
  }
72
 
73
  .gr-box label {
74
+ color: white !important;
75
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
76
  }
77
 
 
158
 
159
  /* Radio button labels */
160
  input[type="radio"] + label {
161
+ color: white !important;
162
  }
163
 
164
  input[type="radio"]:checked {
 
171
  .dropdown {
172
  border-color: var(--border-default) !important;
173
  background: var(--bg-card) !important;
174
+ color: white !important;
175
  transition: all 0.2s ease !important;
176
  }
177
 
178
  /* Dropdown option styling */
179
  .dropdown option {
180
  background: var(--bg-card) !important;
181
+ color: white !important;
182
  }
183
 
184
  /* Gradio dropdown specific styling */
185
  .gradio-dropdown select,
186
  .gradio-dropdown [role="combobox"],
187
  .gradio-dropdown input {
188
+ color: white !important;
189
  background: var(--bg-card) !important;
190
  }
191
 
192
  .gradio-dropdown option {
193
+ color: white !important;
194
  background: var(--bg-card) !important;
195
  }
196
 
 
210
  overflow-y: auto !important;
211
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
212
  box-shadow: 0 4px 16px rgba(0, 0, 0, 0.3) !important;
213
+ color: white !important;
214
  }
215
 
216
  /* Table cells and headers */
217
  .dataframe td,
218
  .dataframe th {
219
+ color: white !important;
220
  }
221
 
222
  /* Button styling */
223
  button {
224
  background: var(--bg-card) !important;
225
+ color: white !important;
226
  border: 1px solid var(--border-default) !important;
227
  transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
228
  }
 
363
  display: inline-block !important;
364
  padding: 14px 28px !important;
365
  background: #ffd21e !important;
366
+ color: #FFFFFF !important;
367
  text-decoration: none !important;
368
  border-radius: 16px !important;
369
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
 
382
  transform: translateY(-3px) !important;
383
  box-shadow: 0 12px 32px rgba(255, 210, 30, 0.5), 0 8px 16px rgba(0, 0, 0, 0.4) !important;
384
  background: #ffd21e !important;
385
+ color: #FFFFFF !important;
386
  text-decoration: none !important;
387
  text-shadow: 0 2px 6px rgba(0, 0, 0, 0.45) !important;
388
  }
 
424
  border-color: #ffd21e !important;
425
  box-shadow: 0 8px 24px rgba(255, 210, 30, 0.3), 0 4px 12px rgba(0, 0, 0, 0.4) !important;
426
  text-decoration: none !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
427
  color: #FFFFFF !important;
 
 
 
 
 
428
  }
429
 
430
  /* Numeric content styling */
431
  .numeric-cell, .metric-value, .rank-value,
432
  .level-tile-score, .core-metric-card .metric-value {
433
+ color: white !important;
434
  font-family: 'Geist Mono', monospace !important;
435
  }
436
 
437
  /* Table content */
438
  td, th, table * {
439
+ color: white !important;
440
  }
441
 
442
  /* All numeric and data elements */
443
  .performance-card *, .v2-styled-table *, .dataframe * {
444
+ color: white !important;
445
  }
446
 
447
  /* Enhanced dropdown styling - more specific selectors
 
454
  .model-dropdown [role="combobox"],
455
  .model-dropdown button {
456
  background: rgba(1, 9, 26, 0.95) !important;
457
+ color: white !important;
458
  border: 1px solid var(--border-default) !important;
459
  border-radius: 8px !important;
460
  }
461
+
462
  .gradio-dropdown option,
463
  .model-dropdown option {
464
  background: rgba(1, 9, 26, 0.95) !important;
465
+ color: white !important;
466
  }
467
 
468
  /* Force dropdown text color */
469
  /* .gradio-dropdown *, .model-dropdown * {
470
+ color: white !important;
471
  } */
472
 
473
  /* Gradio 5.x compatible dropdown styling */
 
475
  .gradio-container [data-testid="dropdown"],
476
  .gradio-container select {
477
  background-color: rgba(1, 9, 26, 0.95) !important;
478
+ color: white !important;
479
  border: 1px solid rgba(245, 246, 247, 0.12) !important;
480
  }
481
+
482
  .gradio-container .gradio-dropdown option,
483
  .gradio-container select option {
484
  background-color: rgba(1, 9, 26, 0.95) !important;
485
+ color: white !important;
486
  }
487
+
488
  /* Target the actual visible text in dropdown */
489
  .gradio-container [role="combobox"],
490
  .gradio-container .gradio-dropdown .wrap > div {
491
+ color: white !important;
492
  background-color: rgba(1, 9, 26, 0.95) !important;
493
  }
494
 
 
 
 
 
 
 
 
 
 
 
 
 
495
  </style>
496
  """
tabs/{leaderboard_v1_kr.py → leaderboard_v1.py} RENAMED
@@ -53,8 +53,7 @@ def create_leaderboard_v2_tab():
53
 
54
  # Clean and prepare data
55
  df = df.copy()
56
- exclude_cols = {'Model', 'Vendor', 'Model Type', 'LLM Type'}
57
- numeric_candidate_cols = [col for col in df.columns if col not in exclude_cols]
58
  for col in numeric_candidate_cols:
59
  df[col] = pd.to_numeric(df[col], errors='coerce')
60
 
@@ -119,45 +118,36 @@ def create_leaderboard_v2_tab():
119
  df['Call Validity'] = df[epr_cols].mean(axis=1)
120
 
121
  # Use LLM Type from CSV directly, with mapping to display names
122
- def normalize_model_type(value):
123
- """Normalize raw type labels from CSV (e.g., OSS/API) to display values."""
124
- if pd.isna(value):
125
- return None
126
- cleaned = str(value).strip()
127
- if not cleaned:
128
- return None
129
- upper = cleaned.upper()
130
- if upper in ("OSS", "OPEN SOURCE", "OPEN-SOURCE", "OPEN_SOURCE"):
131
- return "Open source"
132
- if upper in ("API", "PROPRIETARY", "PRIVATE", "CLOSED"):
133
- return "Proprietary"
134
- return None
135
-
136
- # Prefer explicit type columns from the CSV, then fall back to vendor mapping
137
- if 'Model Type' in df.columns:
138
- df['Model Type'] = df['Model Type'].apply(normalize_model_type)
139
- elif 'LLM Type' in df.columns:
140
- df['Model Type'] = df['LLM Type'].apply(normalize_model_type)
141
  else:
142
- df['Model Type'] = None
143
-
144
- vendor_model_type_map = {
145
- "OpenAI": "Proprietary",
146
- "Anthropic": "Proprietary",
147
- "Google": "Proprietary",
148
- "Microsoft": "Proprietary",
149
- "Mistral": "Proprietary",
150
- "Databricks": "Open source",
151
- "Meta": "Open source",
152
- "Alibaba": "Open source",
153
- "알리바바": "Open source", # Korean name for Alibaba
154
- "Kakao": "Open source",
155
- "SKT": "Open source",
156
- "KT": "Open source",
157
- "xAI": "Proprietary",
158
- }
159
- df['Model Type'] = df['Model Type'].fillna(df['Vendor'].map(vendor_model_type_map))
160
- df['Model Type'] = df['Model Type'].fillna('Proprietary')
161
 
162
  # Round numeric columns for better display
163
  round_three_cols = ['Avg AC', 'Avg TSQ', 'Avg Total Cost', 'Overall Success', 'Execution Accuracy',
@@ -234,36 +224,36 @@ def create_leaderboard_v2_tab():
234
  # Level metadata for the 7-stage task framework
235
  level_details = {
236
  "ALL": {
237
- "title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>ALL · 전체 태스크</span>",
238
- "description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>7개의 태스크 전반의 평균 성능을 한눈에 살펴보고 각 레벨 비교를 위한 기준점을 제공합니다.</span>"
239
  },
240
  "L1": {
241
- "title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L1 · 단일 도구 호출</span>",
242
- "description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>단일 도구 호출 능력과 기본적인 명령 수행 정확도를 평가합니다.</span>"
243
  },
244
  "L2": {
245
- "title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L2 · 도구 선택</span>",
246
- "description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>요구 사항에 맞는 도구를 고르고 적절한 파라미터로 호출하는 능력을 측정합니다.</span>"
247
  },
248
  "L3": {
249
- "title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L3 · 도구 순차 추론</span>",
250
- "description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>복수 단계의 순차적 reasoning을 통해 문제를 해결하는 과정을 검증합니다.</span>"
251
  },
252
  "L4": {
253
- "title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L4 · 도구 병렬 추론</span>",
254
- "description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>여러 소스의 정보를 병렬적으로 통합하고 요약하는 능력을 평가합니다.</span>"
255
  },
256
  "L5": {
257
- "title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L5 · 오류 처리와 강건성</span>",
258
- "description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>예상치 못한 오류나 실패 상황에 대한 인지와 대응 전략을 확인합니다.</span>"
259
  },
260
  "L6": {
261
- "title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L6 · 효율적인 도구 활용</span>",
262
- "description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>최소한의 호출과 비용으로 목표를 달성하는 운영 효율을 살펴봅니다.</span>"
263
  },
264
  "L7": {
265
- "title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L7 · 장기 컨텍스트 기억</span>",
266
- "description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>장기 대화 맥락을 유지하고 적절히 활용하는 능력을 집중적으로 분석합니다.</span>"
267
  }
268
  }
269
  default_level = "ALL"
@@ -301,7 +291,7 @@ def create_leaderboard_v2_tab():
301
  border-collapse: collapse;
302
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif;
303
  background: var(--bg-card);
304
- color: var(--text-primary);
305
  }
306
 
307
  .v2-styled-table thead {
@@ -315,7 +305,7 @@ def create_leaderboard_v2_tab():
315
  padding: 14px 12px;
316
  text-align: left;
317
  font-weight: 600;
318
- color: var(--text-primary);
319
  border-bottom: 2px solid var(--accent-primary);
320
  font-size: 13px;
321
  text-transform: uppercase;
@@ -329,7 +319,7 @@ def create_leaderboard_v2_tab():
329
  .v2-styled-table td {
330
  padding: 12px;
331
  border-bottom: 1px solid var(--border-subtle);
332
- color: var(--text-primary);
333
  transition: all 0.2s ease;
334
  }
335
 
@@ -349,30 +339,30 @@ def create_leaderboard_v2_tab():
349
 
350
  .model-name {
351
  font-weight: 500;
352
- color: var(--text-primary);
353
  transition: color 0.2s ease;
354
  }
355
 
356
  /* Keep model name color consistent on hover to emphasize row highlight */
357
  .v2-styled-table tr:hover .model-name {
358
- color: var(--text-primary);
359
  }
360
 
361
  .numeric-cell {
362
  font-family: 'Geist Mono', monospace;
363
  font-size: 13px;
364
  text-align: center;
365
- color: var(--text-primary);
366
  }
367
 
368
  .highlight-header {
369
  background: rgba(255, 210, 30, 0.14);
370
- color: var(--text-primary);
371
  }
372
 
373
  .highlight-cell {
374
  background: rgba(255, 210, 30, 0.08);
375
- color: var(--text-primary);
376
  font-weight: 600;
377
  }
378
  </style>
@@ -470,8 +460,8 @@ def create_leaderboard_v2_tab():
470
  return f"""
471
  <div class="domain-selector-container leaderboard-intro">
472
  <div class="domain-header">
473
- <h2 class="domain-title" style="color: var(--text-primary);">Agent Leaderboard · {level_title}</h2>
474
- <p class="domain-subtitle" style="color: var(--text-primary);">{level_description}</p>
475
  </div>
476
  <div class="dataframe-container">
477
  """
@@ -521,14 +511,6 @@ def create_leaderboard_v2_tab():
521
  # Load initial data
522
  initial_table = filter_and_sort_data(default_level, "All", "Overall Success", "Descending")
523
  initial_df = load_leaderboard_data() # Load raw data for model selector
524
- if not initial_df.empty:
525
- overall_success_numeric = pd.to_numeric(initial_df.get('Overall Success'), errors='coerce')
526
- if overall_success_numeric.notna().any():
527
- initial_df = initial_df.assign(**{'Overall Success': overall_success_numeric}).sort_values(
528
- 'Overall Success', ascending=False, na_position='last'
529
- )
530
- else:
531
- initial_df = initial_df.sort_values('Model')
532
  initial_selected_models = initial_df['Model'].tolist()[:5] if len(initial_df) > 0 else []
533
  initial_heatmap_models = initial_df['Model'].tolist()[:12] if len(initial_df) > 0 else []
534
  initial_heatmap = create_performance_heatmap(initial_df, initial_heatmap_models)
@@ -754,14 +736,12 @@ def create_leaderboard_v2_tab():
754
  # Header styles and navigation
755
  gr.HTML("""
756
  <style>
757
- @import url('https://fonts.googleapis.com/css2?family=Nanum+Gothic:wght@400;700;800&family=Gowun+Dodum&family=Do+Hyeon&display=swap');
758
-
759
  /* Enhanced button styling with better gradio compatibility */
760
  .header-action-button {
761
  display: inline-block !important;
762
  padding: 14px 28px !important;
763
  background: #ffd21e !important;
764
- color: var(--text-primary) !important;
765
  text-decoration: none !important;
766
  border-radius: 16px !important;
767
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
@@ -795,7 +775,7 @@ def create_leaderboard_v2_tab():
795
  transform: translateY(-3px) !important;
796
  box-shadow: 0 12px 32px rgba(255, 210, 30, 0.5), 0 8px 16px rgba(0, 0, 0, 0.4) !important;
797
  background: #ffd21e !important;
798
- color: var(--text-primary) !important;
799
  text-decoration: none !important;
800
  text-shadow: 0 2px 6px rgba(0, 0, 0, 0.45) !important;
801
  }
@@ -810,52 +790,34 @@ def create_leaderboard_v2_tab():
810
  filter: drop-shadow(0 0 8px rgba(255, 255, 255, 0.3));
811
  }
812
 
813
- .hero-banner-wrapper {
814
- position: relative;
815
- width: 100vw;
816
- margin: 0 calc(-50vw + 50%) 20px calc(-50vw + 50%);
817
- border-radius: 0 !important;
818
- overflow: hidden !important;
819
- box-shadow: 0 12px 32px rgba(0, 0, 0, 0.25) !important;
820
- }
821
-
822
- .hero-banner-wrapper::before {
823
- content: "";
824
- position: absolute;
825
- inset: 0;
826
- background: #01091A;
827
- z-index: 0;
828
- }
829
-
830
  #hero-banner {
831
- position: relative;
832
- width: 100% !important;
833
- height: auto !important;
834
- z-index: 1;
 
835
  }
836
-
837
  #hero-banner img {
838
- width: 100% !important;
839
- height: auto !important;
840
- display: block !important;
841
- object-fit: cover !important;
842
  }
843
 
844
  .hero-title {
845
- font-size: 10rem;
846
  font-weight: 800;
847
  line-height: 1.1;
848
  background: linear-gradient(135deg, #FFE082 0%, #FFC107 50%, #FFB300 100%);
849
  -webkit-background-clip: text;
850
  -webkit-text-fill-color: transparent;
851
  margin-bottom: 1rem;
852
- font-family: 'Nanum Gothic', sans-serif !important;
853
  }
854
 
855
  .hero-subtitle {
856
  color: var(--text-secondary);
857
- font-size: 3rem;
858
- font-family: 'Nanum Gothic', sans-serif !important;
859
  margin-top: 0;
860
  }
861
 
@@ -914,7 +876,6 @@ def create_leaderboard_v2_tab():
914
  box-shadow: 0 12px 30px rgba(0, 0, 0, 0.25);
915
  backdrop-filter: blur(12px);
916
  -webkit-backdrop-filter: blur(12px);
917
- font-family: 'Nanum Gothic', sans-serif !important;
918
  }
919
 
920
  .dashboard-section.emphasized {
@@ -934,16 +895,15 @@ def create_leaderboard_v2_tab():
934
  }
935
 
936
  .section-title {
937
- font-size: 3.75rem;
938
  font-weight: 700;
939
  color: var(--text-primary);
940
  margin-bottom: 12px;
941
  text-align: center !important;
942
- font-family: 'Nanum Gothic', sans-serif !important;
943
  }
944
 
945
  .section-lead, .section-subtitle {
946
- font-size: 1.32rem !important;
947
  color: var(--text-secondary);
948
  max-width: 720px;
949
  margin: 0 auto 24px auto;
@@ -952,7 +912,6 @@ def create_leaderboard_v2_tab():
952
  word-break: keep-all;
953
  white-space: normal;
954
  display: block;
955
- font-family: 'Nanum Gothic', sans-serif !important;
956
  }
957
 
958
  .phase-grid {
@@ -970,11 +929,10 @@ def create_leaderboard_v2_tab():
970
  }
971
 
972
  .phase-card h3 {
973
- font-size: 1.44rem !important;
974
  color: var(--text-primary);
975
  margin-bottom: 20px;
976
  font-weight: 700;
977
- font-family: 'Nanum Gothic', sans-serif !important;
978
  }
979
 
980
  .phase-chart {
@@ -1002,26 +960,11 @@ def create_leaderboard_v2_tab():
1002
 
1003
  .phase-chart span {
1004
  position: relative;
1005
- font-size: 1.2rem !important;
1006
  font-weight: 700;
1007
- color: var(--text-primary) !important;
1008
- font-family: 'Nanum Gothic', sans-serif !important;
1009
- }
1010
-
1011
- /* 추가적인 구체적 선택자 */
1012
- .phase-card .phase-chart span {
1013
- color: var(--text-primary) !important;
1014
- text-shadow: 0 1px 2px rgba(0, 0, 0, 0.8) !important;
1015
- font-family: 'Nanum Gothic', sans-serif !important;
1016
- }
1017
-
1018
- .phase-grid .phase-chart span {
1019
- color: var(--text-primary) !important;
1020
- z-index: 10 !important;
1021
- font-family: 'Nanum Gothic', sans-serif !important;
1022
  }
1023
 
1024
-
1025
  .phase-list {
1026
  list-style: none;
1027
  padding: 0;
@@ -1036,8 +979,7 @@ def create_leaderboard_v2_tab():
1036
  background: rgba(245, 246, 247, 0.05);
1037
  border: 1px solid rgba(245, 246, 247, 0.08);
1038
  color: var(--text-secondary);
1039
- font-size: 1.08rem !important;
1040
- font-family: 'Nanum Gothic', sans-serif !important;
1041
  }
1042
 
1043
  .scenario-body {
@@ -1100,7 +1042,7 @@ def create_leaderboard_v2_tab():
1100
  /* Responsive design */
1101
  @media (max-width: 768px) {
1102
  .hero-title {
1103
- font-size: 10rem;
1104
  }
1105
  .hero-action-button {
1106
  width: 100% !important;
@@ -1124,7 +1066,7 @@ def create_leaderboard_v2_tab():
1124
  gap: 8px;
1125
  }
1126
  .section-title {
1127
- font-size: 2.7rem;
1128
  }
1129
  .phase-chart {
1130
  width: 100px;
@@ -1138,15 +1080,13 @@ def create_leaderboard_v2_tab():
1138
  </style>
1139
  """)
1140
 
1141
- gr.HTML("<div class='hero-banner-wrapper'>")
1142
  gr.Image(
1143
- value="banner_wide.png",
1144
  show_label=False,
1145
  interactive=False,
1146
  type="filepath",
1147
  elem_id="hero-banner"
1148
  )
1149
- gr.HTML("</div>")
1150
 
1151
  gr.HTML("""
1152
  <div style="text-align: center; padding: 20px 0;">
@@ -1159,35 +1099,35 @@ def create_leaderboard_v2_tab():
1159
  gr.HTML("""
1160
  <div class="hero-actions">
1161
  <a href="https://hugging-face-krew.github.io/" target="_blank" rel="noopener noreferrer" class="hero-action-button">
1162
- <svg viewBox="0 0 24 24" fill="none" stroke="white" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
1163
  <path d="M15 7h3a5 5 0 0 1 5 5 5 5 0 0 1-5 5h-3m-6 0H6a5 5 0 0 1-5-5 5 5 0 0 1 5-5h3"/>
1164
  <line x1="8" y1="12" x2="16" y2="12"/>
1165
  </svg>
1166
- <span>블로그</span>
1167
  </a>
1168
  <a href="https://github.com/Hugging-Face-KREW/Ko-AgentBench" target="_blank" rel="noopener noreferrer" class="hero-action-button">
1169
- <svg viewBox="0 0 24 24" fill="none" stroke="white" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
1170
  <path d="M9 19c-5 1.5-5-2.5-7-3"/>
1171
  <path d="M20 21v-3.87a3.37 3.37 0 0 0-.94-2.61c3.14-.35 6.44-1.54 6.44-7A5.44 5.44 0 0 0 20 4.77 5.07 5.07 0 0 0 19.91 1S18.73.65 16 2.48a13.38 13.38 0 0 0-7 0C6.27.65 5.09 1 5.09 1A5.07 5.07 0 0 0 5 4.77a5.44 5.44 0 0 0-1.5 3.78c0 5.42 3.3 6.61 6.44 7A3.37 3.37 0 0 0 9 18.13V22"/>
1172
  </svg>
1173
  <span>GitHub</span>
1174
  </a>
1175
  <a href="https://huggingface.co/datasets/huggingface-KREW/Ko-AgentBench" target="_blank" rel="noopener noreferrer" class="hero-action-button">
1176
- <svg viewBox="0 0 24 24" fill="none" stroke="white" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
1177
  <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/>
1178
  <polyline points="7 10 12 15 17 10"/>
1179
  <line x1="12" y1="15" x2="12" y2="3"/>
1180
  </svg>
1181
- <span>데이터셋</span>
1182
  </a>
1183
- <a href="https://github.com/Hugging-Face-KREW/Ko-AgentBench?tab=readme-ov-file#-%ED%8F%89%EA%B0%80-%EC%A7%80%ED%91%9C" target="_blank" rel="noopener noreferrer" class="hero-action-button">
1184
- <svg viewBox="0 0 24 24" fill="none" stroke="white" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
1185
  <path d="M3 3v18h18"/>
1186
  <path d="M7 17v-6"/>
1187
  <path d="M12 17V7"/>
1188
  <path d="M17 17v-3"/>
1189
  </svg>
1190
- <span>평가 지표</span>
1191
  </a>
1192
  </div>
1193
  """)
@@ -1196,31 +1136,31 @@ def create_leaderboard_v2_tab():
1196
  gr.HTML("""
1197
  <div class="dashboard-section">
1198
  <div class="section-header">
1199
- <h2 class="section-title" style="font-family: 'Nanum Gothic', sans-serif; font-size: 2.5rem;">단계별 태스크 설계</h2>
1200
  </div>
1201
  <p class="section-lead" style="text-align: center; margin: 0 auto 24px auto; max-width: 720px; line-height: 1.7; word-break: keep-all;">단순 도구 호출부터 장기적 맥락 능력, 강건성 처리 능력까지 에이전트의 능력을 7단계로 입체적으로 분석하였습니다.</p>
1202
  <div class="phase-grid">
1203
  <div class="phase-card">
1204
- <h3>단일 턴</h3>
1205
  <div class="phase-chart" style="--progress:80%;">
1206
- <span style="color: var(--text-primary) !important; text-shadow: 0 1px 2px rgba(0,0,0,0.8) !important; font-weight: 700 !important;">80%</span>
1207
  </div>
1208
  <ul class="phase-list">
1209
- <li style="color: var(--text-primary);">L1: 단일 도구 호출</li>
1210
- <li style="color: var(--text-primary);">L2: 도구 선택</li>
1211
- <li style="color: var(--text-primary);">L3: 도구 순차 추론</li>
1212
- <li style="color: var(--text-primary);">L4: 도구 병렬 추론</li>
1213
- <li style="color: var(--text-primary);">L5: 오류 처리와 강건성</li>
1214
  </ul>
1215
  </div>
1216
  <div class="phase-card">
1217
- <h3>다중 턴</h3>
1218
  <div class="phase-chart" style="--progress:20%;">
1219
- <span style="color: var(--text-primary) !important; text-shadow: 0 1px 2px rgba(0,0,0,0.8) !important; font-weight: 700 !important;">20%</span>
1220
  </div>
1221
  <ul class="phase-list">
1222
- <li style="color: var(--text-primary);">L6: 효율적인 도구 활용</li>
1223
- <li style="color: var(--text-primary);">L7: 장기 컨텍스트 기억</li>
1224
  </ul>
1225
  </div>
1226
  </div>
@@ -1231,21 +1171,20 @@ def create_leaderboard_v2_tab():
1231
  gr.HTML("""
1232
  <div class="dashboard-section emphasized">
1233
  <div class="section-header">
1234
- <h2 class="section-title" style="font-size: 2.0rem;">18가지 한국형 API 사용 및 실생활 환경에 특화된 고품질 시나리오 구성</h2>
1235
  </div>
1236
  <div class="scenario-body">
1237
- <p style="color: var(--text-primary);">네이버, 카카오국내 실사용 API를 기반으로, '약속 예약', '블로그 후기 검색'처럼 일상에 유용한 현실적인 문제 해결 시나리오를 구현했습니다.</p>
1238
  </div>
1239
-
1240
- </div>
1241
  <div class="section-flow">⌄</div>
 
1242
  """)
1243
 
1244
  # Section 3: 핵심 평가 기준
1245
  gr.HTML("""
1246
  <div class="dashboard-section">
1247
  <div class="section-header">
1248
- <h2 class="section-title" style="font-size: 2.0rem;">핵심 평가 기준</h2>
1249
  </div>
1250
  <div class="criteria-grid">
1251
  <div class="criteria-card">
@@ -1279,8 +1218,6 @@ def create_leaderboard_v2_tab():
1279
  # Domain filter section with enhanced styling
1280
  gr.HTML("""
1281
  <style>
1282
- @import url('https://fonts.googleapis.com/css2?family=Nanum+Gothic:wght@400;700;800&family=Gowun+Dodum&family=Do+Hyeon&display=swap');
1283
-
1284
  /* Enhanced domain selector styling */
1285
  .domain-selector-container {
1286
  background: #ffd21e0d;
@@ -1383,11 +1320,10 @@ def create_leaderboard_v2_tab():
1383
  -webkit-background-clip: text;
1384
  background-clip: text;
1385
  -webkit-text-fill-color: transparent;
1386
- text-shadow: 0 0 3px rgba(255, 210, 30, 0.08), 0 0 8px rgba(255, 210, 30, 0.05);
1387
- filter: drop-shadow(0 0 2px rgba(255, 210, 30, 0.06));
1388
  letter-spacing: 0.02em;
1389
- animation: title-shimmer 1.25s ease-in-out infinite;
1390
- font-family: 'Nanum Gothic', sans-serif !important;
1391
  }
1392
 
1393
  @keyframes title-shimmer {
@@ -1675,8 +1611,8 @@ def create_leaderboard_v2_tab():
1675
 
1676
  .model-dropdown select,
1677
  .model-dropdown [role="combobox"] {
1678
- background: #000000 !important;
1679
- border: 1px solid #333333 !important;
1680
  border-radius: 999px !important;
1681
  padding: 12px 24px !important;
1682
  color: var(--text-primary) !important;
@@ -1707,8 +1643,8 @@ def create_leaderboard_v2_tab():
1707
  gap: 8px !important;
1708
  width: 100% !important;
1709
  padding: 12px 24px !important;
1710
- background: #000000 !important;
1711
- border: 1px solid #333333 !important;
1712
  border-radius: 999px !important;
1713
  color: var(--text-primary) !important;
1714
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
@@ -1765,7 +1701,7 @@ def create_leaderboard_v2_tab():
1765
  background: #ffd21e !important;
1766
  border: 1px solid rgba(255, 210, 30, 0.6) !important;
1767
  border-radius: 999px !important;
1768
- color: var(--text-primary) !important;
1769
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
1770
  font-weight: 600 !important;
1771
  font-size: 0.95rem !important;
@@ -1812,7 +1748,7 @@ def create_leaderboard_v2_tab():
1812
  font-size: 1.5rem;
1813
  margin-bottom: 4px;
1814
  display: block;
1815
- filter: drop-shadow(0 0 10px white);
1816
  }
1817
 
1818
  .domain-name {
@@ -1827,7 +1763,7 @@ def create_leaderboard_v2_tab():
1827
  top: 8px;
1828
  right: 8px;
1829
  background: var(--accent-primary);
1830
- color: var(--text-primary);
1831
  font-size: 0.75rem;
1832
  padding: 2px 8px;
1833
  border-radius: 12px;
@@ -1999,147 +1935,92 @@ def create_leaderboard_v2_tab():
1999
  padding: 12px 20px !important;
2000
  font-size: 0.95rem !important;
2001
  }
2002
-
2003
- /* Leaderboard controls row styling */
2004
- .leaderboard-controls-row {
2005
- margin: 20px 0 !important;
2006
- padding: 20px !important;
2007
- background: transparent !important;
2008
- border: none !important;
2009
- gap: 40px !important;
2010
- }
2011
-
2012
- .leaderboard-controls-row .gr-column,
2013
- .leaderboard-controls-row .gr-row,
2014
- .leaderboard-controls-row .gr-box,
2015
- .leaderboard-controls-row .gradio-column,
2016
- .leaderboard-controls-row .gradio-row,
2017
- .leaderboard-controls-row .gradio-group {
2018
- background: transparent !important;
2019
- border: none !important;
2020
- box-shadow: none !important;
2021
- padding: 0 !important;
2022
- }
2023
-
2024
- /* Remove all container backgrounds for leaderboard controls */
2025
- .leaderboard-controls-row * {
2026
- background-color: transparent !important;
2027
- background-image: none !important;
2028
- border: none !important;
2029
- box-shadow: none !important;
2030
- }
2031
-
2032
- .leaderboard-controls-row .inline-radio,
2033
- .leaderboard-controls-row .domain-radio {
2034
- background: transparent !important;
2035
- border: none !important;
2036
- box-shadow: none !important;
2037
- }
2038
-
2039
- /* Inline radio styling for integrated controls */
2040
- .inline-radio {
2041
- background: transparent !important;
2042
- border: none !important;
2043
- box-shadow: none !important;
2044
- padding: 0 !important;
2045
- }
2046
-
2047
- .inline-radio .wrap {
2048
- display: flex !important;
2049
- gap: 8px !important;
2050
- flex-wrap: wrap !important;
2051
- justify-content: flex-start !important;
2052
- background: transparent !important;
2053
- border: none !important;
2054
- box-shadow: none !important;
2055
- padding: 0 !important;
2056
- }
2057
-
2058
- .inline-radio label {
2059
- padding: 8px 16px !important;
2060
- background: rgba(245, 246, 247, 0.06) !important;
2061
- border: 1px solid var(--border-subtle) !important;
2062
- border-radius: 20px !important;
2063
- font-size: 0.85rem !important;
2064
- color: var(--text-primary) !important;
2065
- transition: all 0.2s ease !important;
2066
- cursor: pointer !important;
2067
- }
2068
-
2069
- .inline-radio label:hover {
2070
- background: rgba(255, 210, 30, 0.12) !important;
2071
- border-color: var(--accent-primary) !important;
2072
- }
2073
-
2074
- .inline-radio input[type="radio"]:checked + label,
2075
- .inline-radio label[aria-checked="true"] {
2076
- background: rgba(255, 210, 30, 0.2) !important;
2077
- border-color: var(--accent-primary) !important;
2078
- color: var(--text-primary) !important;
2079
- font-weight: 600 !important;
2080
- }
2081
  </style>
2082
 
2083
  """)
2084
 
2085
  level_options = list(level_details.keys())
2086
 
2087
- # Main leaderboard table with dynamic title and integrated controls
2088
- leaderboard_title = gr.HTML(update_leaderboard_title(default_level))
 
 
 
 
 
 
 
 
 
 
 
 
 
2089
 
2090
- # Integrated controls within leaderboard section - stacked vertically
2091
- gr.HTML("<p style='color: var(--text-primary); margin: 5px 0 5px 0; font-size: 1.2rem;'>태스크 레벨 선택</p>")
2092
- domain_filter = gr.Radio(
2093
- choices=level_options,
2094
- value=default_level,
2095
- label="",
2096
- interactive=True,
2097
- container=False,
2098
- elem_classes=["domain-radio", "inline-radio"]
2099
- )
2100
-
2101
- gr.HTML("<p style='color: var(--text-primary); margin: 5px 0 0px 0; font-size: 1.2rem;'>🔍 필터 및 정렬</p>")
2102
- with gr.Row():
2103
- with gr.Column(scale=1):
2104
- gr.HTML("<span style='color: var(--text-primary); font-size: 1.2rem; margin-bottom: 5px; display: block;'>모델 접근</span>")
2105
- model_type_filter = gr.Radio(
2106
- choices=["All", "OSS", "API"],
2107
- value="All",
2108
- label="",
2109
- elem_classes=["domain-radio", "inline-radio"],
2110
- container=False
2111
- )
2112
- with gr.Column(scale=1):
2113
- gr.HTML("<span style='color: var(--text-primary); font-size: 1.2rem; margin-bottom: 5px; display: block;'>정렬 순서</span>")
2114
- sort_order = gr.Radio(
2115
- choices=["Descending", "Ascending"],
2116
- value="Descending",
2117
- label="",
2118
- elem_classes=["domain-radio", "inline-radio"],
2119
- container=False
2120
- )
 
2121
 
2122
  leaderboard_table = gr.HTML(initial_table)
2123
 
 
 
 
 
2124
  # Radar Chart Section
2125
  gr.HTML("""
2126
  <div class="domain-selector-container domain-performance-container">
2127
  <div class="domain-header">
2128
- <h2 class="domain-title" style="color: var(--text-primary);">핵심 역량 레이더</h2>
2129
- <p class="domain-subtitle" style="color: var(--text-primary);">6가지 필수 핵심 요소(성공, 실행, 추론, 강건성, 효율성, 호출 유효성)를 추적합니다.</p>
2130
  </div>
2131
  """)
2132
-
2133
- gr.HTML("<p style='color: var(--text-primary); margin: 10px 0 0 0; font-size: 1.2rem; font-family: \"Nanum Gothic\", sans-serif;'>비교할 모델을 선택하세요. 최대 5개까지 가능합니다.</p>")
2134
- # gr.HTML("<p style='color: #b0b0b0; margin: 0 0 10px 0; font-size: 0.9rem;'>모델은 최대 5개까지 선택 가능 합니다.</p>")
2135
- model_selector = gr.Dropdown(
2136
- choices=initial_df['Model'].tolist()[:10],
2137
- value=initial_df['Model'].tolist()[:5],
2138
- multiselect=True,
2139
- label="",
2140
- info=None,
2141
- container=False,
2142
- )
 
 
 
 
 
 
2143
 
2144
  # Radar chart plot - wrapped in centered container
2145
  gr.HTML('<div class="chart-container radar-chart-container">')
@@ -2155,29 +2036,292 @@ def create_leaderboard_v2_tab():
2155
 
2156
  gr.HTML("</div>")
2157
 
2158
-
2159
- # Define generate_performance_card function before using it
2160
- def generate_performance_card(model_name):
2161
- """Generate HTML for the model performance card"""
2162
- if not model_name:
2163
- return """<div style="text-align: center; color: var(--text-secondary); padding: 40px;">
2164
- Please select a model to generate its performance card
2165
- </div>"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2166
 
2167
- # Get model data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2168
  df = load_leaderboard_data()
2169
- model_data = df[df['Model'] == model_name]
 
2170
 
2171
- if model_data.empty:
2172
- return """<div style="text-align: center; color: var(--text-secondary); padding: 40px;">
2173
- Model not found in the database
2174
- </div>"""
2175
 
2176
- row = model_data.iloc[0]
 
 
 
 
 
 
2177
 
2178
- # Get overall rank based on overall success
2179
- df_with_success = df.copy()
2180
- df_with_success['Overall Success'] = pd.to_numeric(df_with_success.get('Overall Success', pd.Series()), errors='coerce')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2181
  df_with_success = df_with_success[df_with_success['Overall Success'].notna()]
2182
  df_sorted = df_with_success.sort_values('Overall Success', ascending=False).reset_index(drop=True)
2183
  try:
@@ -2308,22 +2452,18 @@ def create_leaderboard_v2_tab():
2308
  gr.HTML("""
2309
  <div class="domain-selector-container performance-card-container">
2310
  <div class="domain-header">
2311
- <h2 class="domain-title" style="color: var(--text-primary);">모델 성능 카드</h2>
2312
- <p class="domain-subtitle" style="color: var(--text-primary);">
2313
- 모델의 성능 스펙트럼을 6대 핵심 지표와 L1~L7 단계별 종합 성공률(SR)로 시각화한 정밀 분석 카드를 확인해보세요.
2314
- </p>
2315
- <p class="domain-note" style="color: #bdbdbd; font-size: 0.85em; margin-top: 4px;">
2316
- ※ Rank는 L1~L7 단계별 SR의 평균값을 기준으로 선정되었습니다.
2317
- </p>
2318
  </div>
2319
-
2320
  <div class="performance-card-content">
2321
  """)
2322
-
2323
  with gr.Column(elem_classes=["domain-selector-container", "model-selector-container"], elem_id="model-selector-box"):
2324
  gr.HTML("""
2325
- <p class="domain-subtitle" style="color: var(--text-primary);">분석 카드를 생성할 모델을 선택하세요.</p>
2326
-
 
 
2327
  """)
2328
  card_model_selector = gr.Dropdown(
2329
  choices=initial_df['Model'].tolist(),
@@ -2331,10 +2471,10 @@ def create_leaderboard_v2_tab():
2331
  label="",
2332
  info=None,
2333
  container=False,
2334
- # elem_classes=["model-dropdown"]
2335
  )
2336
  download_card_btn = gr.Button(
2337
- "PNG로 다운로드",
2338
  elem_id="download-card-btn",
2339
  elem_classes=["pill-button"]
2340
  )
@@ -2353,275 +2493,6 @@ def create_leaderboard_v2_tab():
2353
  </div>
2354
  </div>
2355
  """)
2356
-
2357
-
2358
- # Level metric breakdown section
2359
- gr.HTML("""
2360
- <div class="domain-selector-container domain-performance-container level-metrics-wrapper">
2361
- <div class="domain-header">
2362
- <h2 class="domain-title" style="color: var(--text-primary);">레벨별 상세 지표</h2>
2363
- <p class="domain-subtitle" style="color: var(--text-primary);">각 Ko-AgentBench 단계별 고유 평가 지표를 통해 모델 점수를 비교하고 더 자세히 살펴보세요.</p>
2364
- </div>
2365
- """)
2366
-
2367
- with gr.Column(elem_classes=["domain-selector-container", "level-selector-container"], elem_id="level-selector-box"):
2368
- level_metric_selector = gr.Dropdown(
2369
- choices=level_ids,
2370
- value=level_ids[0] if level_ids else None,
2371
- multiselect=False,
2372
- label="",
2373
- info=None,
2374
- container=False,
2375
- elem_classes=["level-dropdown"]
2376
- )
2377
- level_model_selector = gr.Dropdown(
2378
- choices=initial_level_model_choices,
2379
- value=initial_level_model_values,
2380
- multiselect=True,
2381
- label="",
2382
- info=None,
2383
- container=False,
2384
- elem_classes=["model-dropdown", "level-model-dropdown"]
2385
- )
2386
-
2387
- gr.HTML('<div class="chart-container level-metric-chart-container">')
2388
- level_metric_chart = gr.Plot(
2389
- label="",
2390
- value=initial_level_metric_chart,
2391
- elem_classes=["level-metric-plot", "plot-container"]
2392
- )
2393
- gr.HTML("""
2394
- </div>
2395
- </div>
2396
- """)
2397
-
2398
- # # Heatmap section
2399
- # gr.HTML("""
2400
- # <div class="domain-selector-container domain-performance-container heatmap-wrapper">
2401
- # <div class="domain-header">
2402
- # <h2 class="domain-title" style="color: var(--text-primary);">종합 성능 히트맵</h2>
2403
- # <p class="domain-subtitle" style="color: var(--text-primary);">각 모델의 L1~L7 Ko-AgentBench SR(성공률) 점수를 한눈에 보세요.</p>
2404
- # </div>
2405
- # <div class="chart-container heatmap-chart-container">
2406
- # """)
2407
- # heatmap_chart = gr.Plot(
2408
- # label="",
2409
- # value=initial_heatmap,
2410
- # elem_classes=["heatmap-plot", "plot-container"]
2411
- # )
2412
- # gr.HTML("""
2413
- # </div>
2414
- # </div>
2415
- # """)
2416
-
2417
- # Update functions
2418
- def get_optimal_sort_order(sort_by_value):
2419
- """Return the optimal sort order for a given metric"""
2420
- # Metrics where higher is better (descending)
2421
- descending_metrics = ["Overall Success"] + [sr_column_map[level] for level in level_ids]
2422
-
2423
- # Metrics where lower is better (ascending)
2424
- ascending_metrics = []
2425
-
2426
- if sort_by_value in descending_metrics:
2427
- return "Descending"
2428
- elif sort_by_value in ascending_metrics:
2429
- return "Ascending"
2430
- else:
2431
- return "Descending" # Default fallback
2432
-
2433
-
2434
-
2435
- def update_table(level_filter, model_type_filter, sort_order):
2436
- title_html = update_leaderboard_title(level_filter)
2437
- sort_metric = "Overall Success" if level_filter == "ALL" else sr_column_map.get(resolve_level(level_filter), "Overall Success")
2438
- table_html = filter_and_sort_data(level_filter, model_type_filter, sort_metric, sort_order)
2439
- return title_html, table_html
2440
-
2441
- def update_radar_chart(domain_filter, model_type_filter, sort_order, selected_models, selected_level, level_selected_models):
2442
- # Get filtered dataframe
2443
- df = load_leaderboard_data()
2444
- sort_metric = "Overall Success" if domain_filter == "ALL" else sr_column_map.get(resolve_level(domain_filter), "Overall Success")
2445
- filtered_df, _, _ = apply_filters(df, domain_filter, model_type_filter, sort_order, sort_metric)
2446
-
2447
- # Update model selector choices based on filtered data
2448
- available_models_all = filtered_df['Model'].tolist()
2449
- available_models = available_models_all[:15] # Top 15 from filtered results
2450
-
2451
- # If selected models are not in available models, reset to top 5
2452
- if selected_models:
2453
- valid_selected = [m for m in selected_models if m in available_models]
2454
- # Check if more than 5 models are selected and show alert
2455
- if len(valid_selected) > 5:
2456
- gr.Warning("최대 5개 까지만 선택 가능합니다")
2457
- # Remove the last selected item (6th item) instead of keeping first 5
2458
- valid_selected = valid_selected[:-1]
2459
- if not valid_selected:
2460
- valid_selected = available_models[:5]
2461
- else:
2462
- valid_selected = available_models[:5]
2463
-
2464
- # Create radar chart
2465
- chart = create_domain_radar_chart(filtered_df, valid_selected)
2466
-
2467
- # Prepare heatmap order prioritizing selected models
2468
-
2469
-
2470
- # Level metric chart
2471
- effective_level = selected_level if selected_level in level_ids else (level_ids[0] if level_ids else None)
2472
- available_level_models = available_models_all
2473
- if level_selected_models:
2474
- valid_level_models = [m for m in level_selected_models if m in available_level_models][:5]
2475
- if not valid_level_models:
2476
- valid_level_models = available_level_models[:5]
2477
- else:
2478
- valid_level_models = available_level_models[:5]
2479
- level_metric_fig = create_level_metric_chart(filtered_df, effective_level, valid_level_models) if effective_level else create_empty_level_metric_chart("Select a level to view its metrics")
2480
-
2481
- return (
2482
- gr.Dropdown(
2483
- choices=available_models,
2484
- value=valid_selected,
2485
- multiselect=True,
2486
- label="",
2487
- info=None,
2488
- container=False,
2489
- # elem_classes=["model-dropdown"]
2490
- ),
2491
- chart,
2492
- gr.Dropdown(
2493
- choices=available_level_models,
2494
- value=valid_level_models,
2495
- multiselect=True,
2496
- label="",
2497
- info=None,
2498
- container=False,
2499
- elem_classes=["model-dropdown", "level-model-dropdown"]
2500
- ),
2501
- level_metric_fig,
2502
- )
2503
-
2504
- def update_radar_only(domain_filter, model_type_filter, sort_order, selected_models, selected_level, level_selected_models):
2505
- # Get filtered dataframe
2506
- df = load_leaderboard_data()
2507
- sort_metric = "Overall Success" if domain_filter == "ALL" else sr_column_map.get(resolve_level(domain_filter), "Overall Success")
2508
- filtered_df, _, _ = apply_filters(df, domain_filter, model_type_filter, sort_order, sort_metric)
2509
-
2510
- available_models_all = filtered_df['Model'].tolist()
2511
- if selected_models:
2512
- valid_selected = [m for m in selected_models if m in available_models_all]
2513
- # Check if more than 5 models are selected and show alert
2514
- if len(valid_selected) > 5:
2515
- # JavaScript alert for exceeding 5 models
2516
- gr.Warning("최대 5개 까지만 선택 가능합니다")
2517
- # Remove the last selected item (6th item) instead of keeping first 5
2518
- valid_selected = valid_selected[:-1]
2519
- if not valid_selected:
2520
- valid_selected = available_models_all[:5]
2521
- else:
2522
- valid_selected = available_models_all[:5]
2523
-
2524
-
2525
-
2526
- effective_level = selected_level if selected_level in level_ids else (level_ids[0] if level_ids else None)
2527
- available_level_models = available_models_all
2528
- if level_selected_models:
2529
- valid_level_models = [m for m in level_selected_models if m in available_level_models][:5]
2530
- if not valid_level_models:
2531
- valid_level_models = available_level_models[:5]
2532
- else:
2533
- valid_level_models = available_level_models[:5]
2534
- level_metric_fig = create_level_metric_chart(filtered_df, effective_level, valid_level_models) if effective_level else create_empty_level_metric_chart("Select a level to view its metrics")
2535
-
2536
- return (
2537
- gr.Dropdown(
2538
- choices=available_models_all[:15],
2539
- value=valid_selected,
2540
- multiselect=True,
2541
- label="",
2542
- info=None,
2543
- container=False,
2544
- ),
2545
- create_domain_radar_chart(filtered_df, valid_selected),
2546
- gr.Dropdown(
2547
- choices=available_level_models,
2548
- value=valid_level_models,
2549
- multiselect=True,
2550
- label="",
2551
- info=None,
2552
- container=False,
2553
- elem_classes=["model-dropdown", "level-model-dropdown"]
2554
- ),
2555
- level_metric_fig,
2556
- )
2557
-
2558
- def update_level_metric_only(domain_filter, model_type_filter, sort_order, selected_models, selected_level, level_selected_models):
2559
- df = load_leaderboard_data()
2560
- sort_metric = "Overall Success" if domain_filter == "ALL" else sr_column_map.get(resolve_level(domain_filter), "Overall Success")
2561
- filtered_df, _, _ = apply_filters(df, domain_filter, model_type_filter, sort_order, sort_metric)
2562
- available_models = filtered_df['Model'].tolist()
2563
- if level_selected_models:
2564
- valid_level_models = [m for m in level_selected_models if m in available_models]
2565
- # Check if more than 5 models are selected and show alert
2566
- if len(valid_level_models) > 5:
2567
- gr.Warning("최대 5개 까지만 선택 가능합니다")
2568
- # Remove the last selected item (6th item) instead of keeping first 5
2569
- valid_level_models = valid_level_models[:-1]
2570
- if not valid_level_models:
2571
- valid_level_models = available_models[:5]
2572
- else:
2573
- valid_level_models = available_models[:5]
2574
- effective_level = selected_level if selected_level in level_ids else (level_ids[0] if level_ids else None)
2575
- level_chart = create_level_metric_chart(filtered_df, effective_level, valid_level_models) if effective_level else create_empty_level_metric_chart("Select a level to view its metrics")
2576
- return (
2577
- gr.Dropdown(
2578
- choices=available_models,
2579
- value=valid_level_models,
2580
- multiselect=True,
2581
- label="",
2582
- info=None,
2583
- container=False,
2584
- elem_classes=["model-dropdown", "level-model-dropdown"]
2585
- ),
2586
- level_chart,
2587
- )
2588
-
2589
- # Update table when filters change
2590
- filter_inputs = [domain_filter, model_type_filter, sort_order]
2591
-
2592
- for input_component in filter_inputs:
2593
- input_component.change(
2594
- fn=update_table,
2595
- inputs=filter_inputs,
2596
- outputs=[leaderboard_title, leaderboard_table]
2597
- )
2598
-
2599
- # Also update radar chart when filters change
2600
- input_component.change(
2601
- fn=update_radar_chart,
2602
- inputs=filter_inputs + [model_selector, level_metric_selector, level_model_selector],
2603
- outputs=[model_selector, radar_chart, level_model_selector, level_metric_chart]
2604
- )
2605
-
2606
- # Update radar chart when model selection changes
2607
- model_selector.change(
2608
- fn=update_radar_only,
2609
- inputs=filter_inputs + [model_selector, level_metric_selector, level_model_selector],
2610
- outputs=[model_selector, radar_chart, level_model_selector, level_metric_chart]
2611
- )
2612
-
2613
- level_metric_selector.change(
2614
- fn=update_level_metric_only,
2615
- inputs=filter_inputs + [model_selector, level_metric_selector, level_model_selector],
2616
- outputs=[level_model_selector, level_metric_chart]
2617
- )
2618
-
2619
- level_model_selector.change(
2620
- fn=update_level_metric_only,
2621
- inputs=filter_inputs + [model_selector, level_metric_selector, level_model_selector],
2622
- outputs=[level_model_selector, level_metric_chart]
2623
- )
2624
-
2625
 
2626
  # Add custom CSS for the performance card
2627
  gr.HTML("""
@@ -2844,8 +2715,8 @@ def create_leaderboard_v2_tab():
2844
  .level-dropdown select,
2845
  .level-dropdown [role="combobox"],
2846
  .level-dropdown button {
2847
- background: #000000 !important;
2848
- border: 1px solid #333333 !important;
2849
  border-radius: 999px !important;
2850
  padding: 12px 20px !important;
2851
  color: var(--text-primary) !important;
@@ -2855,7 +2726,7 @@ def create_leaderboard_v2_tab():
2855
  text-align: center !important;
2856
  min-height: 46px !important;
2857
  transition: all 0.3s ease !important;
2858
- box-shadow: 0 10px 24px rgba(0, 0, 0, 0.3) !important;
2859
  }
2860
 
2861
  .level-dropdown select:hover,
@@ -2872,14 +2743,6 @@ def create_leaderboard_v2_tab():
2872
  margin: 12px auto 0 !important;
2873
  }
2874
 
2875
- .level-model-dropdown select,
2876
- .level-model-dropdown [role="combobox"],
2877
- .level-model-dropdown button {
2878
- background: #000000 !important;
2879
- border: 1px solid #333333 !important;
2880
- color: var(--text-primary) !important;
2881
- }
2882
-
2883
  .radar-placeholder {
2884
  display: flex;
2885
  flex-direction: column;
@@ -3032,74 +2895,6 @@ def create_leaderboard_v2_tab():
3032
  }
3033
  }
3034
 
3035
- /* 폰트 강제 적용 - 최종 우선순위 */
3036
- .dashboard-section,
3037
- .dashboard-section *,
3038
- .dashboard-section h2,
3039
- .dashboard-section h3,
3040
- .dashboard-section p,
3041
- .dashboard-section li,
3042
- .section-lead,
3043
- .section-subtitle,
3044
- .phase-card h3,
3045
- .phase-list li,
3046
- .scenario-body p,
3047
- .criteria-card h3,
3048
- .criteria-card ul,
3049
- .criteria-card li {
3050
- font-family: "Nanum Gothic", sans-serif !important;
3051
- }
3052
-
3053
- /* section-title 강제 적용 */
3054
- .section-title,
3055
- h2.section-title,
3056
- .dashboard-section .section-title,
3057
- .section-header .section-title {
3058
- font-family: "Nanum Gothic", sans-serif !important;
3059
- }
3060
-
3061
- .domain-title,
3062
- h2.domain-title,
3063
- .domain-header .domain-title {
3064
- font-family: "Nanum Gothic", sans-serif !important;
3065
- }
3066
-
3067
- .hero-title,
3068
- .hero-subtitle,
3069
- h1.hero-title,
3070
- p.hero-subtitle {
3071
- font-family: "Nanum Gothic", sans-serif !important;
3072
- font-size: 2rem; !important;
3073
- }
3074
-
3075
- /* hero-title 크기 강제 적용 */
3076
- .hero-title,
3077
- h1.hero-title {
3078
- font-size: 4rem !important;
3079
- }
3080
-
3081
- .phase-chart span,
3082
- .phase-card .phase-chart span,
3083
- .phase-grid .phase-chart span {
3084
- font-family: "Nanum Gothic", sans-serif !important;
3085
- font-size: 1.2rem !important;
3086
- }
3087
-
3088
- .section-lead, .section-subtitle {
3089
- font-size: 1.32rem !important;
3090
- font-family: "Nanum Gothic", sans-serif !important;
3091
- }
3092
-
3093
- .phase-card h3 {
3094
- font-size: 1.44rem !important;
3095
- font-family: "Nanum Gothic", sans-serif !important;
3096
- }
3097
-
3098
- .phase-list li {
3099
- font-size: 1.08rem !important;
3100
- font-family: "Nanum Gothic", sans-serif !important;
3101
- }
3102
-
3103
  </style>
3104
 
3105
  """)
@@ -3207,7 +3002,7 @@ def create_leaderboard_v2_tab():
3207
  label="",
3208
  info=None,
3209
  container=False,
3210
- # elem_classes=["model-dropdown"]
3211
  )
3212
 
3213
  input_component.change(
@@ -3262,8 +3057,8 @@ def create_domain_radar_chart(df, selected_models=None, max_models=5):
3262
  palette = [
3263
  {'fill': 'rgba(255, 210, 30, 0.25)', 'line': '#ffd21e'},
3264
  {'fill': 'rgba(255, 138, 60, 0.22)', 'line': '#FF8A3C'},
3265
- {'fill': 'rgba(161, 98, 7, 0.22)', 'line': '#A16207'},
3266
- {'fill': 'rgba(220, 38, 38, 0.20)', 'line': '#DC2626'},
3267
  {'fill': 'rgba(248, 250, 252, 0.20)', 'line': '#F8FAFC'},
3268
  ]
3269
 
@@ -3387,8 +3182,7 @@ def create_domain_radar_chart(df, selected_models=None, max_models=5):
3387
  height=800,
3388
  width=900,
3389
  margin=dict(t=30, b=50, l=10, r=10),
3390
- autosize=True,
3391
- annotations=[]
3392
  )
3393
 
3394
  return fig
@@ -3647,8 +3441,8 @@ def create_level_metric_chart(df, level, selected_models=None, max_models=5):
3647
  model_palette = [
3648
  '#ffd21e',
3649
  '#FF8A3C',
3650
- '#A16207',
3651
- '#DC2626',
3652
  '#F8FAFC',
3653
  '#38BDF8',
3654
  ]
@@ -3686,7 +3480,7 @@ def create_level_metric_chart(df, level, selected_models=None, max_models=5):
3686
  paper_bgcolor="#01091A",
3687
  plot_bgcolor="rgba(245, 246, 247, 0.02)",
3688
  height=plot_height,
3689
- autosize=True,
3690
  margin=dict(t=90, b=80, l=220, r=160),
3691
  legend=dict(
3692
  orientation="h",
@@ -3738,7 +3532,7 @@ def create_empty_level_metric_chart(message):
3738
  paper_bgcolor="#01091A",
3739
  plot_bgcolor="rgba(245, 246, 247, 0.02)",
3740
  height=420,
3741
- autosize=True,
3742
  margin=dict(t=80, b=60, l=80, r=120),
3743
  title=dict(
3744
  text="<b>Level Metric Breakdown</b>",
 
53
 
54
  # Clean and prepare data
55
  df = df.copy()
56
+ numeric_candidate_cols = [col for col in df.columns if col not in ('Model', 'Vendor')]
 
57
  for col in numeric_candidate_cols:
58
  df[col] = pd.to_numeric(df[col], errors='coerce')
59
 
 
118
  df['Call Validity'] = df[epr_cols].mean(axis=1)
119
 
120
  # Use LLM Type from CSV directly, with mapping to display names
121
+ if 'LLM Type' in df.columns:
122
+ # Clean the LLM Type column to remove any whitespace
123
+ df['LLM Type'] = df['LLM Type'].astype(str).str.strip()
124
+
125
+ # Map LLM Type to Model Type
126
+ def map_llm_type(llm_type):
127
+ if llm_type.upper() == "OSS":
128
+ return "Open source"
129
+ else:
130
+ return "Proprietary"
131
+
132
+ df['Model Type'] = df['LLM Type'].apply(map_llm_type)
 
 
 
 
 
 
 
133
  else:
134
+ # Fallback to vendor mapping if LLM Type column doesn't exist
135
+ vendor_model_type_map = {
136
+ "OpenAI": "Proprietary",
137
+ "Anthropic": "Proprietary",
138
+ "Google": "Proprietary",
139
+ "Microsoft": "Proprietary",
140
+ "Mistral": "Proprietary",
141
+ "Databricks": "Open source",
142
+ "Meta": "Open source",
143
+ "Alibaba": "Open source",
144
+ "알리바바": "Open source", # Korean name for Alibaba
145
+ "Kakao": "Open source",
146
+ "SKT": "Open source",
147
+ "KT": "Open source",
148
+ "xAI": "Proprietary",
149
+ }
150
+ df['Model Type'] = df['Vendor'].map(vendor_model_type_map).fillna('Proprietary')
 
 
151
 
152
  # Round numeric columns for better display
153
  round_three_cols = ['Avg AC', 'Avg TSQ', 'Avg Total Cost', 'Overall Success', 'Execution Accuracy',
 
224
  # Level metadata for the 7-stage task framework
225
  level_details = {
226
  "ALL": {
227
+ "title": "ALL · 전체 태스크",
228
+ "description": "7개의 태스크 전반의 평균 성능을 한눈에 살펴보고 각 레벨 비교를 위한 기준점을 제공합니다."
229
  },
230
  "L1": {
231
+ "title": "<span style='color: white;'>L1 · 단일 도구 실행</span>",
232
+ "description": "<span style='color: white;'>단일 도구 실행 능력과 기본적인 명령 수행 정확도를 평가합니다.</span>"
233
  },
234
  "L2": {
235
+ "title": "<span style='color: white;'>L2 · 도구 선택 능력</span>",
236
+ "description": "<span style='color: white;'>요구 사항에 맞는 도구를 고르고 적절한 파라미터로 호출하는 능력을 측정합니다.</span>"
237
  },
238
  "L3": {
239
+ "title": "<span style='color: white;'>L3 · 순차적 추론 (Chaining)</span>",
240
+ "description": "<span style='color: white;'>복수 단계의 순차적 reasoning을 통해 문제를 해결하는 과정을 검증합니다.</span>"
241
  },
242
  "L4": {
243
+ "title": "<span style='color: white;'>L4 · 병렬적 추론 (Aggregation)</span>",
244
+ "description": "<span style='color: white;'>여러 소스의 정보를 병렬적으로 통합하고 요약하는 능력을 평가합니다.</span>"
245
  },
246
  "L5": {
247
+ "title": "<span style='color: white;'>L5 · 강건성 (Robustness / Fallback)</span>",
248
+ "description": "<span style='color: white;'>예상치 못한 오류나 실패 상황에 대한 인지와 대응 전략을 확인합니다.</span>"
249
  },
250
  "L6": {
251
+ "title": "<span style='color: white;'>L6 · 효율성 (Efficiency)</span>",
252
+ "description": "<span style='color: white;'>최소한의 호출과 비용으로 목표를 달성하는 운영 효율을 살펴봅니다.</span>"
253
  },
254
  "L7": {
255
+ "title": "<span style='color: white;'>L7 · 장기 컨텍스트 기억 (Contextual Memory)</span>",
256
+ "description": "<span style='color: white;'>장기 대화 맥락을 유지하고 적절히 활용하는 능력을 집중적으로 분석합니다.</span>"
257
  }
258
  }
259
  default_level = "ALL"
 
291
  border-collapse: collapse;
292
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif;
293
  background: var(--bg-card);
294
+ color: white;
295
  }
296
 
297
  .v2-styled-table thead {
 
305
  padding: 14px 12px;
306
  text-align: left;
307
  font-weight: 600;
308
+ color: white;
309
  border-bottom: 2px solid var(--accent-primary);
310
  font-size: 13px;
311
  text-transform: uppercase;
 
319
  .v2-styled-table td {
320
  padding: 12px;
321
  border-bottom: 1px solid var(--border-subtle);
322
+ color: white;
323
  transition: all 0.2s ease;
324
  }
325
 
 
339
 
340
  .model-name {
341
  font-weight: 500;
342
+ color: white;
343
  transition: color 0.2s ease;
344
  }
345
 
346
  /* Keep model name color consistent on hover to emphasize row highlight */
347
  .v2-styled-table tr:hover .model-name {
348
+ color: white;
349
  }
350
 
351
  .numeric-cell {
352
  font-family: 'Geist Mono', monospace;
353
  font-size: 13px;
354
  text-align: center;
355
+ color: white;
356
  }
357
 
358
  .highlight-header {
359
  background: rgba(255, 210, 30, 0.14);
360
+ color: white;
361
  }
362
 
363
  .highlight-cell {
364
  background: rgba(255, 210, 30, 0.08);
365
+ color: white;
366
  font-weight: 600;
367
  }
368
  </style>
 
460
  return f"""
461
  <div class="domain-selector-container leaderboard-intro">
462
  <div class="domain-header">
463
+ <h2 class="domain-title" style="color: white;">Agent Leaderboard · {level_title}</h2>
464
+ <p class="domain-subtitle" style="color: white;">{level_description}</p>
465
  </div>
466
  <div class="dataframe-container">
467
  """
 
511
  # Load initial data
512
  initial_table = filter_and_sort_data(default_level, "All", "Overall Success", "Descending")
513
  initial_df = load_leaderboard_data() # Load raw data for model selector
 
 
 
 
 
 
 
 
514
  initial_selected_models = initial_df['Model'].tolist()[:5] if len(initial_df) > 0 else []
515
  initial_heatmap_models = initial_df['Model'].tolist()[:12] if len(initial_df) > 0 else []
516
  initial_heatmap = create_performance_heatmap(initial_df, initial_heatmap_models)
 
736
  # Header styles and navigation
737
  gr.HTML("""
738
  <style>
 
 
739
  /* Enhanced button styling with better gradio compatibility */
740
  .header-action-button {
741
  display: inline-block !important;
742
  padding: 14px 28px !important;
743
  background: #ffd21e !important;
744
+ color: #FFFFFF !important;
745
  text-decoration: none !important;
746
  border-radius: 16px !important;
747
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
 
775
  transform: translateY(-3px) !important;
776
  box-shadow: 0 12px 32px rgba(255, 210, 30, 0.5), 0 8px 16px rgba(0, 0, 0, 0.4) !important;
777
  background: #ffd21e !important;
778
+ color: #FFFFFF !important;
779
  text-decoration: none !important;
780
  text-shadow: 0 2px 6px rgba(0, 0, 0, 0.45) !important;
781
  }
 
790
  filter: drop-shadow(0 0 8px rgba(255, 255, 255, 0.3));
791
  }
792
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
793
  #hero-banner {
794
+ max-width: 960px;
795
+ margin: 0 auto 20px auto;
796
+ border-radius: 16px;
797
+ overflow: hidden;
798
+ box-shadow: 0 12px 32px rgba(0, 0, 0, 0.25);
799
  }
800
+
801
  #hero-banner img {
802
+ width: 100%;
803
+ height: auto;
804
+ display: block;
 
805
  }
806
 
807
  .hero-title {
808
+ font-size: 5rem;
809
  font-weight: 800;
810
  line-height: 1.1;
811
  background: linear-gradient(135deg, #FFE082 0%, #FFC107 50%, #FFB300 100%);
812
  -webkit-background-clip: text;
813
  -webkit-text-fill-color: transparent;
814
  margin-bottom: 1rem;
 
815
  }
816
 
817
  .hero-subtitle {
818
  color: var(--text-secondary);
819
+ font-size: 1.25rem;
820
+ font-family: 'Geist', sans-serif;
821
  margin-top: 0;
822
  }
823
 
 
876
  box-shadow: 0 12px 30px rgba(0, 0, 0, 0.25);
877
  backdrop-filter: blur(12px);
878
  -webkit-backdrop-filter: blur(12px);
 
879
  }
880
 
881
  .dashboard-section.emphasized {
 
895
  }
896
 
897
  .section-title {
898
+ font-size: 2.2rem;
899
  font-weight: 700;
900
  color: var(--text-primary);
901
  margin-bottom: 12px;
902
  text-align: center !important;
 
903
  }
904
 
905
  .section-lead, .section-subtitle {
906
+ font-size: 1.1rem;
907
  color: var(--text-secondary);
908
  max-width: 720px;
909
  margin: 0 auto 24px auto;
 
912
  word-break: keep-all;
913
  white-space: normal;
914
  display: block;
 
915
  }
916
 
917
  .phase-grid {
 
929
  }
930
 
931
  .phase-card h3 {
932
+ font-size: 1.5rem;
933
  color: var(--text-primary);
934
  margin-bottom: 20px;
935
  font-weight: 700;
 
936
  }
937
 
938
  .phase-chart {
 
960
 
961
  .phase-chart span {
962
  position: relative;
963
+ font-size: 1.5rem;
964
  font-weight: 700;
965
+ color: var(--text-primary);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
966
  }
967
 
 
968
  .phase-list {
969
  list-style: none;
970
  padding: 0;
 
979
  background: rgba(245, 246, 247, 0.05);
980
  border: 1px solid rgba(245, 246, 247, 0.08);
981
  color: var(--text-secondary);
982
+ font-size: 0.95rem;
 
983
  }
984
 
985
  .scenario-body {
 
1042
  /* Responsive design */
1043
  @media (max-width: 768px) {
1044
  .hero-title {
1045
+ font-size: 3rem;
1046
  }
1047
  .hero-action-button {
1048
  width: 100% !important;
 
1066
  gap: 8px;
1067
  }
1068
  .section-title {
1069
+ font-size: 1.8rem;
1070
  }
1071
  .phase-chart {
1072
  width: 100px;
 
1080
  </style>
1081
  """)
1082
 
 
1083
  gr.Image(
1084
+ value="banner.png",
1085
  show_label=False,
1086
  interactive=False,
1087
  type="filepath",
1088
  elem_id="hero-banner"
1089
  )
 
1090
 
1091
  gr.HTML("""
1092
  <div style="text-align: center; padding: 20px 0;">
 
1099
  gr.HTML("""
1100
  <div class="hero-actions">
1101
  <a href="https://hugging-face-krew.github.io/" target="_blank" rel="noopener noreferrer" class="hero-action-button">
1102
+ <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
1103
  <path d="M15 7h3a5 5 0 0 1 5 5 5 5 0 0 1-5 5h-3m-6 0H6a5 5 0 0 1-5-5 5 5 0 0 1 5-5h3"/>
1104
  <line x1="8" y1="12" x2="16" y2="12"/>
1105
  </svg>
1106
+ <span>Blog</span>
1107
  </a>
1108
  <a href="https://github.com/Hugging-Face-KREW/Ko-AgentBench" target="_blank" rel="noopener noreferrer" class="hero-action-button">
1109
+ <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
1110
  <path d="M9 19c-5 1.5-5-2.5-7-3"/>
1111
  <path d="M20 21v-3.87a3.37 3.37 0 0 0-.94-2.61c3.14-.35 6.44-1.54 6.44-7A5.44 5.44 0 0 0 20 4.77 5.07 5.07 0 0 0 19.91 1S18.73.65 16 2.48a13.38 13.38 0 0 0-7 0C6.27.65 5.09 1 5.09 1A5.07 5.07 0 0 0 5 4.77a5.44 5.44 0 0 0-1.5 3.78c0 5.42 3.3 6.61 6.44 7A3.37 3.37 0 0 0 9 18.13V22"/>
1112
  </svg>
1113
  <span>GitHub</span>
1114
  </a>
1115
  <a href="https://huggingface.co/datasets/huggingface-KREW/Ko-AgentBench" target="_blank" rel="noopener noreferrer" class="hero-action-button">
1116
+ <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
1117
  <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/>
1118
  <polyline points="7 10 12 15 17 10"/>
1119
  <line x1="12" y1="15" x2="12" y2="3"/>
1120
  </svg>
1121
+ <span>Dataset</span>
1122
  </a>
1123
+ <a href="https://github.com/Hugging-Face-KREW/Ko-AgentBench/blob/main/evaluate_model_run.py#L55" target="_blank" rel="noopener noreferrer" class="hero-action-button">
1124
+ <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
1125
  <path d="M3 3v18h18"/>
1126
  <path d="M7 17v-6"/>
1127
  <path d="M12 17V7"/>
1128
  <path d="M17 17v-3"/>
1129
  </svg>
1130
+ <span>Metrics</span>
1131
  </a>
1132
  </div>
1133
  """)
 
1136
  gr.HTML("""
1137
  <div class="dashboard-section">
1138
  <div class="section-header">
1139
+ <h2 class="section-title">단계별 태스크 설계</h2>
1140
  </div>
1141
  <p class="section-lead" style="text-align: center; margin: 0 auto 24px auto; max-width: 720px; line-height: 1.7; word-break: keep-all;">단순 도구 호출부터 장기적 맥락 능력, 강건성 처리 능력까지 에이전트의 능력을 7단계로 입체적으로 분석하였습니다.</p>
1142
  <div class="phase-grid">
1143
  <div class="phase-card">
1144
+ <h3>Single-Turn</h3>
1145
  <div class="phase-chart" style="--progress:80%;">
1146
+ <span>80%</span>
1147
  </div>
1148
  <ul class="phase-list">
1149
+ <li style="color: white;">L1: 단일 도구 실행</li>
1150
+ <li style="color: white;">L2: 도구 선택 능력</li>
1151
+ <li style="color: white;">L3: 순차적 reasoning (Chaining)</li>
1152
+ <li style="color: white;">L4: 병렬적 reasoning (Aggregation)</li>
1153
+ <li style="color: white;">L5: 강건성 (Robustness / Fallback)</li>
1154
  </ul>
1155
  </div>
1156
  <div class="phase-card">
1157
+ <h3>Multi-Turn</h3>
1158
  <div class="phase-chart" style="--progress:20%;">
1159
+ <span>20%</span>
1160
  </div>
1161
  <ul class="phase-list">
1162
+ <li style="color: white;">L6: 효율성 (Efficiency)</li>
1163
+ <li style="color: white;">L7: 장기 컨텍스트 기억 (Contextual Memory)</li>
1164
  </ul>
1165
  </div>
1166
  </div>
 
1171
  gr.HTML("""
1172
  <div class="dashboard-section emphasized">
1173
  <div class="section-header">
1174
+ <h2 class="section-title">18가지 한국형 API 사용 및 실생활 환경에 특화된 고품질 시나리오 구성</h2>
1175
  </div>
1176
  <div class="scenario-body">
1177
+ <p>네이버, 지도, 카카오, 웹사이트한국 실사용 환경 기반의 API를 기반으로 국내 사용자의 일상과 밀접한 '약속 예약', '블로그 후기 검색' 같은 현실적인 문제 해결 시나리오를 구현했습니다.</p>
1178
  </div>
 
 
1179
  <div class="section-flow">⌄</div>
1180
+ </div>
1181
  """)
1182
 
1183
  # Section 3: 핵심 평가 기준
1184
  gr.HTML("""
1185
  <div class="dashboard-section">
1186
  <div class="section-header">
1187
+ <h2 class="section-title">핵심 평가 기준</h2>
1188
  </div>
1189
  <div class="criteria-grid">
1190
  <div class="criteria-card">
 
1218
  # Domain filter section with enhanced styling
1219
  gr.HTML("""
1220
  <style>
 
 
1221
  /* Enhanced domain selector styling */
1222
  .domain-selector-container {
1223
  background: #ffd21e0d;
 
1320
  -webkit-background-clip: text;
1321
  background-clip: text;
1322
  -webkit-text-fill-color: transparent;
1323
+ text-shadow: 0 0 22px rgba(255, 210, 30, 0.65), 0 0 45px rgba(255, 210, 30, 0.4);
1324
+ filter: drop-shadow(0 0 16px rgba(255, 210, 30, 0.35));
1325
  letter-spacing: 0.02em;
1326
+ animation: title-shimmer 5s ease-in-out infinite;
 
1327
  }
1328
 
1329
  @keyframes title-shimmer {
 
1611
 
1612
  .model-dropdown select,
1613
  .model-dropdown [role="combobox"] {
1614
+ background: rgba(245, 246, 247, 0.06) !important;
1615
+ border: 1px solid var(--border-subtle) !important;
1616
  border-radius: 999px !important;
1617
  padding: 12px 24px !important;
1618
  color: var(--text-primary) !important;
 
1643
  gap: 8px !important;
1644
  width: 100% !important;
1645
  padding: 12px 24px !important;
1646
+ background: rgba(245, 246, 247, 0.06) !important;
1647
+ border: 1px solid var(--border-subtle) !important;
1648
  border-radius: 999px !important;
1649
  color: var(--text-primary) !important;
1650
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
 
1701
  background: #ffd21e !important;
1702
  border: 1px solid rgba(255, 210, 30, 0.6) !important;
1703
  border-radius: 999px !important;
1704
+ color: #FFFFFF !important;
1705
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
1706
  font-weight: 600 !important;
1707
  font-size: 0.95rem !important;
 
1748
  font-size: 1.5rem;
1749
  margin-bottom: 4px;
1750
  display: block;
1751
+ filter: drop-shadow(0 0 10px currentColor);
1752
  }
1753
 
1754
  .domain-name {
 
1763
  top: 8px;
1764
  right: 8px;
1765
  background: var(--accent-primary);
1766
+ color: white;
1767
  font-size: 0.75rem;
1768
  padding: 2px 8px;
1769
  border-radius: 12px;
 
1935
  padding: 12px 20px !important;
1936
  font-size: 0.95rem !important;
1937
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1938
  </style>
1939
 
1940
  """)
1941
 
1942
  level_options = list(level_details.keys())
1943
 
1944
+ with gr.Column(elem_classes=["domain-selector-container"], elem_id="task-level-selector"):
1945
+ gr.HTML("""
1946
+ <div class="domain-header">
1947
+ <h2 class="domain-title" style="color: white;">🧠 Select Task Level</h2>
1948
+ <p class="domain-subtitle" style="color: white;">Ko-AgentBench의 ALL · L1~L7 단계별 에이전트 성능을 손쉽게 비교하세요.</p>
1949
+ </div>
1950
+ """)
1951
+ domain_filter = gr.Radio(
1952
+ choices=level_options,
1953
+ value=default_level,
1954
+ label="",
1955
+ interactive=True,
1956
+ container=False,
1957
+ elem_classes=["domain-radio"]
1958
+ )
1959
 
1960
+ # Filter controls with domain styling
1961
+ with gr.Column(elem_classes=["domain-selector-container", "filters-sorting-container"], elem_id="filters-sorting-container"):
1962
+ gr.HTML("""
1963
+ <div class="domain-header">
1964
+ <h2 class="domain-title" style="color: white;">🔍 Filters & Sorting</h2>
1965
+ <p class="domain-subtitle" style="color: white;">모델 접근 방식과 정렬 순서를 선택해 맞춤 뷰를 구성하세요.</p>
1966
+ </div>
1967
+ """)
1968
+ with gr.Row(elem_classes=["filters-sorting-row"]):
1969
+ with gr.Column(scale=1, elem_classes=["filter-group"]):
1970
+ with gr.Row(elem_classes=["filter-group-row"]):
1971
+ gr.HTML("<span class='filter-group-label' style='color: white;'>Model Access</span>")
1972
+ model_type_filter = gr.Radio(
1973
+ choices=["All", "OSS", "API"],
1974
+ value="All",
1975
+ label="",
1976
+ elem_classes=["domain-radio"],
1977
+ container=False
1978
+ )
1979
+ with gr.Column(scale=1, elem_classes=["filter-group"]):
1980
+ with gr.Row(elem_classes=["filter-group-row"]):
1981
+ gr.HTML("<span class='filter-group-label' style='color: white;'>Sort Order</span>")
1982
+ sort_order = gr.Radio(
1983
+ choices=["Descending", "Ascending"],
1984
+ value="Descending",
1985
+ label="",
1986
+ elem_classes=["domain-radio"],
1987
+ container=False
1988
+ )
1989
+
1990
+ # Main leaderboard table with dynamic title
1991
+ leaderboard_title = gr.HTML(update_leaderboard_title(default_level))
1992
 
1993
  leaderboard_table = gr.HTML(initial_table)
1994
 
1995
+ gr.HTML("""
1996
+ </div>
1997
+ </div>""")
1998
+
1999
  # Radar Chart Section
2000
  gr.HTML("""
2001
  <div class="domain-selector-container domain-performance-container">
2002
  <div class="domain-header">
2003
+ <h2 class="domain-title" style="color: white;">Core Capability Radar</h2>
2004
+ <p class="domain-subtitle" style="color: white;">Track six essential pillars: Success, Execution, Reasoning, Robustness, Efficiency, and Call Validity.</p>
2005
  </div>
2006
  """)
2007
+
2008
+ with gr.Column(elem_classes=["domain-selector-container", "model-selector-container"], elem_id="radar-model-selector"):
2009
+ gr.HTML("""
2010
+ <div class="domain-header">
2011
+ <h2 class="domain-title" style="color: white;">🎯 Select Models for Comparison</h2>
2012
+ <p class="domain-subtitle" style="color: white;">Choose up to 5 models to map on the capability radar.</p>
2013
+ </div>
2014
+ """)
2015
+ model_selector = gr.Dropdown(
2016
+ choices=initial_df['Model'].tolist()[:10],
2017
+ value=initial_df['Model'].tolist()[:5],
2018
+ multiselect=True,
2019
+ label="",
2020
+ info=None,
2021
+ container=False,
2022
+ elem_classes=["model-dropdown"]
2023
+ )
2024
 
2025
  # Radar chart plot - wrapped in centered container
2026
  gr.HTML('<div class="chart-container radar-chart-container">')
 
2036
 
2037
  gr.HTML("</div>")
2038
 
2039
+ # Level metric breakdown section
2040
+ gr.HTML("""
2041
+ <div class="domain-selector-container domain-performance-container level-metrics-wrapper">
2042
+ <div class="domain-header">
2043
+ <h2 class="domain-title" style="color: white;">Level-Specific Metric Spotlight</h2>
2044
+ <p class="domain-subtitle" style="color: white;">Dive deeper into each Ko-AgentBench stage and compare model scores across its unique evaluation metrics.</p>
2045
+ </div>
2046
+ """)
2047
+
2048
+ with gr.Column(elem_classes=["domain-selector-container", "level-selector-container"], elem_id="level-selector-box"):
2049
+ gr.HTML("""
2050
+ <div class="domain-header">
2051
+ <h2 class="domain-title" style="color: white;">🧭 Select Task Level and Models</h2>
2052
+ <p class="domain-subtitle" style="color: white;">Choose a level and up to 5 models to explore their detailed SR-driven metrics.</p>
2053
+ </div>
2054
+ """)
2055
+ level_metric_selector = gr.Dropdown(
2056
+ choices=level_ids,
2057
+ value=level_ids[0] if level_ids else None,
2058
+ multiselect=False,
2059
+ label="",
2060
+ info=None,
2061
+ container=False,
2062
+ elem_classes=["level-dropdown"]
2063
+ )
2064
+ level_model_selector = gr.Dropdown(
2065
+ choices=initial_level_model_choices,
2066
+ value=initial_level_model_values,
2067
+ multiselect=True,
2068
+ label="",
2069
+ info=None,
2070
+ container=False,
2071
+ elem_classes=["model-dropdown", "level-model-dropdown"]
2072
+ )
2073
+
2074
+ gr.HTML('<div class="chart-container level-metric-chart-container">')
2075
+ level_metric_chart = gr.Plot(
2076
+ label="",
2077
+ value=initial_level_metric_chart,
2078
+ elem_classes=["level-metric-plot", "plot-container"]
2079
+ )
2080
+ gr.HTML("""
2081
+ </div>
2082
+ </div>
2083
+ """)
2084
+
2085
+ # Heatmap section
2086
+ gr.HTML("""
2087
+ <div class="domain-selector-container domain-performance-container heatmap-wrapper">
2088
+ <div class="domain-header">
2089
+ <h2 class="domain-title" style="color: white;">Comprehensive Performance Heatmap</h2>
2090
+ <p class="domain-subtitle" style="color: white;">View Ko-AgentBench SR scores across L1~L7 for each model in a single glance.</p>
2091
+ </div>
2092
+ <div class="chart-container heatmap-chart-container">
2093
+ """)
2094
+ heatmap_chart = gr.Plot(
2095
+ label="",
2096
+ value=initial_heatmap,
2097
+ elem_classes=["heatmap-plot", "plot-container"]
2098
+ )
2099
+ gr.HTML("""
2100
+ </div>
2101
+ </div>
2102
+ """)
2103
+
2104
+ # Update functions
2105
def get_optimal_sort_order(sort_by_value):
    """Return the preferred sort direction for the metric ``sort_by_value``.

    "Overall Success" and every per-level column looked up in
    ``sr_column_map`` (one lookup per entry of ``level_ids``) count as
    higher-is-better. No lower-is-better metric is registered yet, so every
    input currently resolves to "Descending".

    Args:
        sort_by_value: Name of the metric column being sorted on.

    Returns:
        Either "Ascending" or "Descending".
    """
    higher_is_better = ["Overall Success", *(sr_column_map[level] for level in level_ids)]
    lower_is_better = []  # extension point: metrics where a smaller value wins

    # "Descending" is both the higher-is-better answer and the fallback, so
    # only a metric listed exclusively as lower-is-better flips the direction.
    if sort_by_value not in higher_is_better and sort_by_value in lower_is_better:
        return "Ascending"
    return "Descending"
2119
+
2120
def update_table(level_filter, model_type_filter, sort_order):
    """Rebuild the leaderboard title and table for the current filter values.

    The table is sorted on "Overall Success" when ``level_filter`` is "ALL";
    otherwise on the column that ``sr_column_map`` holds for the resolved
    level, falling back to "Overall Success" when the level has no entry.

    Args:
        level_filter: Selected task level ("ALL" or a level label).
        model_type_filter: Selected model-access filter value.
        sort_order: Selected sort direction.

    Returns:
        A ``(title_html, table_html)`` pair.
    """
    heading = update_leaderboard_title(level_filter)

    if level_filter == "ALL":
        metric = "Overall Success"
    else:
        metric = sr_column_map.get(resolve_level(level_filter), "Overall Success")

    body = filter_and_sort_data(level_filter, model_type_filter, metric, sort_order)
    return heading, body
2125
+
2126
def update_radar_chart(domain_filter, model_type_filter, sort_order, selected_models, selected_level, level_selected_models):
    """Recompute every chart and both model dropdowns for the given filters.

    Args:
        domain_filter: Selected task level ("ALL" or a level label).
        model_type_filter: Selected model-access filter value.
        sort_order: Selected sort direction.
        selected_models: Models currently picked for the radar chart.
        selected_level: Level currently picked for the level-metric chart.
        level_selected_models: Models currently picked for the level-metric chart.

    Returns:
        A 5-tuple: (radar model dropdown, radar figure, heatmap figure,
        level model dropdown, level-metric figure).
    """
    leaderboard_df = load_leaderboard_data()
    if domain_filter == "ALL":
        sort_metric = "Overall Success"
    else:
        sort_metric = sr_column_map.get(resolve_level(domain_filter), "Overall Success")
    filtered_df, _, _ = apply_filters(leaderboard_df, domain_filter, model_type_filter, sort_order, sort_metric)

    every_model = filtered_df['Model'].tolist()
    radar_choices = every_model[:15]  # the radar dropdown only offers the first 15 rows

    # Keep the still-offered part of the current selection; if nothing
    # survives (or nothing was selected), fall back to the first five.
    radar_models = [m for m in (selected_models or []) if m in radar_choices] or radar_choices[:5]

    radar_fig = create_domain_radar_chart(filtered_df, radar_models)

    # Heatmap rows: selected models first, then the rest in table order,
    # without duplicates, limited to 12 entries.
    heatmap_models = []
    for name in [*radar_models, *every_model]:
        if name not in heatmap_models:
            heatmap_models.append(name)
    heatmap_fig = create_performance_heatmap(filtered_df, heatmap_models[:12])

    if selected_level in level_ids:
        effective_level = selected_level
    elif level_ids:
        effective_level = level_ids[0]
    else:
        effective_level = None

    # Same fallback rule as the radar selection, but capped at five models
    # and validated against the full filtered list.
    level_models = [m for m in (level_selected_models or []) if m in every_model][:5] or every_model[:5]

    if effective_level:
        level_fig = create_level_metric_chart(filtered_df, effective_level, level_models)
    else:
        level_fig = create_empty_level_metric_chart("Select a level to view its metrics")

    shared_dropdown_args = dict(multiselect=True, label="", info=None, container=False)
    radar_dropdown = gr.Dropdown(
        choices=radar_choices,
        value=radar_models,
        elem_classes=["model-dropdown"],
        **shared_dropdown_args,
    )
    level_dropdown = gr.Dropdown(
        choices=every_model,
        value=level_models,
        elem_classes=["model-dropdown", "level-model-dropdown"],
        **shared_dropdown_args,
    )
    return radar_dropdown, radar_fig, heatmap_fig, level_dropdown, level_fig
2192
+
2193
def update_radar_only(domain_filter, model_type_filter, sort_order, selected_models, selected_level, level_selected_models):
    """Redraw the charts for a new radar selection without touching its dropdown.

    Unlike ``update_radar_chart``, the radar selection is validated against
    the full filtered model list and no radar dropdown is returned.

    Args:
        domain_filter: Selected task level ("ALL" or a level label).
        model_type_filter: Selected model-access filter value.
        sort_order: Selected sort direction.
        selected_models: Models currently picked for the radar chart.
        selected_level: Level currently picked for the level-metric chart.
        level_selected_models: Models currently picked for the level-metric chart.

    Returns:
        A 4-tuple: (radar figure, heatmap figure, level model dropdown,
        level-metric figure).
    """
    leaderboard_df = load_leaderboard_data()
    if domain_filter == "ALL":
        sort_metric = "Overall Success"
    else:
        sort_metric = sr_column_map.get(resolve_level(domain_filter), "Overall Success")
    filtered_df, _, _ = apply_filters(leaderboard_df, domain_filter, model_type_filter, sort_order, sort_metric)

    every_model = filtered_df['Model'].tolist()

    # Drop selections that the filters removed; default to the first five.
    radar_models = [m for m in (selected_models or []) if m in every_model] or every_model[:5]

    # Heatmap rows: selected models first, then the rest in table order,
    # without duplicates, limited to 12 entries.
    heatmap_models = []
    for name in [*radar_models, *every_model]:
        if name not in heatmap_models:
            heatmap_models.append(name)
    heatmap_models = heatmap_models[:12]

    if selected_level in level_ids:
        effective_level = selected_level
    elif level_ids:
        effective_level = level_ids[0]
    else:
        effective_level = None

    level_models = [m for m in (level_selected_models or []) if m in every_model][:5] or every_model[:5]

    if effective_level:
        level_fig = create_level_metric_chart(filtered_df, effective_level, level_models)
    else:
        level_fig = create_empty_level_metric_chart("Select a level to view its metrics")

    radar_fig = create_domain_radar_chart(filtered_df, radar_models)
    heatmap_fig = create_performance_heatmap(filtered_df, heatmap_models)
    level_dropdown = gr.Dropdown(
        choices=every_model,
        value=level_models,
        multiselect=True,
        label="",
        info=None,
        container=False,
        elem_classes=["model-dropdown", "level-model-dropdown"],
    )
    return radar_fig, heatmap_fig, level_dropdown, level_fig
2240
+
2241
def update_level_metric_only(domain_filter, model_type_filter, sort_order, selected_models, selected_level, level_selected_models):
    """Redraw only the level-metric chart and refresh its model dropdown.

    Args:
        domain_filter: Selected task level ("ALL" or a level label).
        model_type_filter: Selected model-access filter value.
        sort_order: Selected sort direction.
        selected_models: Radar selection; accepted so the handler shares the
            other handlers' input list, but not read here.
        selected_level: Level currently picked for the level-metric chart.
        level_selected_models: Models currently picked for the level-metric chart.

    Returns:
        A ``(level model dropdown, level-metric figure)`` pair.
    """
    leaderboard_df = load_leaderboard_data()
    if domain_filter == "ALL":
        sort_metric = "Overall Success"
    else:
        sort_metric = sr_column_map.get(resolve_level(domain_filter), "Overall Success")
    filtered_df, _, _ = apply_filters(leaderboard_df, domain_filter, model_type_filter, sort_order, sort_metric)

    model_names = filtered_df['Model'].tolist()

    # Keep at most five still-available selections; default to the first five.
    chosen = [m for m in (level_selected_models or []) if m in model_names][:5] or model_names[:5]

    if selected_level in level_ids:
        effective_level = selected_level
    elif level_ids:
        effective_level = level_ids[0]
    else:
        effective_level = None

    if effective_level:
        level_chart = create_level_metric_chart(filtered_df, effective_level, chosen)
    else:
        level_chart = create_empty_level_metric_chart("Select a level to view its metrics")

    refreshed_dropdown = gr.Dropdown(
        choices=model_names,
        value=chosen,
        multiselect=True,
        label="",
        info=None,
        container=False,
        elem_classes=["model-dropdown", "level-model-dropdown"],
    )
    return refreshed_dropdown, level_chart
2266
+
2267
# The three filter controls drive everything: each change re-renders the
# title/table and then refreshes the charts and both model dropdowns.
filter_inputs = [domain_filter, model_type_filter, sort_order]

for input_component in filter_inputs:
    input_component.change(
        fn=update_table,
        inputs=filter_inputs,
        outputs=[leaderboard_title, leaderboard_table],
    )
    input_component.change(
        fn=update_radar_chart,
        inputs=[*filter_inputs, model_selector, level_metric_selector, level_model_selector],
        outputs=[model_selector, radar_chart, heatmap_chart, level_model_selector, level_metric_chart],
    )

# Selector-specific handlers. All of them receive the same six inputs as
# update_radar_chart; they differ only in which outputs they refresh.
for trigger, handler, targets in (
    # Radar model selection changed
    (model_selector, update_radar_only,
     [radar_chart, heatmap_chart, level_model_selector, level_metric_chart]),
    # Level-metric level changed
    (level_metric_selector, update_level_metric_only,
     [level_model_selector, level_metric_chart]),
    # Level-metric model selection changed
    (level_model_selector, update_level_metric_only,
     [level_model_selector, level_metric_chart]),
):
    trigger.change(
        fn=handler,
        inputs=[*filter_inputs, model_selector, level_metric_selector, level_model_selector],
        outputs=targets,
    )
2302
+
2303
+ # Define generate_performance_card function before using it
2304
+ def generate_performance_card(model_name):
2305
+ """Generate HTML for the model performance card"""
2306
+ if not model_name:
2307
+ return """<div style="text-align: center; color: var(--text-secondary); padding: 40px;">
2308
+ Please select a model to generate its performance card
2309
+ </div>"""
2310
+
2311
+ # Get model data
2312
+ df = load_leaderboard_data()
2313
+ model_data = df[df['Model'] == model_name]
2314
+
2315
+ if model_data.empty:
2316
+ return """<div style="text-align: center; color: var(--text-secondary); padding: 40px;">
2317
+ Model not found in the database
2318
+ </div>"""
2319
+
2320
+ row = model_data.iloc[0]
2321
+
2322
+ # Get overall rank based on overall success
2323
+ df_with_success = df.copy()
2324
+ df_with_success['Overall Success'] = pd.to_numeric(df_with_success.get('Overall Success', pd.Series()), errors='coerce')
2325
  df_with_success = df_with_success[df_with_success['Overall Success'].notna()]
2326
  df_sorted = df_with_success.sort_values('Overall Success', ascending=False).reset_index(drop=True)
2327
  try:
 
2452
  gr.HTML("""
2453
  <div class="domain-selector-container performance-card-container">
2454
  <div class="domain-header">
2455
+ <h2 class="domain-title" style="color: white;">Model Performance Card</h2>
2456
+ <p class="domain-subtitle" style="color: white;">Comprehensive performance card for any model - perfect for presentations and reports</p>
 
 
 
 
 
2457
  </div>
 
2458
  <div class="performance-card-content">
2459
  """)
2460
+
2461
  with gr.Column(elem_classes=["domain-selector-container", "model-selector-container"], elem_id="model-selector-box"):
2462
  gr.HTML("""
2463
+ <div class="domain-header">
2464
+ <h2 class="domain-title" style="color: white;">🤖 Select Model</h2>
2465
+ <p class="domain-subtitle" style="color: white;">비교할 모델을 선택하세요.</p>
2466
+ </div>
2467
  """)
2468
  card_model_selector = gr.Dropdown(
2469
  choices=initial_df['Model'].tolist(),
 
2471
  label="",
2472
  info=None,
2473
  container=False,
2474
+ elem_classes=["model-dropdown"]
2475
  )
2476
  download_card_btn = gr.Button(
2477
+ "Download Card as PNG",
2478
  elem_id="download-card-btn",
2479
  elem_classes=["pill-button"]
2480
  )
 
2493
  </div>
2494
  </div>
2495
  """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2496
 
2497
  # Add custom CSS for the performance card
2498
  gr.HTML("""
 
2715
  .level-dropdown select,
2716
  .level-dropdown [role="combobox"],
2717
  .level-dropdown button {
2718
+ background: rgba(245, 246, 247, 0.06) !important;
2719
+ border: 1px solid var(--border-subtle) !important;
2720
  border-radius: 999px !important;
2721
  padding: 12px 20px !important;
2722
  color: var(--text-primary) !important;
 
2726
  text-align: center !important;
2727
  min-height: 46px !important;
2728
  transition: all 0.3s ease !important;
2729
+ box-shadow: 0 10px 24px rgba(255, 210, 30, 0.15) !important;
2730
  }
2731
 
2732
  .level-dropdown select:hover,
 
2743
  margin: 12px auto 0 !important;
2744
  }
2745
 
 
 
 
 
 
 
 
 
2746
  .radar-placeholder {
2747
  display: flex;
2748
  flex-direction: column;
 
2895
  }
2896
  }
2897
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2898
  </style>
2899
 
2900
  """)
 
3002
  label="",
3003
  info=None,
3004
  container=False,
3005
+ elem_classes=["model-dropdown"]
3006
  )
3007
 
3008
  input_component.change(
 
3057
  palette = [
3058
  {'fill': 'rgba(255, 210, 30, 0.25)', 'line': '#ffd21e'},
3059
  {'fill': 'rgba(255, 138, 60, 0.22)', 'line': '#FF8A3C'},
3060
+ {'fill': 'rgba(249, 112, 185, 0.22)', 'line': '#F970B9'},
3061
+ {'fill': 'rgba(139, 92, 246, 0.20)', 'line': '#8B5CF6'},
3062
  {'fill': 'rgba(248, 250, 252, 0.20)', 'line': '#F8FAFC'},
3063
  ]
3064
 
 
3182
  height=800,
3183
  width=900,
3184
  margin=dict(t=30, b=50, l=10, r=10),
3185
+ autosize=True
 
3186
  )
3187
 
3188
  return fig
 
3441
  model_palette = [
3442
  '#ffd21e',
3443
  '#FF8A3C',
3444
+ '#F970B9',
3445
+ '#8B5CF6',
3446
  '#F8FAFC',
3447
  '#38BDF8',
3448
  ]
 
3480
  paper_bgcolor="#01091A",
3481
  plot_bgcolor="rgba(245, 246, 247, 0.02)",
3482
  height=plot_height,
3483
+ width=1450,
3484
  margin=dict(t=90, b=80, l=220, r=160),
3485
  legend=dict(
3486
  orientation="h",
 
3532
  paper_bgcolor="#01091A",
3533
  plot_bgcolor="rgba(245, 246, 247, 0.02)",
3534
  height=420,
3535
+ width=1450,
3536
  margin=dict(t=80, b=60, l=80, r=120),
3537
  title=dict(
3538
  text="<b>Level Metric Breakdown</b>",
tabs/leaderboard_v1_en.py DELETED
The diff for this file is too large to render. See raw diff
 
utils.py CHANGED
@@ -9,8 +9,8 @@ def get_chart_colors():
9
  # "grid": (1, 1, 1, 0.1), # RGBA tuple for grid
10
  # }
11
  return {
12
- "Private": "#593B1D", # rich brown for API
13
- "Open source": "#FACC15", # warm amber for OSS
14
  "performance_bands": ["#DCFCE7", "#FEF9C3", "#FEE2E2"],
15
  "text": "#111827",
16
  "background": "#FFFFFF",
@@ -20,12 +20,10 @@ def get_chart_colors():
20
 
21
  def get_rank_badge(rank):
22
  """Generate HTML for rank badge with appropriate styling"""
23
- tag_background = "#593B1D"
24
- tag_text_color = "#FFFFFF"
25
  badge_styles = {
26
- 1: ("1st", tag_background, tag_text_color),
27
- 2: ("2nd", tag_background, tag_text_color),
28
- 3: ("3rd", tag_background, tag_text_color),
29
  }
30
 
31
  if rank in badge_styles:
@@ -65,25 +63,24 @@ def get_type_badge(model_type):
65
  """Generate HTML for model type badge"""
66
  colors = get_chart_colors()
67
  color_map = {
68
- "Open source": colors.get("Open source", "#FACC15"),
69
- "Proprietary": colors.get("Private", "#593B1D"),
70
- "Private": colors.get("Private", "#593B1D"),
71
  }
72
  label_map = {
73
  "Open source": "OSS",
74
  "Proprietary": "API",
75
  "Private": "API",
76
  }
77
- bg_color = color_map.get(model_type, "#593B1D")
78
  display_label = label_map.get(model_type, model_type)
79
- text_color = "#111827" if display_label == "OSS" else "#FFFFFF"
80
  return f"""
81
  <div style="
82
  display: inline-flex;
83
  align-items: center;
84
  padding: 4px 8px;
85
  background: {bg_color};
86
- color: {text_color};
87
  border-radius: 4px;
88
  font-size: 0.85em;
89
  font-weight: 500;
 
9
  # "grid": (1, 1, 1, 0.1), # RGBA tuple for grid
10
  # }
11
  return {
12
+ "Private": "#3F78FA", # accent-blue light
13
+ "Open source": "#A13AE2", # accent-purple light
14
  "performance_bands": ["#DCFCE7", "#FEF9C3", "#FEE2E2"],
15
  "text": "#111827",
16
  "background": "#FFFFFF",
 
20
 
21
  def get_rank_badge(rank):
22
  """Generate HTML for rank badge with appropriate styling"""
 
 
23
  badge_styles = {
24
+ 1: ("1st", "linear-gradient(145deg, #ffd700, #ffc400)", "#000"),
25
+ 2: ("2nd", "linear-gradient(145deg, #9ca3af, #787C7E)", "#fff"),
26
+ 3: ("3rd", "linear-gradient(145deg, #CD7F32, #b36a1d)", "#fff"),
27
  }
28
 
29
  if rank in badge_styles:
 
63
  """Generate HTML for model type badge"""
64
  colors = get_chart_colors()
65
  color_map = {
66
+ "Open source": colors.get("Open source", "#A13AE2"),
67
+ "Proprietary": colors.get("Private", "#3F78FA"),
68
+ "Private": colors.get("Private", "#3F78FA"),
69
  }
70
  label_map = {
71
  "Open source": "OSS",
72
  "Proprietary": "API",
73
  "Private": "API",
74
  }
75
+ bg_color = color_map.get(model_type, "#4F46E5")
76
  display_label = label_map.get(model_type, model_type)
 
77
  return f"""
78
  <div style="
79
  display: inline-flex;
80
  align-items: center;
81
  padding: 4px 8px;
82
  background: {bg_color};
83
+ color: white;
84
  border-radius: 4px;
85
  font-size: 0.85em;
86
  font-weight: 500;