diff --git "a/tabs/leaderboard_v1_en.py" "b/tabs/leaderboard_v1_en.py"
new file mode 100644
--- /dev/null
+++ "b/tabs/leaderboard_v1_en.py"
@@ -0,0 +1,4114 @@
+""" +Agent Leaderboard v1 - Main leaderboard interface +Updated implementation with LLM Type support and optimized radar charts +""" + +import base64 +import math +import re +from datetime import datetime +from pathlib import Path + +import gradio as gr +import pandas as pd +import plotly.graph_objects as go + +# Import components and styles from modular files +from components.leaderboard_components import ( + get_chart_colors, get_rank_badge, get_type_badge, + get_metric_tooltip, get_responsive_styles, get_faq_section +) +from styles.leaderboard_styles import get_leaderboard_css + +ASSET_ICON_PATH = Path("krew_icon.png") +KREW_ICON_BASE64 = "" +if ASSET_ICON_PATH.exists(): + KREW_ICON_BASE64 = base64.b64encode(ASSET_ICON_PATH.read_bytes()).decode("utf-8") + +CSV_PATH = Path("combined_evaluation_summary.csv") +if CSV_PATH.exists(): + EVALUATION_DATE = datetime.fromtimestamp(CSV_PATH.stat().st_mtime).strftime("%Y-%m-%d") +else: + EVALUATION_DATE = datetime.today().strftime("%Y-%m-%d") + + +def create_leaderboard_v2_tab(): + """Create the main leaderboard v1 tab with interactive table""" + token_to_cost_factor = 2e-6 # Rough cost per token ($2 per 1M tokens) + tokens_per_turn = 1000 # Approximate tokens exchanged per turn for scaling + level_ids = [f"L{i}" for i in range(1, 8)] + level_tsq_sources = { + "L1": "L1_ArgAcc", + "L2": "L2_SelectAcc", + "L3": "L3_PSM", + "L4": "L4_Coverage", + "L5": "L5_AdaptiveRoutingScore", + "L6": "L6_EffScore", + "L7": "L7_ContextRetention", + } + + def load_leaderboard_data(): + """Load and prepare the leaderboard data""" + df = pd.read_csv('combined_evaluation_summary.csv') + + # Clean and prepare data + df = df.copy() + numeric_candidate_cols = [col for col in df.columns if col not in ('Model', 'Vendor', 'LLM Type')] + for col in numeric_candidate_cols: + df[col] = pd.to_numeric(df[col], errors='coerce') + + # Derive per-level helper columns for cost and turns + sr_columns = [] + tsq_columns = [] + duration_columns = [] + cost_columns = [] + turns_columns = [] + + for level in level_ids: + sr_col = f"{level}_SR" + if sr_col in df.columns: + sr_columns.append(sr_col) + df[sr_col] = df[sr_col].round(3) + + tsq_source = level_tsq_sources.get(level) + if tsq_source and tsq_source in df.columns: + tsq_columns.append(tsq_source) + + duration_col = f"{level}_Avg_Exec_Time" + if duration_col in df.columns: + duration_columns.append(duration_col) + + token_col = f"{level}_Avg_Tokens" + if token_col in df.columns: + cost_col = f"{level}_Avg_Cost" + turns_col = f"{level}_Avg_Turns" + df[cost_col] = df[token_col] * token_to_cost_factor + df[turns_col] = df[token_col] / tokens_per_turn + cost_columns.append(cost_col) + turns_columns.append(turns_col) + + if sr_columns: + df['Avg AC'] = df[sr_columns].mean(axis=1) + if tsq_columns: + df['Avg TSQ'] = df[tsq_columns].mean(axis=1) + if cost_columns: + df['Avg Total Cost'] = df[cost_columns].mean(axis=1) + if duration_columns: + df['Avg Session Duration'] = df[duration_columns].mean(axis=1) + if turns_columns: + df['Avg Turns'] = df[turns_columns].mean(axis=1) + + # Derive core capability metrics for radar visualization + if sr_columns: + df['Overall Success'] = df[sr_columns].mean(axis=1) + execution_cols = [col for col in ['L1_CallEM', 'L1_ArgAcc', 'L2_SelectAcc'] if col in df.columns] + if execution_cols: + df['Execution Accuracy'] = df[execution_cols].mean(axis=1) + 
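+        # Illustrative note (comment only, not executed logic): each capability
+        # aggregate here is a plain row-wise mean over whichever source columns
+        # exist in the CSV, e.g. with hypothetical scores
+        #   L1_CallEM=0.9, L1_ArgAcc=0.8, L2_SelectAcc=0.7
+        #   -> Execution Accuracy = (0.9 + 0.8 + 0.7) / 3 = 0.8
+        # Columns missing from the CSV are dropped from the mean, not zero-filled.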
reasoning_cols = [col for col in ['L3_ProvAcc', 'L3_PSM', 'L4_Coverage'] if col in df.columns] + if reasoning_cols: + df['Complex Reasoning'] = df[reasoning_cols].mean(axis=1) + robustness_cols = [col for col in ['L5_AdaptiveRoutingScore', 'L5_FallbackSR'] if col in df.columns] + if robustness_cols: + df['Robustness'] = df[robustness_cols].mean(axis=1) + context_cols = [col for col in ['L6_ReuseRate', 'L6_EffScore', 'L7_ContextRetention'] if col in df.columns] + if context_cols: + df['Context & Efficiency'] = df[context_cols].mean(axis=1) + epr_cols = [f"L{i}_EPR_CVR" for i in range(1, 8) if f"L{i}_EPR_CVR" in df.columns] + if epr_cols: + df['Call Validity'] = df[epr_cols].mean(axis=1) + + # Use LLM Type from CSV directly, with mapping to display names + if 'LLM Type' in df.columns: + # Clean the LLM Type column to remove any whitespace + df['LLM Type'] = df['LLM Type'].astype(str).str.strip() + + # Map LLM Type to Model Type + def map_llm_type(llm_type): + if llm_type.upper() == "OSS": + return "Open source" + else: + return "Proprietary" + + df['Model Type'] = df['LLM Type'].apply(map_llm_type) + else: + # Fallback to vendor mapping if LLM Type column doesn't exist + vendor_model_type_map = { + "OpenAI": "Proprietary", + "Anthropic": "Proprietary", + "Google": "Proprietary", + "Microsoft": "Proprietary", + "Mistral": "Proprietary", + "Databricks": "Open source", + "Meta": "Open source", + "Alibaba": "Open source", + "알리바바": "Open source", # Korean name for Alibaba + "Kakao": "Open source", + "SKT": "Open source", + "KT": "Open source", + "xAI": "Proprietary", + } + df['Model Type'] = df['Vendor'].map(vendor_model_type_map).fillna('Proprietary') + + # Round numeric columns for better display + round_three_cols = ['Avg AC', 'Avg TSQ', 'Avg Total Cost', 'Overall Success', 'Execution Accuracy', + 'Complex Reasoning', 'Robustness', 'Context & Efficiency', 'Call Validity'] + round_one_cols = ['Avg Session Duration', 'Avg Turns'] + for col in round_three_cols: + if col in df.columns: + df[col] = pd.to_numeric(df[col], errors='coerce').round(3) + for col in round_one_cols: + if col in df.columns: + df[col] = pd.to_numeric(df[col], errors='coerce').round(1) + if cost_columns: + df[cost_columns] = df[cost_columns].apply(pd.to_numeric, errors='coerce').round(3) + if turns_columns: + df[turns_columns] = df[turns_columns].apply(pd.to_numeric, errors='coerce').round(2) + if duration_columns: + df[duration_columns] = df[duration_columns].apply(pd.to_numeric, errors='coerce').round(2) + + # Fill NaN values appropriately + df = df.fillna('') + + return df + + def build_static_radar_chart(values, labels): + """Render a small static radar chart as inline SVG""" + if not values or all(v == 0 for v in values): + return """ +
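+ <!-- Empty-state fallback: rendered only when every radar value is zero or missing -->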
+ Radar Chart + Execution Accuracy · Complex Reasoning · Robustness · Context & Efficiency · Overall Success · Validity +
+ """ + size = 220 + center = size / 2 + radius = size * 0.38 + n = len(values) + def point(v, idx, scale=1.0): + angle = (2 * math.pi * idx / n) - math.pi / 2 + r = radius * v * scale + x = center + r * math.cos(angle) + y = center + r * math.sin(angle) + return x, y + polygon_points = " ".join(f"{x:.2f},{y:.2f}" for x, y in (point(v, i) for i, v in enumerate(values))) + ring_polygons = [] + for step in (0.33, 0.66, 1.0): + ring_points = " ".join(f"{x:.2f},{y:.2f}" for x, y in (point(step, i) for i in range(n))) + opacity = 0.04 if step < 1.0 else 0.08 + ring_polygons.append(f'') + axis_lines = "\n".join( + f'' + for idx in range(n) + ) + label_spans = "\n".join( + f'{label}' + for idx, label in enumerate(labels) + ) + svg = f""" + + + + + + + + + {''.join(ring_polygons)} + {axis_lines} + + {label_spans} + + """ + return svg + + # Level metadata for the 7-stage task framework + level_details = { + "ALL": { + "title": "ALL · All Tasks", + "description": "Compare overall performance levels and stage-specific strengths of models through average SR across L1~L7 levels." + }, + "L1": { + "title": ">L1 · Single Tool Execution", + "description": "Evaluates single tool execution capability and basic command performance accuracy." + }, + "L2": { + "title": "L2 · Tool Selection Capability", + "description": "Measures the ability to select appropriate tools and invoke them with proper parameters." + }, + "L3": { + "title": "L3 · Sequential Reasoning", + "description": "복수 단계의 순차적 추론을 통해 문제를 해결하는 과정을 검증합니다." + }, + "L4": { + "title": "L4 · Parallel Reasoning", + "description": "Evaluates the ability to integrate and summarize information from multiple sources in parallel." + }, + "L5": { + "title": "L5 · Robustness (Robustness / Fallback)", + "description": "Confirms recognition and response strategies for unexpected errors or failure situations." + }, + "L6": { + "title": "L6 · Efficiency (Efficiency)", + "description": "Examines operational efficiency in achieving goals with minimal calls and costs." + }, + "L7": { + "title": "L7 · Long-term Context Memory (Contextual Memory)", + "description": "Intensively analyzes the ability to maintain and appropriately utilize long-term conversation context." + } + } + default_level = "ALL" + + sr_column_map = {level: f"{level}_SR" for level in level_ids} + overall_sort_column = "Overall Success" + + def resolve_level(level_value): + """Normalize the incoming level filter value""" + if not level_value: + return default_level + return level_value if level_value in level_details else default_level + + def generate_html_table(filtered_df, highlight_column): + """Generate styled HTML table with per-level success rates""" + valid_highlights = list(sr_column_map.values()) + ["Overall Success"] + highlight_column = highlight_column if highlight_column in valid_highlights else None + overall_column = "Overall Success" + overall_highlight = (highlight_column == overall_column) + highlight_map = {level: (sr_column_map[level] == highlight_column) for level in level_ids} + + table_html = """ + + +
+ + + + + + + + """ + overall_header_classes = ["numeric-cell"] + if overall_highlight: + overall_header_classes.append("highlight-header") + table_html += f""" + + """ + for level in level_ids: + header_classes = ["numeric-cell"] + if highlight_map.get(level): + header_classes.append("highlight-header") + table_html += f""" + + """ + table_html += """ + + + + """ + def safe_float(value): + if value is None: + return '' + if isinstance(value, str) and value.strip() == '': + return '' + if pd.isna(value): + return '' + try: + return float(value) + except (TypeError, ValueError): + return '' + + # Generate table rows + for idx, (_, row) in enumerate(filtered_df.iterrows()): + rank = idx + 1 + table_html += f""" + + + + + + """ + overall_value = safe_float(row.get(overall_column, '')) + if overall_value != '': + overall_display = f'{overall_value:.3f}' + else: + overall_display = '-' + overall_classes = ["numeric-cell"] + if overall_highlight: + overall_classes.append("highlight-cell") + table_html += f'' + for level in level_ids: + sr_col = sr_column_map[level] + value = safe_float(row.get(sr_col, '')) + if value != '': + value_display = f'{value:.3f}' + else: + value_display = '-' + cell_classes = ["numeric-cell"] + if highlight_map.get(level): + cell_classes.append("highlight-cell") + table_html += f'' + table_html += "" + + table_html += """ + +
RankModelVendorLLM Type + Overall + + {level} +
{get_rank_badge(rank)}{row['Model']}{row['Vendor']}{get_type_badge(row['Model Type'])}{overall_display}{value_display}
+
+ """ + + return table_html + + def update_leaderboard_title(level_filter): + """Update the leaderboard title based on selected level""" + level_key = resolve_level(level_filter) + level_info = level_details.get(level_key, level_details[default_level]) + level_title = level_info["title"] + level_description = level_info["description"] + + return f""" +
+
+

Agent Leaderboard · {level_title}

+

{level_description}

+
+
+ """ + + model_type_lookup = { + "OSS": "Open source", + "API": "Proprietary" + } + + def apply_filters(df, level_filter, model_type_filter, sort_order, sort_by="Overall Success"): + """Apply shared filters and sorting to the leaderboard dataframe.""" + filtered_df = df.copy() + level_key = resolve_level(level_filter) + highlight_column = None + + if model_type_filter != "All": + mapped_type = model_type_lookup.get(model_type_filter, model_type_filter) + filtered_df = filtered_df[filtered_df['Model Type'] == mapped_type] + + actual_sort_column = sort_by if sort_by in filtered_df.columns else None + if not actual_sort_column: + if level_key == "ALL": + actual_sort_column = overall_sort_column if overall_sort_column in filtered_df.columns else None + else: + actual_sort_column = sr_column_map.get(level_key) + + if level_key in sr_column_map: + highlight_column = sr_column_map[level_key] + elif level_key == "ALL" and overall_sort_column in filtered_df.columns: + highlight_column = overall_sort_column + + if actual_sort_column and actual_sort_column in filtered_df.columns: + ascending = (sort_order == "Ascending") + filtered_df = filtered_df.sort_values(by=actual_sort_column, ascending=ascending, na_position='last') + + return filtered_df, level_key, highlight_column + + def filter_and_sort_data(level_filter, model_type_filter, sort_by, sort_order): + """Filter and sort the leaderboard data""" + df = load_leaderboard_data() + + filtered_df, level_key, highlight_column = apply_filters(df, level_filter, model_type_filter, sort_order, sort_by) + + # Generate HTML table + return generate_html_table(filtered_df, highlight_column) + + # Load initial data + initial_table = filter_and_sort_data(default_level, "All", "Overall Success", "Descending") + initial_df = load_leaderboard_data() # Load raw data for model selector + initial_selected_models = initial_df['Model'].tolist()[:5] if len(initial_df) > 0 else [] + initial_heatmap_models = initial_df['Model'].tolist()[:12] if len(initial_df) > 0 else [] + initial_heatmap = create_performance_heatmap(initial_df, initial_heatmap_models) + initial_level_metric_level = level_ids[0] if level_ids else None + initial_level_model_choices = initial_df['Model'].tolist() if len(initial_df) > 0 else [] + initial_level_model_values = initial_level_model_choices[:5] + initial_level_metric_chart = create_level_metric_chart( + initial_df, + initial_level_metric_level, + initial_level_model_values + ) if initial_level_metric_level else create_empty_level_metric_chart("No level metrics available") + + # Load custom CSS and responsive styles + custom_css = get_leaderboard_css() + get_responsive_styles() + """ + + + + """ + + gr.HTML(custom_css) + + # Header styles and navigation + gr.HTML(""" + + """) + + gr.Image( + value="banner.png", + show_label=False, + interactive=False, + type="filepath", + elem_id="hero-banner" + ) + + gr.HTML(""" +
+

Hugging Face KREW Ko-AgentBench

+

Agent Benchmark Specialized for Korean Service Environment

+
+ """) + + # Links section below title + gr.HTML(""" + + """) + + # Section 1: 단계별 태스크 설계 + gr.HTML(""" +
+
+

7-Level Task Structure

+
+

From simple tool calls to long-term context understanding and robustness handling,

+

we analyzed agent capabilities from every angle across 7 levels.

+
+
+

Single-Turn

+
+ 80% +
+
    +
  • L1: Single Tool Execution
  • +
  • L2: Tool Selection Capability
  • +
  • L3: Sequential Reasoning
  • +
  • L4: Parallel Reasoning
  • +
  • L5: Robustness
  • +
+
+
+

Multi-Turn

+
+ 20% +
+
    +
  • L6: Efficiency
  • +
  • L7: Long-term Context Memory
  • +
+
+
+
+ """) + + # Section 2: 핵심 시나리오 구성 + gr.HTML(""" +
+
+

Real-life Scenario Design Using 18 APIs Optimized for Domestic Environment

+
+
+

Realistic, user-centered scenarios—such as “appointment booking” and “blog review search”—were designed

+

by integrating major domestic service APIs including Naver Maps and Kakao.

+
+
+
+ """) + + # Section 3: 핵심 평가 기준 + gr.HTML(""" +
+
+

Key Evaluation Metrics

+
+
+
+

Cache-based Iterative Evaluation

+
    +
  • Real API Response Caching
  • +
  • Solves chronic issues of existing benchmarks, such as external API instability and mismatched information attributes
  • +
  • Ensures benchmark consistency and reliability
  • +
+
+
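+ <!-- Cache-based evaluation: recorded real-API responses stand in for live calls, which is what keeps repeated runs consistent -->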
+

Robustness Test

+
    +
  • Evaluates error recognition and response strategies under intentionally injected error situations (e.g., product discontinuation)
  • +
  • Selects models that operate stably in real-world environments
  • +
+
+
+

Level-specific Evaluation Metrics

+
    +
  • Evaluates problem-solving efficiency at each stage including tool selection, parameter configuration, and data processing flow
  • +
  • Quantitatively identifies model strengths and weaknesses
  • +
+
+
+
+ """) + + # Metrics overview cards removed per updated design + + # Domain filter section with enhanced styling + gr.HTML(""" + + + """) + + level_options = list(level_details.keys()) + + with gr.Column(elem_classes=["domain-selector-container"], elem_id="task-level-selector"): + gr.HTML(""" +
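+ <!-- Task level selector: the Radio below (domain_filter) re-renders the table and every chart on change -->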
+

🧠 Select Task Level

+

Easily compare agent performance across ALL · L1~L7 stages of Ko-AgentBench.

+
+ """) + domain_filter = gr.Radio( + choices=level_options, + value=default_level, + label="", + interactive=True, + container=False, + elem_classes=["domain-radio"] + ) + + # Filter controls with domain styling + with gr.Column(elem_classes=["domain-selector-container", "filters-sorting-container"], elem_id="filters-sorting-container"): + gr.HTML(""" +
+

🔍 Filters & Sorting

+

Select model type and sorting criteria to explore results in your preferred way.

+
+ """) + with gr.Row(elem_classes=["filters-sorting-row"]): + with gr.Column(scale=1, elem_classes=["filter-group"]): + with gr.Row(elem_classes=["filter-group-row"]): + gr.HTML("Model Access") + model_type_filter = gr.Radio( + choices=["All", "OSS", "API"], + value="All", + label="", + elem_classes=["domain-radio"], + container=False + ) + with gr.Column(scale=1, elem_classes=["filter-group"]): + with gr.Row(elem_classes=["filter-group-row"]): + gr.HTML("Sort Order") + sort_order = gr.Radio( + choices=["Descending", "Ascending"], + value="Descending", + label="", + elem_classes=["domain-radio"], + container=False + ) + + # Main leaderboard table with dynamic title + leaderboard_title = gr.HTML(update_leaderboard_title(default_level)) + + leaderboard_table = gr.HTML(initial_table) + + gr.HTML(""" +
+
""") + + # Radar Chart Section + gr.HTML(""" +
+
+

Core Capability Radar

+


+

#Execution Accuracy #Complex Reasoning #Robustness #Context & Efficiency #Overall Success #Validity

+

Analyze each model's capabilities and their balance across 6 core competencies.

+
+ """) + + with gr.Column(elem_classes=["domain-selector-container", "model-selector-container"], elem_id="radar-model-selector"): + gr.HTML(""" +
+

🎯 Select Models for Comparison

+

Select models to compare in the radar chart.

+
+ """) + model_selector = gr.Dropdown( + choices=initial_df['Model'].tolist()[:10], + value=initial_df['Model'].tolist()[:5], + multiselect=True, + label="", + info=None, + container=False, + elem_classes=["model-dropdown"] + ) + + # Radar chart plot - wrapped in centered container + gr.HTML('
') + radar_chart = gr.Plot( + label="", + value=create_domain_radar_chart( + load_leaderboard_data(), + initial_df['Model'].tolist()[:5] + ), + elem_classes=["radar-chart", "plot-container"] + ) + gr.HTML('
') + + gr.HTML("
") + + # Level metric breakdown section + gr.HTML(""" +
+
+

Level-Specific Metric Spotlight

+

Compare model scores based on unique evaluation metrics for each L1–L7 level.

+
+ """) + + with gr.Column(elem_classes=["domain-selector-container", "level-selector-container"], elem_id="level-selector-box"): + gr.HTML(""" +
+

🧭 Select Task Level and Models

+

Select L1–L7 levels and models to explore detailed SR-based metrics.

+
+ """) + level_metric_selector = gr.Dropdown( + choices=level_ids, + value=level_ids[0] if level_ids else None, + multiselect=False, + label="", + info=None, + container=False, + elem_classes=["level-dropdown"] + ) + level_model_selector = gr.Dropdown( + choices=initial_level_model_choices, + value=initial_level_model_values, + multiselect=True, + label="", + info=None, + container=False, + elem_classes=["model-dropdown", "level-model-dropdown"] + ) + + gr.HTML('
') + level_metric_chart = gr.Plot( + label="", + value=initial_level_metric_chart, + elem_classes=["level-metric-plot", "plot-container"] + ) + gr.HTML(""" +
+
+ """) + + # Heatmap section + gr.HTML(""" +
+
+

Comprehensive Performance Heatmap

+

Explore the comprehensive performance heatmap to see SR scores across L1–L7 levels for each model at a glance.

+
+
+ """) + heatmap_chart = gr.Plot( + label="", + value=initial_heatmap, + elem_classes=["heatmap-plot", "plot-container"] + ) + gr.HTML(""" +
+
+ """) + + # Update functions + def get_optimal_sort_order(sort_by_value): + """Return the optimal sort order for a given metric""" + # Metrics where higher is better (descending) + descending_metrics = ["Overall Success"] + [sr_column_map[level] for level in level_ids] + + # Metrics where lower is better (ascending) + ascending_metrics = [] + + if sort_by_value in descending_metrics: + return "Descending" + elif sort_by_value in ascending_metrics: + return "Ascending" + else: + return "Descending" # Default fallback + + def update_table(level_filter, model_type_filter, sort_order): + title_html = update_leaderboard_title(level_filter) + sort_metric = "Overall Success" if level_filter == "ALL" else sr_column_map.get(resolve_level(level_filter), "Overall Success") + table_html = filter_and_sort_data(level_filter, model_type_filter, sort_metric, sort_order) + return title_html, table_html + + def update_radar_chart(domain_filter, model_type_filter, sort_order, selected_models, selected_level, level_selected_models): + # Get filtered dataframe + df = load_leaderboard_data() + sort_metric = "Overall Success" if domain_filter == "ALL" else sr_column_map.get(resolve_level(domain_filter), "Overall Success") + filtered_df, _, _ = apply_filters(df, domain_filter, model_type_filter, sort_order, sort_metric) + + # Update model selector choices based on filtered data + available_models_all = filtered_df['Model'].tolist() + available_models = available_models_all[:15] # Top 15 from filtered results + + # If selected models are not in available models, reset to top 5 + if selected_models: + valid_selected = [m for m in selected_models if m in available_models] + if not valid_selected: + valid_selected = available_models[:5] + else: + valid_selected = available_models[:5] + + # Create radar chart + chart = create_domain_radar_chart(filtered_df, valid_selected) + + # Prepare heatmap order prioritizing selected models + heatmap_order = [] + for model in valid_selected: + if model not in heatmap_order: + heatmap_order.append(model) + for model in available_models_all: + if model not in heatmap_order: + heatmap_order.append(model) + heatmap_order = heatmap_order[:12] + heatmap_fig = create_performance_heatmap(filtered_df, heatmap_order) + + # Level metric chart + effective_level = selected_level if selected_level in level_ids else (level_ids[0] if level_ids else None) + available_level_models = available_models_all + if level_selected_models: + valid_level_models = [m for m in level_selected_models if m in available_level_models][:5] + if not valid_level_models: + valid_level_models = available_level_models[:5] + else: + valid_level_models = available_level_models[:5] + level_metric_fig = create_level_metric_chart(filtered_df, effective_level, valid_level_models) if effective_level else create_empty_level_metric_chart("Select a level to view its metrics") + + return ( + gr.Dropdown( + choices=available_models, + value=valid_selected, + multiselect=True, + label="", + info=None, + container=False, + elem_classes=["model-dropdown"] + ), + chart, + heatmap_fig, + gr.Dropdown( + choices=available_level_models, + value=valid_level_models, + multiselect=True, + label="", + info=None, + container=False, + elem_classes=["model-dropdown", "level-model-dropdown"] + ), + level_metric_fig, + ) + + def update_radar_only(domain_filter, model_type_filter, sort_order, selected_models, selected_level, level_selected_models): + # Get filtered dataframe + df = load_leaderboard_data() + sort_metric = "Overall Success" if 
domain_filter == "ALL" else sr_column_map.get(resolve_level(domain_filter), "Overall Success") + filtered_df, _, _ = apply_filters(df, domain_filter, model_type_filter, sort_order, sort_metric) + + available_models_all = filtered_df['Model'].tolist() + if selected_models: + valid_selected = [m for m in selected_models if m in available_models_all] + if not valid_selected: + valid_selected = available_models_all[:5] + else: + valid_selected = available_models_all[:5] + + heatmap_order = [] + for model in valid_selected: + if model not in heatmap_order: + heatmap_order.append(model) + for model in available_models_all: + if model not in heatmap_order: + heatmap_order.append(model) + heatmap_order = heatmap_order[:12] + + effective_level = selected_level if selected_level in level_ids else (level_ids[0] if level_ids else None) + available_level_models = available_models_all + if level_selected_models: + valid_level_models = [m for m in level_selected_models if m in available_level_models][:5] + if not valid_level_models: + valid_level_models = available_level_models[:5] + else: + valid_level_models = available_level_models[:5] + level_metric_fig = create_level_metric_chart(filtered_df, effective_level, valid_level_models) if effective_level else create_empty_level_metric_chart("Select a level to view its metrics") + + return ( + create_domain_radar_chart(filtered_df, valid_selected), + create_performance_heatmap(filtered_df, heatmap_order), + gr.Dropdown( + choices=available_level_models, + value=valid_level_models, + multiselect=True, + label="", + info=None, + container=False, + elem_classes=["model-dropdown", "level-model-dropdown"] + ), + level_metric_fig, + ) + + def update_level_metric_only(domain_filter, model_type_filter, sort_order, selected_models, selected_level, level_selected_models): + df = load_leaderboard_data() + sort_metric = "Overall Success" if domain_filter == "ALL" else sr_column_map.get(resolve_level(domain_filter), "Overall Success") + filtered_df, _, _ = apply_filters(df, domain_filter, model_type_filter, sort_order, sort_metric) + available_models = filtered_df['Model'].tolist() + if level_selected_models: + valid_level_models = [m for m in level_selected_models if m in available_models][:5] + if not valid_level_models: + valid_level_models = available_models[:5] + else: + valid_level_models = available_models[:5] + effective_level = selected_level if selected_level in level_ids else (level_ids[0] if level_ids else None) + level_chart = create_level_metric_chart(filtered_df, effective_level, valid_level_models) if effective_level else create_empty_level_metric_chart("Select a level to view its metrics") + return ( + gr.Dropdown( + choices=available_models, + value=valid_level_models, + multiselect=True, + label="", + info=None, + container=False, + elem_classes=["model-dropdown", "level-model-dropdown"] + ), + level_chart, + ) + + # Update table when filters change + filter_inputs = [domain_filter, model_type_filter, sort_order] + + for input_component in filter_inputs: + input_component.change( + fn=update_table, + inputs=filter_inputs, + outputs=[leaderboard_title, leaderboard_table] + ) + + # Also update radar chart when filters change + input_component.change( + fn=update_radar_chart, + inputs=filter_inputs + [model_selector, level_metric_selector, level_model_selector], + outputs=[model_selector, radar_chart, heatmap_chart, level_model_selector, level_metric_chart] + ) + + # Update radar chart when model selection changes + model_selector.change( + 
fn=update_radar_only, + inputs=filter_inputs + [model_selector, level_metric_selector, level_model_selector], + outputs=[radar_chart, heatmap_chart, level_model_selector, level_metric_chart] + ) + + level_metric_selector.change( + fn=update_level_metric_only, + inputs=filter_inputs + [model_selector, level_metric_selector, level_model_selector], + outputs=[level_model_selector, level_metric_chart] + ) + + level_model_selector.change( + fn=update_level_metric_only, + inputs=filter_inputs + [model_selector, level_metric_selector, level_model_selector], + outputs=[level_model_selector, level_metric_chart] + ) + + # Define generate_performance_card function before using it + def generate_performance_card(model_name): + """Generate HTML for the model performance card""" + if not model_name: + return """
+ Please select a model to generate its performance card +
""" + + # Get model data + df = load_leaderboard_data() + model_data = df[df['Model'] == model_name] + + if model_data.empty: + return """
+ Model not found in the database +
""" + + row = model_data.iloc[0] + + # Get overall rank based on overall success + df_with_success = df.copy() + df_with_success['Overall Success'] = pd.to_numeric(df_with_success.get('Overall Success', pd.Series()), errors='coerce') + df_with_success = df_with_success[df_with_success['Overall Success'].notna()] + df_sorted = df_with_success.sort_values('Overall Success', ascending=False).reset_index(drop=True) + try: + rank = df_sorted[df_sorted['Model'] == model_name].index[0] + 1 + except: + rank = 'N/A' + + # Format values + def format_value(val, decimals=3, prefix='', suffix=''): + if pd.isna(val) or val == '': + return 'N/A' + return f"{prefix}{float(val):.{decimals}f}{suffix}" + + def format_score(value): + if pd.isna(value) or value == '': + return 'N/A' + return f"{float(value):.3f}" + + # Use the same order as the domain radar but keep '견고성' (Robustness) last + radar_metrics = [ + ("Execution Accuracy", row.get('Execution Accuracy')), + ("Context & Efficiency", row.get('Context & Efficiency')), + ("Overall Success", row.get('Overall Success')), + ("Robustness", row.get('Robustness')), + ("Complex Reasoning", row.get('Complex Reasoning')), + ("Validity", row.get('Call Validity')), + ] + radar_values = [] + radar_labels = [] + for label, value in radar_metrics: + if pd.isna(value) or value == '': + radar_values.append(0.0) + else: + try: + radar_values.append(max(0.0, min(1.0, float(value)))) + except (TypeError, ValueError): + radar_values.append(0.0) + radar_labels.append(label) + + mini_radar_html = build_static_radar_chart(radar_values, radar_labels) + + level_blocks = [] + for level in level_ids: + sr_col = sr_column_map.get(level) + level_blocks.append((level, row.get(sr_col, ''))) + + evaluation_date = EVALUATION_DATE + icon_html = "" + if KREW_ICON_BASE64: + icon_html = f'Krew icon' + else: + icon_html = '
🤖
' + + card_html = f""" +
+
+
+
{icon_html}
+
+
{model_name}
+
Vendor · {row['Vendor']}
+
+ Evaluation Date + {evaluation_date} +
+
+
+
+
RANK
+
#{rank}
+
+
+
+
+
+ {mini_radar_html} +
+
+
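+ <!-- Mini radar: inline SVG produced by build_static_radar_chart above, so the card renders with no external chart assets -->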
+ """ + ordered_labels = ["Execution Accuracy", "Context & Efficiency", "Overall Success", "Robustness", "Complex Reasoning", "Validity"] + ordered_metrics = sorted(radar_metrics, key=lambda x: ordered_labels.index(x[0]) if x[0] in ordered_labels else len(ordered_labels)) + top_metrics = ordered_metrics[:3] + bottom_metrics = ordered_metrics[3:] + card_html += """ +
+ """ + for label, value in top_metrics: + card_html += f""" +
+
{label}
+
{format_score(value)}
+
+ """ + card_html += """ +
+
+ """ + for label, value in bottom_metrics: + card_html += f""" +
+
{label}
+
{format_score(value)}
+
+ """ + card_html += """ +
+
+
+
+
+ """ + for level, value in level_blocks: + card_html += f""" +
+
{level}
+
{format_score(value)}
+
+ """ + card_html += """ +
+
+
+ """ + + return card_html + + # MODEL PERFORMANCE CARD SECTION + gr.HTML(""" +
+
+

Model Performance Card

+

+ Explore the in-depth analysis card that visualizes each model's performance spectrum across 6 key metrics, along with its success rate (SR) for levels L1~L7. +

+

+ ※ Rank is calculated based on the average SR value across L1–L7 levels. +

+
+
+ """) + + with gr.Column(elem_classes=["domain-selector-container", "model-selector-container"], elem_id="model-selector-box"): + gr.HTML(""" +
+

🤖 Select Model

+

Select a model for the analysis card.

+
+ """) + card_model_selector = gr.Dropdown( + choices=initial_df['Model'].tolist(), + value=initial_df['Model'].tolist()[0] if len(initial_df) > 0 else None, + label="", + info=None, + container=False, + elem_classes=["model-dropdown"] + ) + download_card_btn = gr.Button( + "Download as PNG", + elem_id="download-card-btn", + elem_classes=["pill-button"] + ) + + gr.HTML(""" +
+ """) + + # Card display area - generate initial card + initial_model = initial_df['Model'].tolist()[0] if len(initial_df) > 0 else None + initial_card_html = generate_performance_card(initial_model) if initial_model else "" + card_display = gr.HTML(value=initial_card_html, elem_id="performance-card-html") + + gr.HTML(""" +
+
+
+ """) + + # Add custom CSS for the performance card + gr.HTML(""" + + + """) + + + # Wire up the card generator to selection change + card_model_selector.change( + fn=generate_performance_card, + inputs=[card_model_selector], + outputs=[card_display] + ) + + # Wire up download button with html2canvas capture + download_card_btn.click( + fn=None, + js=""" + async () => { + const ensureHtml2Canvas = () => new Promise((resolve, reject) => { + if (window.html2canvas) { + resolve(window.html2canvas); + return; + } + const existing = document.querySelector('script[data-html2canvas]'); + if (existing) { + existing.addEventListener('load', () => resolve(window.html2canvas)); + existing.addEventListener('error', reject); + return; + } + const script = document.createElement('script'); + script.src = 'https://cdn.jsdelivr.net/npm/html2canvas@1.4.1/dist/html2canvas.min.js'; + script.async = true; + script.dataset.html2canvas = 'true'; + script.onload = () => resolve(window.html2canvas); + script.onerror = () => reject(new Error('Failed to load html2canvas')); + document.head.appendChild(script); + }); + + const pause = (ms) => new Promise(resolve => setTimeout(resolve, ms)); + await pause(60); + + const card = document.querySelector('.performance-card'); + if (!card) { + alert('Performance card not found. Please select a model first.'); + return; + } + + const btn = document.getElementById('download-card-btn'); + const originalText = btn?.textContent || ''; + if (btn) { + btn.textContent = 'Generating...'; + btn.disabled = true; + } + + try { + const html2canvasLib = await ensureHtml2Canvas(); + if (!html2canvasLib) { + throw new Error('html2canvas unavailable'); + } + + const canvas = await html2canvasLib(card, { + backgroundColor: '#01091A', + scale: 2, + logging: false, + useCORS: true + }); + + const link = document.createElement('a'); + const modelName = card.querySelector('.card-model-name')?.textContent || 'model'; + const timestamp = new Date().toISOString().slice(0, 10); + const fileName = `${modelName.replace(/[^a-z0-9]/gi, '-').toLowerCase()}-performance-${timestamp}.png`; + link.download = fileName; + link.href = canvas.toDataURL('image/png'); + document.body.appendChild(link); + link.click(); + document.body.removeChild(link); + } catch (error) { + console.error('Error capturing card:', error); + alert('Failed to capture performance card. 
Please try again.'); + } finally { + if (btn) { + btn.textContent = originalText; + btn.disabled = false; + } + } + } + """ + ) + + # Also update card when filters change to keep model selector in sync + for input_component in filter_inputs: + def update_dropdown_and_card(*args): + filtered_df, _, _ = apply_filters( + load_leaderboard_data(), + args[0], + args[1], + args[2], + "Overall Success" if args[0] == "ALL" else sr_column_map.get(resolve_level(args[0]), "Overall Success") + ) + choices = filtered_df['Model'].tolist() + # Select first model from filtered list + value = choices[0] if choices else None + return gr.Dropdown( + choices=choices, + value=value, + label="", + info=None, + container=False, + elem_classes=["model-dropdown"] + ) + + input_component.change( + fn=update_dropdown_and_card, + inputs=filter_inputs, + outputs=[card_model_selector] + ) + + return leaderboard_table + + +def create_leaderboard_v2_interface(): + """Create the complete leaderboard v1 interface""" + return create_leaderboard_v2_tab() + + +def create_domain_radar_chart(df, selected_models=None, max_models=5): + """Visualize six core capability metrics on a radar chart.""" + df = df.copy() + # Use the same metric order and labels as the model performance card + # Match the model card order but place Robustness last as requested + metrics_info = [ + {"column": "Execution Accuracy", "label": "Execution Accuracy", "description": "CallEM · ArgAcc · SelectAcc"}, + {"column": "Context & Efficiency", "label": "Context & Efficiency", "description": "ReuseRate · EffScore · ContextRetention"}, + {"column": "Overall Success", "label": "Overall Success", "description": "Average Success Rate across L1~L7"}, + {"column": "Robustness", "label": "Robustness", "description": "AdaptiveRouting · FallbackSR"}, + {"column": "Complex Reasoning", "label": "Complex Reasoning", "description": "ProvAcc · PSM · Coverage"}, + {"column": "Call Validity", "label": "Validity", "description": "Per-level EPR_CVR average"}, + ] + + required_columns = [m["column"] for m in metrics_info] + if df.empty or not any(col in df.columns for col in required_columns): + return create_empty_radar_chart("Not enough data to build the capability radar") + + # Default model selection + if not selected_models: + if "Overall Success" in df.columns: + top_models = df.sort_values("Overall Success", ascending=False) + else: + top_models = df + selected_models = top_models['Model'].head(max_models).tolist() + + selected_models = selected_models[:max_models] + + # Ensure metric columns are numeric + for metric in metrics_info: + col = metric["column"] + if col in df.columns: + df[col] = pd.to_numeric(df[col], errors='coerce') + + fig = go.Figure() + angle_labels = [m["label"] for m in metrics_info] + + palette = [ + {'fill': 'rgba(255, 210, 30, 0.25)', 'line': '#ffd21e'}, + {'fill': 'rgba(255, 138, 60, 0.22)', 'line': '#FF8A3C'}, + {'fill': 'rgba(249, 112, 185, 0.22)', 'line': '#F970B9'}, + {'fill': 'rgba(139, 92, 246, 0.20)', 'line': '#8B5CF6'}, + {'fill': 'rgba(248, 250, 252, 0.20)', 'line': '#F8FAFC'}, + ] + + for idx, model_name in enumerate(selected_models): + model_data = df[df['Model'] == model_name] + if model_data.empty: + continue + + row = model_data.iloc[0] + values = [] + tooltips = [] + for metric in metrics_info: + col = metric["column"] + value = row[col] if col in row else float('nan') + if pd.isna(value) or value == '': + value = 0 + values.append(float(value)) + tooltips.append(metric["description"]) + + if not values: + continue + + values_loop 
= values + [values[0]] + angles_loop = angle_labels + [angle_labels[0]] + tooltips_loop = tooltips + [tooltips[0]] + colors = palette[idx % len(palette)] + + fig.add_trace( + go.Scatterpolar( + r=values_loop, + theta=angles_loop, + fill='toself', + fillcolor=colors['fill'], + line=dict(color=colors['line'], width=3), + marker=dict( + size=10, + color=colors['line'], + symbol='circle', + line=dict(width=2, color='#01091A') + ), + name=model_name, + customdata=tooltips_loop, + mode="lines+markers", + hovertemplate="%{fullData.name}
" + + "%{theta}
" + + "%{customdata}
" + + "%{r:.3f}
" + + "", + hoverlabel=dict( + bgcolor="rgba(1, 9, 26, 0.95)", + bordercolor=colors['line'], + font=dict(color="white", size=12, family="'Geist', sans-serif") + ) + ) + ) + + tick_vals = [i / 5 for i in range(6)] + tick_text = [f"{val:.2f}" for val in tick_vals] + + fig.update_layout( + polar=dict( + bgcolor='rgba(245, 246, 247, 0.03)', + radialaxis=dict( + visible=True, + range=[0, 1], + showline=True, + linewidth=2, + linecolor='rgba(245, 246, 247, 0.2)', + gridcolor='rgba(245, 246, 247, 0.1)', + gridwidth=1, + tickvals=tick_vals, + ticktext=tick_text, + tickfont=dict( + size=11, + color='white', + family="'Geist Mono', monospace" + ) + ), + angularaxis=dict( + showline=True, + linewidth=2, + linecolor='rgba(245, 246, 247, 0.2)', + gridcolor='rgba(245, 246, 247, 0.08)', + tickfont=dict( + size=13, + family="'Geist', sans-serif", + color='white', + weight=600 + ), + rotation=90, + direction="clockwise", + ), + ), + showlegend=True, + legend=dict( + orientation="h", + yanchor="bottom", + y=-0.15, + xanchor="center", + x=0.5, + font=dict(size=12, family="'Geist', sans-serif", color='white'), + bgcolor='rgba(1, 9, 26, 0.8)', + bordercolor='rgba(245, 246, 247, 0.2)', + borderwidth=1, + itemsizing='constant', + itemwidth=30 + ), + title=dict( + text="Core Capability Radar", + x=0.5, + y=0.97, + font=dict( + size=22, + family="'Geist', sans-serif", + color="white", + weight=700 + ), + ), + paper_bgcolor="#01091A", + plot_bgcolor="rgba(245, 246, 247, 0.02)", + height=800, + width=900, + margin=dict(t=30, b=50, l=10, r=10), + autosize=True, + annotations=[ + dict( + text="Ko-Agent Leaderboard", + xref="paper", yref="paper", + x=0.98, y=0.02, + xanchor='right', yanchor='bottom', + font=dict(size=10, color='#64748B'), + showarrow=False + ) + ] + ) + + return fig + + +def create_performance_heatmap(df, ordered_models=None, max_models=12): + """Render a heatmap of SR scores across task levels for selected models.""" + df = df.copy() + level_sequence = [f"L{i}" for i in range(1, 8)] + sr_columns = [] + for level in level_sequence: + col = f"{level}_SR" + if col in df.columns: + df[col] = pd.to_numeric(df[col], errors="coerce") + sr_columns.append((level, col)) + if df.empty or not sr_columns: + return create_empty_heatmap("Not enough SR data to render the heatmap") + + df = df.drop_duplicates(subset=["Model"]) + if df.empty: + return create_empty_heatmap("No models available to render the heatmap") + + sort_column = "Overall Success" if "Overall Success" in df.columns else sr_columns[0] + df = df.sort_values(sort_column, ascending=False) + + if ordered_models: + ordered_models = [m for m in ordered_models if m in df["Model"].tolist()] + else: + ordered_models = df["Model"].tolist() + + if not ordered_models: + return create_empty_heatmap("No models available to render the heatmap") + + ordered_models = ordered_models[:max_models] + heatmap_df = df.set_index("Model").reindex(ordered_models) + + level_labels = [] + z_matrix = [] + has_values = False + + for level, col in sr_columns: + if col not in heatmap_df.columns: + continue + label = f"{level} · SR" + level_labels.append(label) + row_values = [] + for model in ordered_models: + value = heatmap_df.at[model, col] if model in heatmap_df.index else None + if pd.isna(value): + row_values.append(None) + else: + val = float(value) + row_values.append(val) + has_values = True + z_matrix.append(row_values) + + if not level_labels or not has_values: + return create_empty_heatmap("Not enough SR data to render the heatmap") + + colorscale = [ + [0.0, 
"#0A0A0A"], + [0.25, "#1A1411"], + [0.5, "#332818"], + [0.75, "#B8660A"], + [1.0, "#FFD21E"], + ] + + fig = go.Figure() + fig.add_trace( + go.Heatmap( + z=z_matrix, + x=ordered_models, + y=level_labels, + colorscale=colorscale, + zmin=0, + zmax=1, + hovertemplate="%{y}
%{x}
SR · %{z:.3f}", + colorbar=dict( + title="Success Rate", + titlefont=dict(color="white", family="'Geist', sans-serif", size=12), + tickfont=dict(color="white", family="'Geist', sans-serif", size=10), + thickness=12, + len=0.7, + outlinecolor="rgba(255, 255, 255, 0.1)", + bgcolor="rgba(1, 9, 26, 0.75)" + ), + showscale=True + ) + ) + + annotations = [] + for y_idx, level in enumerate(level_labels): + for x_idx, model in enumerate(ordered_models): + value = z_matrix[y_idx][x_idx] + if value is None: + continue + font_color = "#0B1120" if value >= 0.6 else "#F8FAFC" + annotations.append( + dict( + x=model, + y=level, + text=f"{value:.3f}", + showarrow=False, + font=dict( + family="'Geist Mono', monospace", + size=11, + color=font_color + ) + ) + ) + + fig.update_layout( + paper_bgcolor="#01091A", + plot_bgcolor="rgba(245, 246, 247, 0.02)", + margin=dict(t=80, b=90, l=110, r=160), + height=520, + width=1450, + font=dict(family="'Geist', sans-serif", color="white"), + xaxis=dict( + tickangle=-25, + showgrid=False, + ticks="", + tickfont=dict(size=11, family="'Geist', sans-serif", color="white") + ), + yaxis=dict( + showgrid=False, + ticks="", + tickfont=dict(size=12, family="'Geist', sans-serif", color="white") + ), + annotations=annotations, + title=dict( + text="Comprehensive Performance Heatmap", + x=0.5, + y=0.98, + font=dict( + size=20, + family="'Geist', sans-serif", + color="white", + weight=700 + ), + ) + ) + fig.update_xaxes(side="bottom") + + return fig + + +def create_empty_heatmap(message): + """Render an empty state for the heatmap with a centered message.""" + fig = go.Figure() + fig.add_annotation( + text=f"🗺️ {message}", + xref="paper", yref="paper", + x=0.5, y=0.5, + xanchor='center', yanchor='middle', + font=dict( + size=18, + color="white", + family="'Geist', sans-serif" + ), + showarrow=False, + bgcolor="rgba(245, 246, 247, 0.05)", + bordercolor="rgba(245, 246, 247, 0.2)", + borderwidth=1, + borderpad=20 + ) + + fig.update_layout( + paper_bgcolor="#01091A", + plot_bgcolor="rgba(245, 246, 247, 0.02)", + height=520, + width=1450, + margin=dict(t=80, b=80, l=80, r=160), + title=dict( + text="Comprehensive Performance Heatmap", + x=0.5, + y=0.98, + font=dict( + size=20, + family="'Geist', sans-serif", + color="white", + weight=700 + ), + ) + ) + fig.update_xaxes(visible=False) + fig.update_yaxes(visible=False) + return fig + + +def create_level_metric_chart(df, level, selected_models=None, max_models=5): + """Render a grouped horizontal bar chart showing per-model scores for a level's metrics.""" + if not level: + return create_empty_level_metric_chart("Select a level to view its metrics") + df = df.copy() + level_prefix = f"{level}_" + level_columns = [col for col in df.columns if col.startswith(level_prefix)] + metric_columns = [] + for col in level_columns: + metric_suffix = col[len(level_prefix):] + metric_key_lower = metric_suffix.lower() + if "cost" in metric_key_lower: + continue + numeric_series = pd.to_numeric(df[col], errors='coerce') + valid_values = numeric_series.dropna() + if valid_values.empty: + continue + if (valid_values < 0).any() or (valid_values > 1.05).any(): + continue + df[col] = numeric_series + metric_columns.append(col) + if not metric_columns: + return create_empty_level_metric_chart("This level has no 0-1 metrics to visualize") + df = df.drop_duplicates(subset=['Model']) + if df.empty: + return create_empty_level_metric_chart("No models available to render level metrics") + if selected_models: + model_order = [m for m in selected_models if m in 
df['Model'].tolist()] + else: + sort_col = 'Overall Success' if 'Overall Success' in df.columns else metric_columns[0] + model_order = df.sort_values(sort_col, ascending=False)['Model'].tolist() + if not model_order: + model_order = df['Model'].tolist() + model_order = model_order[:max_models] + df_models = df[df['Model'].isin(model_order)].set_index('Model') + if df_models.empty: + return create_empty_level_metric_chart("No matching models for selected filters") + def prettify_metric_name(metric_key): + raw = metric_key[len(level_prefix):] + text = raw.replace('_', ' ') + text = re.sub(r'(?<=.)([A-Z])', r' \1', text) + text = text.replace('Avg', 'Average') + replacements = { + 'Sr': 'SR', + 'Ac': 'AC', + 'Tsq': 'TSQ', + 'Cvr': 'CVR', + 'Psm': 'PSM', + 'Prov': 'Prov', + 'Call Em': 'CallEM', + 'Reuse Rate': 'Reuse Rate', + 'Eff Score': 'Eff Score' + } + words = text.title().split() + words = [replacements.get(word, word) for word in words] + return ' '.join(words) + metric_labels = [] + for col in metric_columns: + label = prettify_metric_name(col) + if label in metric_labels: + suffix = 2 + while f"{label} ({suffix})" in metric_labels: + suffix += 1 + label = f"{label} ({suffix})" + metric_labels.append(label) + model_palette = [ + '#ffd21e', + '#FF8A3C', + '#F970B9', + '#8B5CF6', + '#F8FAFC', + '#38BDF8', + ] + fig = go.Figure() + max_value = 0 + for idx, model in enumerate(model_order): + values = [] + for col in metric_columns: + value = df_models.at[model, col] if (model in df_models.index and col in df_models.columns) else float('nan') + if pd.notna(value): + values.append(float(value)) + max_value = max(max_value, float(value)) + else: + values.append(None) + color = model_palette[idx % len(model_palette)] + fig.add_trace( + go.Bar( + name=model, + y=metric_labels, + x=values, + orientation='h', + marker=dict(color=color, line=dict(color='rgba(1,9,26,0.8)', width=1)), + hovertemplate="%{y}
Model · %{fullData.name}
Score · %{x:.3f}", + ) + ) + plot_height = max(360, 140 + 48 * len(metric_labels)) + if max_value <= 0: + x_range = [0, 1] + else: + x_range = [0, max_value * 1.05] + fig.update_layout( + barmode='group', + bargap=0.25, + bargroupgap=0.18, + paper_bgcolor="#01091A", + plot_bgcolor="rgba(245, 246, 247, 0.02)", + height=plot_height, + width=1450, + margin=dict(t=90, b=80, l=220, r=160), + legend=dict( + orientation="h", + yanchor="bottom", + y=1.02, + xanchor="right", + x=1, + bgcolor='rgba(1, 9, 26, 0.75)', + bordercolor='rgba(245, 246, 247, 0.2)', + borderwidth=1, + font=dict(size=11, family="'Geist', sans-serif", color='white') + ), + xaxis=dict( + title=dict(text=f"{level} Metric Score", font=dict(size=14, color="white")), + tickfont=dict(size=11, color="white"), + gridcolor='rgba(245, 246, 247, 0.08)', + zerolinecolor='rgba(245, 246, 247, 0.18)', + range=x_range + ), + yaxis=dict( + tickfont=dict(size=13, color="white"), + automargin=True + ), + title=dict( + text=f"{level} Metric Breakdown", + x=0.5, + y=0.98, + font=dict(size=20, family="'Geist', sans-serif", color="white", weight=700) + ) + ) + return fig + + +def create_empty_level_metric_chart(message): + fig = go.Figure() + fig.add_annotation( + text=f"🧭 {message}", + xref="paper", yref="paper", + x=0.5, y=0.5, + xanchor='center', yanchor='middle', + font=dict(size=18, color="white", family="'Geist', sans-serif"), + showarrow=False, + bgcolor="rgba(245, 246, 247, 0.05)", + bordercolor="rgba(245, 246, 247, 0.2)", + borderwidth=1, + borderpad=20 + ) + fig.update_layout( + paper_bgcolor="#01091A", + plot_bgcolor="rgba(245, 246, 247, 0.02)", + height=420, + width=1450, + margin=dict(t=80, b=60, l=80, r=120), + title=dict( + text="Level Metric Breakdown", + x=0.5, + y=0.98, + font=dict(size=20, family="'Geist', sans-serif", color="white", weight=700) + ) + ) + fig.update_xaxes(visible=False) + fig.update_yaxes(visible=False) + return fig + + +def create_empty_radar_chart(message): + """Create an empty radar chart with a message""" + fig = go.Figure() + + fig.add_annotation( + text=f"📊 {message}", + xref="paper", yref="paper", + x=0.5, y=0.5, + xanchor='center', yanchor='middle', + font=dict( + size=18, + color="white", + family="'Geist', sans-serif" + ), + showarrow=False, + bgcolor="rgba(245, 246, 247, 0.05)", + bordercolor="rgba(245, 246, 247, 0.2)", + borderwidth=1, + borderpad=20 + ) + + fig.update_layout( + paper_bgcolor="#01091A", + plot_bgcolor="rgba(245, 246, 247, 0.02)", + height=1450, + width=1450, + margin=dict(t=100, b=80, l=80, r=200), + title=dict( + text="Core Capability Radar", + x=0.5, + y=0.97, + font=dict( + size=22, + family="'Geist', sans-serif", + color="white", + weight=700 + ), + ), + annotations=[ + dict( + xref="paper", yref="paper", + x=0.98, y=0.02, + xanchor='right', yanchor='bottom', + font=dict(size=10, color='#64748B'), + showarrow=False + ) + ] + ) + + return fig + + +# NEW VISUALIZATION FUNCTIONS + +def create_cost_performance_scatter(df, metric="Avg AC"): + """Create scatter plot showing cost vs performance efficiency""" + # Filter out models without cost or performance data + df_filtered = df[(df['Avg Total Cost'] != '') & (df[metric] != '')].copy() + label_map = { + 'Proprietary': 'API', + 'Open source': 'OSS' + } + + if df_filtered.empty: + return create_empty_chart("No data available for cost-performance analysis") + + # Convert to numeric + df_filtered['Avg Total Cost'] = pd.to_numeric(df_filtered['Avg Total Cost'], errors='coerce') + df_filtered[metric] = pd.to_numeric(df_filtered[metric], 
errors='coerce') + df_filtered['Avg Turns'] = pd.to_numeric(df_filtered['Avg Turns'], errors='coerce') + + # Create color mapping for model type + color_map = { + 'Proprietary': '#1098F7', # Airglow Blue for Proprietary + 'Open source': '#58BC82' # Green for Open source + } + df_filtered['Color'] = df_filtered['Model Type'].map(color_map).fillna('#F5F6F7') + + fig = go.Figure() + + # Add scatter points + for model_type in df_filtered['Model Type'].unique(): + df_type = df_filtered[df_filtered['Model Type'] == model_type] + legend_name = label_map.get(model_type, model_type) + + fig.add_trace(go.Scatter( + x=df_type[metric], + y=df_type['Avg Total Cost'], + mode='markers+text', + name=legend_name, + text=df_type['Model'], + textposition="top center", + textfont=dict(size=10, color='white'), + marker=dict( + size=df_type['Avg Turns'] * 3, # Size based on number of turns + color=color_map.get(model_type, '#F5F6F7'), + opacity=0.8, + line=dict(width=2, color='#01091A') + ), + hovertemplate="%{text}
" + + f"{metric}: %{{x:.3f}}
" + + "Cost: $%{y:.3f}
" + + "Turns: %{marker.size:.1f}
" + + "" + )) + + # Add quadrant lines + median_x = df_filtered[metric].median() + median_y = df_filtered['Avg Total Cost'].median() + + fig.add_hline(y=median_y, line_dash="dash", line_color="#64748B", opacity=0.5) + fig.add_vline(x=median_x, line_dash="dash", line_color="#64748B", opacity=0.5) + + # Add quadrant labels + fig.add_annotation(x=0.95, y=0.05, text="💎 High Performance
Low Cost", + showarrow=False, xref="paper", yref="paper", + font=dict(size=12, color="white"), bgcolor="rgba(245, 246, 247, 0.1)") + fig.add_annotation(x=0.05, y=0.95, text="⚠️ Low Performance
High Cost", + showarrow=False, xref="paper", yref="paper", + font=dict(size=12, color="#ffd21e"), bgcolor="rgba(255, 210, 30, 0.1)") + + metric_display = "Action Completion" if metric == "Avg AC" else "Tool Selection Quality" + + fig.update_layout( + title=dict( + text=f"Cost-Performance Efficiency: {metric_display}", + x=0.5, + y=0.97, + font=dict(size=22, family="'Geist', sans-serif", color="white", weight=700) + ), + xaxis=dict( + title=dict( + text=f"{metric_display}", + font=dict(size=16, color="white") + ), + tickfont=dict(size=12, color="white"), + gridcolor="rgba(245, 246, 247, 0.1)", + zerolinecolor="rgba(245, 246, 247, 0.2)" + ), + yaxis=dict( + title=dict( + text="Average Session Cost ($)", + font=dict(size=16, color="white") + ), + tickfont=dict(size=12, color="white"), + gridcolor="rgba(245, 246, 247, 0.1)", + zerolinecolor="rgba(245, 246, 247, 0.2)" + ), + paper_bgcolor="#01091A", + plot_bgcolor="rgba(245, 246, 247, 0.02)", + height=900, + width=1450, + showlegend=True, + legend=dict( + orientation="h", + yanchor="bottom", + y=1.02, + xanchor="right", + x=1, + font=dict(size=12, family="'Geist', sans-serif", color='white'), + bgcolor='rgba(1, 9, 26, 0.8)', + bordercolor='rgba(245, 246, 247, 0.2)', + borderwidth=1 + ), + margin=dict(t=100, b=80, l=80, r=80) + ) + + return fig + + +def create_speed_accuracy_plot(df, metric="Avg AC"): + """Create scatter plot showing speed vs accuracy trade-off""" + # Filter out models without duration or performance data + df_filtered = df[(df['Avg Session Duration'] != '') & (df[metric] != '')].copy() + + if df_filtered.empty: + return create_empty_chart("No data available for speed-accuracy analysis") + + # Convert to numeric + df_filtered['Avg Session Duration'] = pd.to_numeric(df_filtered['Avg Session Duration'], errors='coerce') + df_filtered[metric] = pd.to_numeric(df_filtered[metric], errors='coerce') + + # Create color scale based on cost + df_filtered['Avg Total Cost'] = pd.to_numeric(df_filtered['Avg Total Cost'], errors='coerce') + + fig = go.Figure() + + # Add scatter trace + fig.add_trace(go.Scatter( + x=df_filtered[metric], + y=df_filtered['Avg Session Duration'], + mode='markers+text', + text=df_filtered['Model'], + textposition="top center", + textfont=dict(size=9, color='white'), + marker=dict( + size=12, + color=df_filtered['Avg Total Cost'], + colorscale=[[0, '#0A0A0A'], [0.5, '#B8660A'], [1, '#ffd21e']], + showscale=True, + colorbar=dict( + title=dict( + text="Cost ($)", + font=dict(color="white") + ), + tickfont=dict(color="white"), + bgcolor="rgba(1, 9, 26, 0.8)", + bordercolor="rgba(245, 246, 247, 0.2)", + borderwidth=1, + x=1.02 + ), + line=dict(width=2, color='#01091A') + ), + hovertemplate="%{text}
" + + f"{metric}: %{{x:.3f}}
" + + "Duration: %{y:.1f}s
" + + "Cost: $%{marker.color:.3f}
" + + "" + )) + + # Add quadrant lines + median_x = df_filtered[metric].median() + median_y = df_filtered['Avg Session Duration'].median() + + fig.add_hline(y=median_y, line_dash="dash", line_color="#64748B", opacity=0.5) + fig.add_vline(x=median_x, line_dash="dash", line_color="#64748B", opacity=0.5) + + # Add quadrant labels + fig.add_annotation(x=0.95, y=0.05, text="⚡ Fast & Accurate", + showarrow=False, xref="paper", yref="paper", + font=dict(size=12, color="white", weight=600)) + fig.add_annotation(x=0.05, y=0.95, text="🐌 Slow & Inaccurate", + showarrow=False, xref="paper", yref="paper", + font=dict(size=12, color="#ffd21e", weight=600)) + + metric_display = "Action Completion" if metric == "Avg AC" else "Tool Selection Quality" + + fig.update_layout( + title=dict( + text=f"Speed vs Accuracy Trade-off: {metric_display}", + x=0.5, + y=0.97, + font=dict(size=22, family="'Geist', sans-serif", color="white", weight=700) + ), + xaxis=dict( + title=dict( + text=f"{metric_display}", + font=dict(size=16, color="white") + ), + tickfont=dict(size=12, color="white"), + gridcolor="rgba(245, 246, 247, 0.1)", + zerolinecolor="rgba(245, 246, 247, 0.2)" + ), + yaxis=dict( + title=dict( + text="Average Session Duration (seconds)", + font=dict(size=16, color="white") + ), + tickfont=dict(size=12, color="white"), + gridcolor="rgba(245, 246, 247, 0.1)", + zerolinecolor="rgba(245, 246, 247, 0.2)" + ), + paper_bgcolor="#01091A", + plot_bgcolor="rgba(245, 246, 247, 0.02)", + height=900, + width=1450, + margin=dict(t=100, b=80, l=80, r=120) + ) + + return fig + + +def create_domain_specialization_matrix(df, metric_type="AC"): + """Create bubble chart showing domain specialization""" + domains = ['Banking', 'Healthcare', 'Insurance', 'Investment', 'Telecom'] + + # Prepare data + data = [] + for _, model in df.iterrows(): + if model['Model'] == '': + continue + + model_avg = pd.to_numeric(model[f'Avg {metric_type}'], errors='coerce') + if pd.isna(model_avg): + continue + + for domain in domains: + domain_col = f'{domain} {metric_type}' + if domain_col in model and model[domain_col] != '': + domain_val = pd.to_numeric(model[domain_col], errors='coerce') + if not pd.isna(domain_val): + # Calculate specialization strength (deviation from model average) + specialization = domain_val - model_avg + data.append({ + 'Model': model['Model'], + 'Domain': domain, + 'Performance': domain_val, + 'Specialization': specialization, + 'Model Type': model['Model Type'] + }) + + if not data: + return create_empty_chart("No domain specialization data available") + + df_plot = pd.DataFrame(data) + + # Create bubble chart + fig = go.Figure() + + # Color based on specialization strength + fig.add_trace(go.Scatter( + x=df_plot['Domain'], + y=df_plot['Model'], + mode='markers', + marker=dict( + size=df_plot['Performance'] * 30, # Size based on absolute performance + color=df_plot['Specialization'], + colorscale=[[0, '#B8660A'], [0.5, '#E6B800'], [1, '#ffd21e']], + showscale=True, + colorbar=dict( + title=dict( + text="Specialization
Strength", + font=dict(color="white") + ), + tickfont=dict(color="white"), + bgcolor="rgba(1, 9, 26, 0.8)", + bordercolor="rgba(245, 246, 247, 0.2)", + borderwidth=1 + ), + line=dict(width=2, color='#01091A'), + opacity=0.8 + ), + text=[f"Performance: {p:.3f}
+              for p, s in zip(df_plot['Performance'], df_plot['Specialization'])],
+        hovertemplate="<b>%{y}</b><br>" +
+                      "Domain: %{x}<br>" +
+                      "%{text}<br>" +
" + + "" + )) + + metric_display = "Action Completion" if metric_type == "AC" else "Tool Selection Quality" + + fig.update_layout( + title=dict( + text=f"Domain Specialization Matrix: {metric_display}", + x=0.5, + y=0.97, + font=dict(size=22, family="'Geist', sans-serif", color="white", weight=700) + ), + xaxis=dict( + title=dict( + text="Business Domains", + font=dict(size=16, color="white") + ), + tickfont=dict(size=13, color="white"), + gridcolor="rgba(245, 246, 247, 0.1)" + ), + yaxis=dict( + title=dict( + text="Models", + font=dict(size=16, color="white") + ), + tickfont=dict(size=11, color="white"), + gridcolor="rgba(245, 246, 247, 0.1)" + ), + paper_bgcolor="#01091A", + plot_bgcolor="rgba(245, 246, 247, 0.02)", + height=1100, + width=1450, + margin=dict(t=100, b=80, l=220, r=120) + ) + + return fig + + +def create_performance_gap_analysis(df, metric_type="AC"): + """Create range plot showing performance gaps by domain""" + domains = ['Banking', 'Healthcare', 'Insurance', 'Investment', 'Telecom'] + + # Calculate min, max, median for each domain + gap_data = [] + for domain in domains: + domain_col = f'{domain} {metric_type}' + if domain_col in df.columns: + domain_values = pd.to_numeric(df[domain_col], errors='coerce').dropna() + if len(domain_values) > 0: + gap_data.append({ + 'Domain': domain, + 'Min': domain_values.min(), + 'Max': domain_values.max(), + 'Median': domain_values.median(), + 'Q1': domain_values.quantile(0.25), + 'Q3': domain_values.quantile(0.75), + 'Gap': domain_values.max() - domain_values.min() + }) + + if not gap_data: + return create_empty_chart("No data available for gap analysis") + + df_gap = pd.DataFrame(gap_data) + df_gap = df_gap.sort_values('Gap', ascending=True) + + fig = go.Figure() + + # Add range bars + for idx, row in df_gap.iterrows(): + # Add full range line + fig.add_trace(go.Scatter( + x=[row['Min'], row['Max']], + y=[row['Domain'], row['Domain']], + mode='lines', + line=dict(color='#64748B', width=2), + showlegend=False, + hoverinfo='skip' + )) + + # Add IQR box + fig.add_trace(go.Scatter( + x=[row['Q1'], row['Q3'], row['Q3'], row['Q1'], row['Q1']], + y=[row['Domain'], row['Domain'], row['Domain'], row['Domain'], row['Domain']], + fill='toself', + fillcolor='rgba(255, 210, 30, 0.3)', + line=dict(color='#ffd21e', width=2), + showlegend=False, + hoverinfo='skip', + mode='lines' + )) + + # Add median marker + fig.add_trace(go.Scatter( + x=[row['Median']], + y=[row['Domain']], + mode='markers', + marker=dict( + size=12, + color='#ffd21e', + symbol='diamond', + line=dict(width=2, color='#01091A') + ), + showlegend=False, + hovertemplate=f"{row['Domain']}
" + + f"Min: {row['Min']:.3f}
" + + f"Q1: {row['Q1']:.3f}
" + + f"Median: {row['Median']:.3f}
" + + f"Q3: {row['Q3']:.3f}
" + + f"Max: {row['Max']:.3f}
" + + f"Gap: {row['Gap']:.3f}
" + + "" + )) + + # Add min/max points + for idx, row in df_gap.iterrows(): + fig.add_trace(go.Scatter( + x=[row['Min'], row['Max']], + y=[row['Domain'], row['Domain']], + mode='markers', + marker=dict(size=8, color='white', line=dict(width=2, color='#01091A')), + showlegend=False, + hoverinfo='skip' + )) + + metric_display = "Action Completion" if metric_type == "AC" else "Tool Selection Quality" + + fig.update_layout( + title=dict( + text=f"Performance Gap Analysis by Domain: {metric_display}", + x=0.5, + y=0.97, + font=dict(size=22, family="'Geist', sans-serif", color="white", weight=700) + ), + xaxis=dict( + title=dict( + text=f"{metric_display} Score", + font=dict(size=16, color="white") + ), + tickfont=dict(size=12, color="white"), + gridcolor="rgba(245, 246, 247, 0.1)", + range=[0, 1] if metric_type in ['AC', 'TSQ'] else None + ), + yaxis=dict( + title=dict( + text="Business Domain", + font=dict(size=16, color="white") + ), + tickfont=dict(size=13, color="white"), + gridcolor="rgba(245, 246, 247, 0.1)" + ), + paper_bgcolor="#01091A", + plot_bgcolor="rgba(245, 246, 247, 0.02)", + height=800, + width=1450, + margin=dict(t=100, b=80, l=140, r=80), + showlegend=False + ) + + # Add legend manually + fig.add_annotation( + text="◆ Median ━ IQR ─ Full Range", + xref="paper", yref="paper", + x=0.98, y=0.02, + xanchor='right', yanchor='bottom', + font=dict(size=12, color='white'), + showarrow=False + ) + + return fig + + +def create_empty_chart(message): + """Create an empty chart with a message""" + fig = go.Figure() + + fig.add_annotation( + text=f"📊 {message}", + xref="paper", yref="paper", + x=0.5, y=0.5, + xanchor='center', yanchor='middle', + font=dict( + size=18, + color="white", + family="'Geist', sans-serif" + ), + showarrow=False, + bgcolor="rgba(245, 246, 247, 0.05)", + bordercolor="rgba(245, 246, 247, 0.2)", + borderwidth=1, + borderpad=20 + ) + + fig.update_layout( + paper_bgcolor="#01091A", + plot_bgcolor="rgba(245, 246, 247, 0.02)", + height=700, + width=1450, + margin=dict(t=80, b=80, l=80, r=80) + )